Merge branch 'hfi1' into k.o/for-4.14
author Doug Ledford <dledford@redhat.com>
Mon, 24 Jul 2017 12:33:43 +0000 (08:33 -0400)
committer Doug Ledford <dledford@redhat.com>
Mon, 24 Jul 2017 12:33:43 +0000 (08:33 -0400)
54 files changed:
drivers/infiniband/hw/hfi1/Makefile
drivers/infiniband/hw/hfi1/affinity.c
drivers/infiniband/hw/hfi1/affinity.h
drivers/infiniband/hw/hfi1/chip.c
drivers/infiniband/hw/hfi1/chip.h
drivers/infiniband/hw/hfi1/common.h
drivers/infiniband/hw/hfi1/driver.c
drivers/infiniband/hw/hfi1/eprom.c
drivers/infiniband/hw/hfi1/exp_rcv.c [new file with mode: 0644]
drivers/infiniband/hw/hfi1/exp_rcv.h [new file with mode: 0644]
drivers/infiniband/hw/hfi1/file_ops.c
drivers/infiniband/hw/hfi1/hfi.h
drivers/infiniband/hw/hfi1/init.c
drivers/infiniband/hw/hfi1/mad.c
drivers/infiniband/hw/hfi1/mad.h
drivers/infiniband/hw/hfi1/mmu_rb.c
drivers/infiniband/hw/hfi1/mmu_rb.h
drivers/infiniband/hw/hfi1/pcie.c
drivers/infiniband/hw/hfi1/platform.c
drivers/infiniband/hw/hfi1/qp.c
drivers/infiniband/hw/hfi1/rc.c
drivers/infiniband/hw/hfi1/ruc.c
drivers/infiniband/hw/hfi1/sdma.c
drivers/infiniband/hw/hfi1/trace.c
drivers/infiniband/hw/hfi1/trace_ibhdrs.h
drivers/infiniband/hw/hfi1/trace_misc.h
drivers/infiniband/hw/hfi1/trace_rx.h
drivers/infiniband/hw/hfi1/uc.c
drivers/infiniband/hw/hfi1/ud.c
drivers/infiniband/hw/hfi1/user_exp_rcv.c
drivers/infiniband/hw/hfi1/user_exp_rcv.h
drivers/infiniband/hw/hfi1/user_sdma.c
drivers/infiniband/hw/hfi1/verbs.c
drivers/infiniband/hw/hfi1/verbs.h
drivers/infiniband/hw/hfi1/vnic_main.c
drivers/infiniband/hw/qib/qib.h
drivers/infiniband/hw/qib/qib_iba6120.c
drivers/infiniband/hw/qib/qib_iba7220.c
drivers/infiniband/hw/qib/qib_iba7322.c
drivers/infiniband/hw/qib/qib_mad.c
drivers/infiniband/hw/qib/qib_pcie.c
drivers/infiniband/hw/qib/qib_ruc.c
drivers/infiniband/hw/qib/qib_ud.c
drivers/infiniband/hw/qib/qib_verbs.h
drivers/infiniband/sw/rdmavt/mr.c
drivers/infiniband/sw/rdmavt/qp.c
drivers/infiniband/sw/rdmavt/trace_mr.h
drivers/infiniband/sw/rdmavt/trace_tx.h
drivers/infiniband/sw/rdmavt/vt.c
drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
include/rdma/ib_hdrs.h
include/rdma/ib_verbs.h
include/rdma/rdma_vt.h
include/rdma/rdmavt_qp.h

index 88085f65432e90800ddbb88134cd3447b7719415..66d538c033b0133a801205c350a928c899a40407 100644 (file)
@@ -8,7 +8,7 @@
 obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
 
 hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
-       eprom.o file_ops.o firmware.o \
+       eprom.o exp_rcv.o file_ops.o firmware.o \
        init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
        qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
        uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
index e2cd2cd3b28a88201c7b1f8488c1aece32305855..a97055dd4fbdeeefcd9be4b39deebb5939e958eb 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -335,10 +335,10 @@ static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
        sde->cpu = cpu;
        cpumask_clear(&msix->mask);
        cpumask_set_cpu(cpu, &msix->mask);
-       dd_dev_dbg(dd, "IRQ vector: %u, type %s engine %u -> cpu: %d\n",
-                  msix->msix.vector, irq_type_names[msix->type],
+       dd_dev_dbg(dd, "IRQ: %u, type %s engine %u -> cpu: %d\n",
+                  msix->irq, irq_type_names[msix->type],
                   sde->this_idx, cpu);
-       irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+       irq_set_affinity_hint(msix->irq, &msix->mask);
 
        /*
         * Set the new cpu in the hfi1_affinity_node and clean
@@ -387,7 +387,7 @@ static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix)
 {
        struct irq_affinity_notify *notify = &msix->notify;
 
-       notify->irq = msix->msix.vector;
+       notify->irq = msix->irq;
        notify->notify = hfi1_irq_notifier_notify;
        notify->release = hfi1_irq_notifier_release;
 
@@ -472,10 +472,10 @@ static int get_irq_affinity(struct hfi1_devdata *dd,
        }
 
        cpumask_set_cpu(cpu, &msix->mask);
-       dd_dev_info(dd, "IRQ vector: %u, type %s %s -> cpu: %d\n",
-                   msix->msix.vector, irq_type_names[msix->type],
+       dd_dev_info(dd, "IRQ: %u, type %s %s -> cpu: %d\n",
+                   msix->irq, irq_type_names[msix->type],
                    extra, cpu);
-       irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+       irq_set_affinity_hint(msix->irq, &msix->mask);
 
        if (msix->type == IRQ_SDMA) {
                sde->cpu = cpu;
@@ -533,7 +533,7 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
                }
        }
 
-       irq_set_affinity_hint(msix->msix.vector, NULL);
+       irq_set_affinity_hint(msix->irq, NULL);
        cpumask_clear(&msix->mask);
        mutex_unlock(&node_affinity.lock);
 }
index e78c7aa094e03d9be639f6035e67d0a674c372fb..2a1e374169c0aa3c3fff087516e2a501448372f0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -75,24 +75,26 @@ struct hfi1_msix_entry;
 /* Initialize non-HT cpu cores mask */
 void init_real_cpu_mask(void);
 /* Initialize driver affinity data */
-int hfi1_dev_affinity_init(struct hfi1_devdata *);
+int hfi1_dev_affinity_init(struct hfi1_devdata *dd);
 /*
  * Set IRQ affinity to a CPU. The function will determine the
  * CPU and set the affinity to it.
  */
-int hfi1_get_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *);
+int hfi1_get_irq_affinity(struct hfi1_devdata *dd,
+                         struct hfi1_msix_entry *msix);
 /*
  * Remove the IRQ's CPU affinity. This function also updates
  * any internal CPU tracking data
  */
-void hfi1_put_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *);
+void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
+                          struct hfi1_msix_entry *msix);
 /*
  * Determine a CPU affinity for a user process, if the process does not
  * have an affinity set yet.
  */
-int hfi1_get_proc_affinity(int);
+int hfi1_get_proc_affinity(int node);
 /* Release a CPU used by a user process. */
-void hfi1_put_proc_affinity(int);
+void hfi1_put_proc_affinity(int cpu);
 
 struct hfi1_affinity_node {
        int node;
index 94b54850ec75b7273eb63b961ffe7ebfbc0ee5c5..937350d9deab0b70df0c246a08c57cfa96110d14 100644 (file)
@@ -1012,14 +1012,15 @@ static struct flag_table dc8051_info_err_flags[] = {
  */
 static struct flag_table dc8051_info_host_msg_flags[] = {
        FLAG_ENTRY0("Host request done", 0x0001),
-       FLAG_ENTRY0("BC SMA message", 0x0002),
-       FLAG_ENTRY0("BC PWR_MGM message", 0x0004),
+       FLAG_ENTRY0("BC PWR_MGM message", 0x0002),
+       FLAG_ENTRY0("BC SMA message", 0x0004),
        FLAG_ENTRY0("BC Unknown message (BCC)", 0x0008),
        FLAG_ENTRY0("BC Unknown message (LCB)", 0x0010),
        FLAG_ENTRY0("External device config request", 0x0020),
        FLAG_ENTRY0("VerifyCap all frames received", 0x0040),
        FLAG_ENTRY0("LinkUp achieved", 0x0080),
        FLAG_ENTRY0("Link going down", 0x0100),
+       FLAG_ENTRY0("Link width downgraded", 0x0200),
 };
 
 static u32 encoded_size(u32 size);
@@ -1066,6 +1067,8 @@ static int thermal_init(struct hfi1_devdata *dd);
 
 static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
                                  int msecs);
+static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
+                                  int msecs);
 static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
 static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
 static void handle_temp_err(struct hfi1_devdata *dd);
@@ -6906,7 +6909,7 @@ static void reset_neighbor_info(struct hfi1_pportdata *ppd)
 
 static const char * const link_down_reason_strs[] = {
        [OPA_LINKDOWN_REASON_NONE] = "None",
-       [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Recive error 0",
+       [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Receive error 0",
        [OPA_LINKDOWN_REASON_BAD_PKT_LEN] = "Bad packet length",
        [OPA_LINKDOWN_REASON_PKT_TOO_LONG] = "Packet too long",
        [OPA_LINKDOWN_REASON_PKT_TOO_SHORT] = "Packet too short",
@@ -9373,13 +9376,13 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
 
        if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
            (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
-               dd_dev_info(dd, "%s: QSFP cable temperature too high\n",
-                           __func__);
+               dd_dev_err(dd, "%s: QSFP cable temperature too high\n",
+                          __func__);
 
        if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
            (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
-               dd_dev_info(dd, "%s: QSFP cable temperature too low\n",
-                           __func__);
+               dd_dev_err(dd, "%s: QSFP cable temperature too low\n",
+                          __func__);
 
        /*
         * The remaining alarms/warnings don't matter if the link is down.
@@ -9389,75 +9392,75 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
 
        if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
            (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
-               dd_dev_info(dd, "%s: QSFP supply voltage too high\n",
-                           __func__);
+               dd_dev_err(dd, "%s: QSFP supply voltage too high\n",
+                          __func__);
 
        if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
            (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
-               dd_dev_info(dd, "%s: QSFP supply voltage too low\n",
-                           __func__);
+               dd_dev_err(dd, "%s: QSFP supply voltage too low\n",
+                          __func__);
 
        /* Byte 2 is vendor specific */
 
        if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
            (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
-               dd_dev_info(dd, "%s: Cable RX channel 1/2 power too high\n",
-                           __func__);
+               dd_dev_err(dd, "%s: Cable RX channel 1/2 power too high\n",
+                          __func__);
 
        if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
            (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
-               dd_dev_info(dd, "%s: Cable RX channel 1/2 power too low\n",
-                           __func__);
+               dd_dev_err(dd, "%s: Cable RX channel 1/2 power too low\n",
+                          __func__);
 
        if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
            (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
-               dd_dev_info(dd, "%s: Cable RX channel 3/4 power too high\n",
-                           __func__);
+               dd_dev_err(dd, "%s: Cable RX channel 3/4 power too high\n",
+                          __func__);
 
        if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
            (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
-               dd_dev_info(dd, "%s: Cable RX channel 3/4 power too low\n",
-                           __func__);
+               dd_dev_err(dd, "%s: Cable RX channel 3/4 power too low\n",
+                          __func__);
 
        if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
            (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
-               dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too high\n",
-                           __func__);
+               dd_dev_err(dd, "%s: Cable TX channel 1/2 bias too high\n",
+                          __func__);
 
        if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
            (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
-               dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too low\n",
-                           __func__);
+               dd_dev_err(dd, "%s: Cable TX channel 1/2 bias too low\n",
+                          __func__);
 
        if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
            (qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
-               dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too high\n",
-                           __func__);
+               dd_dev_err(dd, "%s: Cable TX channel 3/4 bias too high\n",
+                          __func__);
 
        if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
            (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
-               dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too low\n",
-                           __func__);
+               dd_dev_err(dd, "%s: Cable TX channel 3/4 bias too low\n",
+                          __func__);
 
        if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
            (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
-               dd_dev_info(dd, "%s: Cable TX channel 1/2 power too high\n",
-                           __func__);
+               dd_dev_err(dd, "%s: Cable TX channel 1/2 power too high\n",
+                          __func__);
 
        if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
            (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
-               dd_dev_info(dd, "%s: Cable TX channel 1/2 power too low\n",
-                           __func__);
+               dd_dev_err(dd, "%s: Cable TX channel 1/2 power too low\n",
+                          __func__);
 
        if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
            (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
-               dd_dev_info(dd, "%s: Cable TX channel 3/4 power too high\n",
-                           __func__);
+               dd_dev_err(dd, "%s: Cable TX channel 3/4 power too high\n",
+                          __func__);
 
        if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
            (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
-               dd_dev_info(dd, "%s: Cable TX channel 3/4 power too low\n",
-                           __func__);
+               dd_dev_err(dd, "%s: Cable TX channel 3/4 power too low\n",
+                          __func__);
 
        /* Bytes 9-10 and 11-12 are reserved */
        /* Bytes 13-15 are vendor specific */
@@ -9742,17 +9745,6 @@ static inline int init_cpu_counters(struct hfi1_devdata *dd)
        return 0;
 }
 
-static const char * const pt_names[] = {
-       "expected",
-       "eager",
-       "invalid"
-};
-
-static const char *pt_name(u32 type)
-{
-       return type >= ARRAY_SIZE(pt_names) ? "unknown" : pt_names[type];
-}
-
 /*
  * index is the index into the receive array
  */
@@ -9774,15 +9766,14 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
                           type, index);
                goto done;
        }
-
-       hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx",
-                 pt_name(type), index, pa, (unsigned long)order);
+       trace_hfi1_put_tid(dd, index, type, pa, order);
 
 #define RT_ADDR_SHIFT 12       /* 4KB kernel address boundary */
        reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK
                | (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT
                | ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK)
                                        << RCV_ARRAY_RT_ADDR_SHIFT;
+       trace_hfi1_write_rcvarray(base + (index * 8), reg);
        writeq(reg, base + (index * 8));
 
        if (type == PT_EAGER)
@@ -9810,15 +9801,6 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
                hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
 }
 
-struct ib_header *hfi1_get_msgheader(
-       struct hfi1_devdata *dd, __le32 *rhf_addr)
-{
-       u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
-
-       return (struct ib_header *)
-               (rhf_addr - dd->rhf_offset + offset);
-}
-
 static const char * const ib_cfg_name_strings[] = {
        "HFI1_IB_CFG_LIDLMC",
        "HFI1_IB_CFG_LWID_DG_ENB",
@@ -10037,28 +10019,6 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
        sdma_update_lmc(dd, mask, ppd->lid);
 }
 
-static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs)
-{
-       unsigned long timeout;
-       u32 curr_state;
-
-       timeout = jiffies + msecs_to_jiffies(msecs);
-       while (1) {
-               curr_state = read_physical_state(dd);
-               if (curr_state == state)
-                       break;
-               if (time_after(jiffies, timeout)) {
-                       dd_dev_err(dd,
-                                  "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
-                                  state, curr_state);
-                       return -ETIMEDOUT;
-               }
-               usleep_range(1950, 2050); /* sleep 2ms-ish */
-       }
-
-       return 0;
-}
-
 static const char *state_completed_string(u32 completed)
 {
        static const char * const state_completed[] = {
@@ -10292,7 +10252,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
 
        if (do_wait) {
                /* it can take a while for the link to go down */
-               ret = wait_phy_linkstate(dd, PLS_OFFLINE, 10000);
+               ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000);
                if (ret < 0)
                        return ret;
        }
@@ -10545,6 +10505,19 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
                        goto unexpected;
                }
 
+               /*
+                * Wait for Link_Up physical state.
+                * Physical and Logical states should already be
+                * transitioned to LinkUp and LinkInit respectively.
+                */
+               ret = wait_physical_linkstate(ppd, PLS_LINKUP, 1000);
+               if (ret) {
+                       dd_dev_err(dd,
+                                  "%s: physical state did not change to LINK-UP\n",
+                                  __func__);
+                       break;
+               }
+
                ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
                if (ret) {
                        dd_dev_err(dd,
@@ -10658,6 +10631,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
                 */
                if (ret)
                        goto_offline(ppd, 0);
+               else
+                       cache_physical_state(ppd);
                break;
        case HLS_DN_DISABLE:
                /* link is disabled */
@@ -10682,6 +10657,13 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
                                ret = -EINVAL;
                                break;
                        }
+                       ret = wait_physical_linkstate(ppd, PLS_DISABLED, 10000);
+                       if (ret) {
+                               dd_dev_err(dd,
+                                          "%s: physical state did not change to DISABLED\n",
+                                          __func__);
+                               break;
+                       }
                        dc_shutdown(dd);
                }
                ppd->host_link_state = HLS_DN_DISABLE;
@@ -10699,6 +10681,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
                if (ppd->host_link_state != HLS_DN_POLL)
                        goto unexpected;
                ppd->host_link_state = HLS_VERIFY_CAP;
+               cache_physical_state(ppd);
                break;
        case HLS_GOING_UP:
                if (ppd->host_link_state != HLS_VERIFY_CAP)
@@ -12672,21 +12655,56 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
        return -ETIMEDOUT;
 }
 
-u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
+/*
+ * Read the physical hardware link state and set the driver's cached value
+ * of it.
+ */
+void cache_physical_state(struct hfi1_pportdata *ppd)
 {
-       u32 pstate;
+       u32 read_pstate;
        u32 ib_pstate;
 
-       pstate = read_physical_state(ppd->dd);
-       ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
-       if (ppd->last_pstate != ib_pstate) {
+       read_pstate = read_physical_state(ppd->dd);
+       ib_pstate = chip_to_opa_pstate(ppd->dd, read_pstate);
+       /* check if OPA pstate changed */
+       if (chip_to_opa_pstate(ppd->dd, ppd->pstate) != ib_pstate) {
                dd_dev_info(ppd->dd,
                            "%s: physical state changed to %s (0x%x), phy 0x%x\n",
                            __func__, opa_pstate_name(ib_pstate), ib_pstate,
-                           pstate);
-               ppd->last_pstate = ib_pstate;
+                           read_pstate);
+       }
+       ppd->pstate = read_pstate;
+}
+
+/*
+ * wait_physical_linkstate - wait for a physical link state change to occur
+ * @ppd: port device
+ * @state: the state to wait for
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for physical link state change to occur.
+ * Returns 0 if state reached, otherwise -ETIMEDOUT.
+ */
+static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
+                                  int msecs)
+{
+       unsigned long timeout;
+
+       timeout = jiffies + msecs_to_jiffies(msecs);
+       while (1) {
+               cache_physical_state(ppd);
+               if (ppd->pstate == state)
+                       break;
+               if (time_after(jiffies, timeout)) {
+                       dd_dev_err(ppd->dd,
+                                  "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
+                                  state, ppd->pstate);
+                       return -ETIMEDOUT;
+               }
+               usleep_range(1950, 2050); /* sleep 2ms-ish */
        }
-       return ib_pstate;
+
+       return 0;
 }
 
 #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
@@ -12809,30 +12827,24 @@ static void clean_up_interrupts(struct hfi1_devdata *dd)
                for (i = 0; i < dd->num_msix_entries; i++, me++) {
                        if (!me->arg) /* => no irq, no affinity */
                                continue;
-                       hfi1_put_irq_affinity(dd, &dd->msix_entries[i]);
-                       free_irq(me->msix.vector, me->arg);
+                       hfi1_put_irq_affinity(dd, me);
+                       free_irq(me->irq, me->arg);
                }
+
+               /* clean structures */
+               kfree(dd->msix_entries);
+               dd->msix_entries = NULL;
+               dd->num_msix_entries = 0;
        } else {
                /* INTx */
                if (dd->requested_intx_irq) {
                        free_irq(dd->pcidev->irq, dd);
                        dd->requested_intx_irq = 0;
                }
-       }
-
-       /* turn off interrupts */
-       if (dd->num_msix_entries) {
-               /* MSI-X */
-               pci_disable_msix(dd->pcidev);
-       } else {
-               /* INTx */
                disable_intx(dd->pcidev);
        }
 
-       /* clean structures */
-       kfree(dd->msix_entries);
-       dd->msix_entries = NULL;
-       dd->num_msix_entries = 0;
+       pci_free_irq_vectors(dd->pcidev);
 }
 
 /*
@@ -12986,13 +12998,21 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
                        continue;
                /* make sure the name is terminated */
                me->name[sizeof(me->name) - 1] = 0;
+               me->irq = pci_irq_vector(dd->pcidev, i);
+               /*
+                * On err return me->irq.  Don't need to clear this
+                * because 'arg' has not been set, and cleanup will
+                * do the right thing.
+                */
+               if (me->irq < 0)
+                       return me->irq;
 
-               ret = request_threaded_irq(me->msix.vector, handler, thread, 0,
+               ret = request_threaded_irq(me->irq, handler, thread, 0,
                                           me->name, arg);
                if (ret) {
                        dd_dev_err(dd,
-                                  "unable to allocate %s interrupt, vector %d, index %d, err %d\n",
-                                  err_info, me->msix.vector, idx, ret);
+                                  "unable to allocate %s interrupt, irq %d, index %d, err %d\n",
+                                  err_info, me->irq, idx, ret);
                        return ret;
                }
                /*
@@ -13003,8 +13023,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
 
                ret = hfi1_get_irq_affinity(dd, me);
                if (ret)
-                       dd_dev_err(dd,
-                                  "unable to pin IRQ %d\n", ret);
+                       dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
        }
 
        return ret;
@@ -13023,7 +13042,7 @@ void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
                struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
                struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
 
-               synchronize_irq(me->msix.vector);
+               synchronize_irq(me->irq);
        }
 }
 
@@ -13036,7 +13055,7 @@ void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
                return;
 
        hfi1_put_irq_affinity(dd, me);
-       free_irq(me->msix.vector, me->arg);
+       free_irq(me->irq, me->arg);
 
        me->arg = NULL;
 }
@@ -13064,14 +13083,19 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
                 DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
        me->name[sizeof(me->name) - 1] = 0;
        me->type = IRQ_RCVCTXT;
-
+       me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr);
+       if (me->irq < 0) {
+               dd_dev_err(dd, "vnic irq vector request (idx %d) fail %d\n",
+                          idx, me->irq);
+               return;
+       }
        remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
 
-       ret = request_threaded_irq(me->msix.vector, receive_context_interrupt,
+       ret = request_threaded_irq(me->irq, receive_context_interrupt,
                                   receive_context_thread, 0, me->name, arg);
        if (ret) {
-               dd_dev_err(dd, "vnic irq request (vector %d, idx %d) fail %d\n",
-                          me->msix.vector, idx, ret);
+               dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n",
+                          me->irq, idx, ret);
                return;
        }
        /*
@@ -13084,7 +13108,7 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
        if (ret) {
                dd_dev_err(dd,
                           "unable to pin IRQ %d\n", ret);
-               free_irq(me->msix.vector, me->arg);
+               free_irq(me->irq, me->arg);
        }
 }
 
@@ -13107,9 +13131,8 @@ static void reset_interrupts(struct hfi1_devdata *dd)
 
 static int set_up_interrupts(struct hfi1_devdata *dd)
 {
-       struct hfi1_msix_entry *entries;
-       u32 total, request;
-       int i, ret;
+       u32 total;
+       int ret, request;
        int single_interrupt = 0; /* we expect to have all the interrupts */
 
        /*
@@ -13121,39 +13144,31 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
         */
        total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT;
 
-       entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
-       if (!entries) {
-               ret = -ENOMEM;
-               goto fail;
-       }
-       /* 1-1 MSI-X entry assignment */
-       for (i = 0; i < total; i++)
-               entries[i].msix.entry = i;
-
        /* ask for MSI-X interrupts */
-       request = total;
-       request_msix(dd, &request, entries);
-
-       if (request == 0) {
+       request = request_msix(dd, total);
+       if (request < 0) {
+               ret = request;
+               goto fail;
+       } else if (request == 0) {
                /* using INTx */
                /* dd->num_msix_entries already zero */
-               kfree(entries);
                single_interrupt = 1;
                dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n");
+       } else if (request < total) {
+               /* using MSI-X, with reduced interrupts */
+               dd_dev_err(dd, "reduced interrupt found, wanted %u, got %u\n",
+                          total, request);
+               ret = -EINVAL;
+               goto fail;
        } else {
-               /* using MSI-X */
-               dd->num_msix_entries = request;
-               dd->msix_entries = entries;
-
-               if (request != total) {
-                       /* using MSI-X, with reduced interrupts */
-                       dd_dev_err(
-                               dd,
-                               "cannot handle reduced interrupt case, want %u, got %u\n",
-                               total, request);
-                       ret = -EINVAL;
+               dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries),
+                                          GFP_KERNEL);
+               if (!dd->msix_entries) {
+                       ret = -ENOMEM;
                        goto fail;
                }
+               /* using MSI-X */
+               dd->num_msix_entries = total;
                dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
        }
 
@@ -14793,7 +14808,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
                /* start in offline */
                ppd->host_link_state = HLS_DN_OFFLINE;
                init_vl_arb_caches(ppd);
-               ppd->last_pstate = 0xff; /* invalid value */
+               ppd->pstate = PLS_OFFLINE;
        }
 
        dd->link_default = HLS_DN_POLL;
index cbe455d9ab8b65547849011b6e2e2ffd937ee0bb..3dab3156ba4ac3dc22a7d18e67fb2c01aeeaae3f 100644 (file)
@@ -744,6 +744,7 @@ int is_bx(struct hfi1_devdata *dd);
 u32 read_physical_state(struct hfi1_devdata *dd);
 u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate);
 u32 get_logical_state(struct hfi1_pportdata *ppd);
+void cache_physical_state(struct hfi1_pportdata *ppd);
 const char *opa_lstate_name(u32 lstate);
 const char *opa_pstate_name(u32 pstate);
 u32 driver_physical_state(struct hfi1_pportdata *ppd);
@@ -1347,8 +1348,6 @@ enum {
 u64 get_all_cpu_total(u64 __percpu *cntr);
 void hfi1_start_cleanup(struct hfi1_devdata *dd);
 void hfi1_clear_tids(struct hfi1_ctxtdata *rcd);
-struct ib_header *hfi1_get_msgheader(
-                               struct hfi1_devdata *dd, __le32 *rhf_addr);
 void hfi1_init_ctxt(struct send_context *sc);
 void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
                  u32 type, unsigned long pa, u16 order);
@@ -1356,7 +1355,6 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd);
 void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt);
 u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp);
 u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp);
-u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd);
 int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which);
 int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val);
 int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey);
index 995d62c7f9a7d6f2d1c7c2a7b86f8044837e95fe..ba9ab971ced942352fd2ab27bba9ae97c275e81f 100644 (file)
@@ -325,6 +325,7 @@ struct diag_pkt {
 #define HFI1_LRH_BTH 0x0002      /* 1. word of IB LRH - next header: BTH */
 
 /* misc. */
+#define SC15_PACKET 0xF
 #define SIZE_OF_CRC 1
 
 #define LIM_MGMT_P_KEY       0x7FFF
index a50870e455a30156a75c7527e4ec2b7646b8de9b..e64e9e28c936ef00f0026270345ef427d46a9277 100644 (file)
@@ -224,6 +224,20 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf,
                        (offset * RCV_BUF_BLOCK_SIZE));
 }
 
+static inline void *hfi1_get_header(struct hfi1_devdata *dd,
+                                   __le32 *rhf_addr)
+{
+       u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
+
+       return (void *)(rhf_addr - dd->rhf_offset + offset);
+}
+
+static inline struct ib_header *hfi1_get_msgheader(struct hfi1_devdata *dd,
+                                                  __le32 *rhf_addr)
+{
+       return (struct ib_header *)hfi1_get_header(dd, rhf_addr);
+}
+
 /*
  * Validate and encode the a given RcvArray Buffer size.
  * The function will check whether the given size falls within
@@ -249,7 +263,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 {
        struct ib_header *rhdr = packet->hdr;
        u32 rte = rhf_rcv_type_err(packet->rhf);
-       int lnh = ib_get_lnh(rhdr);
+       u8 lnh = ib_get_lnh(rhdr);
+       bool has_grh = false;
        struct hfi1_ibport *ibp = rcd_to_iport(rcd);
        struct hfi1_devdata *dd = ppd->dd;
        struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
@@ -257,37 +272,42 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
        if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
                return;
 
+       if (lnh == HFI1_LRH_BTH) {
+               packet->ohdr = &rhdr->u.oth;
+       } else if (lnh == HFI1_LRH_GRH) {
+               has_grh = true;
+               packet->ohdr = &rhdr->u.l.oth;
+               packet->grh = &rhdr->u.l.grh;
+       } else {
+               goto drop;
+       }
+
        if (packet->rhf & RHF_TID_ERR) {
                /* For TIDERR and RC QPs preemptively schedule a NAK */
-               struct ib_other_headers *ohdr = NULL;
                u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
-               u16 lid  = ib_get_dlid(rhdr);
+               u32 dlid = ib_get_dlid(rhdr);
                u32 qp_num;
-               u32 rcv_flags = 0;
+               u32 mlid_base = be16_to_cpu(IB_MULTICAST_LID_BASE);
 
                /* Sanity check packet */
                if (tlen < 24)
                        goto drop;
 
                /* Check for GRH */
-               if (lnh == HFI1_LRH_BTH) {
-                       ohdr = &rhdr->u.oth;
-               } else if (lnh == HFI1_LRH_GRH) {
+               if (has_grh) {
                        u32 vtf;
+                       struct ib_grh *grh = packet->grh;
 
-                       ohdr = &rhdr->u.l.oth;
-                       if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
+                       if (grh->next_hdr != IB_GRH_NEXT_HDR)
                                goto drop;
-                       vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow);
+                       vtf = be32_to_cpu(grh->version_tclass_flow);
                        if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
                                goto drop;
-                       rcv_flags |= HFI1_HAS_GRH;
-               } else {
-                       goto drop;
                }
+
                /* Get the destination QP number. */
-               qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
-               if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
+               qp_num = ib_bth_get_qpn(packet->ohdr);
+               if (dlid < mlid_base) {
                        struct rvt_qp *qp;
                        unsigned long flags;
 
@@ -312,11 +332,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 
                        switch (qp->ibqp.qp_type) {
                        case IB_QPT_RC:
-                               hfi1_rc_hdrerr(
-                                       rcd,
-                                       rhdr,
-                                       rcv_flags,
-                                       qp);
+                               hfi1_rc_hdrerr(rcd, packet, qp);
                                break;
                        default:
                                /* For now don't handle any other QP types */
@@ -332,9 +348,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
        switch (rte) {
        case RHF_RTE_ERROR_OP_CODE_ERR:
        {
-               u32 opcode;
                void *ebuf = NULL;
-               __be32 *bth = NULL;
+               u8 opcode;
 
                if (rhf_use_egr_bfr(packet->rhf))
                        ebuf = packet->ebuf;
@@ -342,16 +357,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
                if (!ebuf)
                        goto drop; /* this should never happen */
 
-               if (lnh == HFI1_LRH_BTH)
-                       bth = (__be32 *)ebuf;
-               else if (lnh == HFI1_LRH_GRH)
-                       bth = (__be32 *)((char *)ebuf + sizeof(struct ib_grh));
-               else
-                       goto drop;
-
-               opcode = be32_to_cpu(bth[0]) >> 24;
-               opcode &= 0xff;
-
+               opcode = ib_bth_get_opcode(packet->ohdr);
                if (opcode == IB_OPCODE_CNP) {
                        /*
                         * Only in pre-B0 h/w is the CNP_OPCODE handled
@@ -365,7 +371,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
                        sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf);
                        sl = ibp->sc_to_sl[sc5];
 
-                       lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK;
+                       lqpn = ib_bth_get_qpn(packet->ohdr);
                        rcu_read_lock();
                        qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn);
                        if (!qp) {
@@ -415,7 +421,6 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd,
        packet->rhf = rhf_to_cpu(packet->rhf_addr);
        packet->rhqoff = rcd->head;
        packet->numpkt = 0;
-       packet->rcv_flags = 0;
 }
 
 void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
@@ -424,21 +429,18 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
        struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
        struct ib_header *hdr = pkt->hdr;
        struct ib_other_headers *ohdr = pkt->ohdr;
-       struct ib_grh *grh = NULL;
+       struct ib_grh *grh = pkt->grh;
        u32 rqpn = 0, bth1;
        u16 rlid, dlid = ib_get_dlid(hdr);
        u8 sc, svc_type;
        bool is_mcast = false;
 
-       if (pkt->rcv_flags & HFI1_HAS_GRH)
-               grh = &hdr->u.l.grh;
-
        switch (qp->ibqp.qp_type) {
        case IB_QPT_SMI:
        case IB_QPT_GSI:
        case IB_QPT_UD:
                rlid = ib_get_slid(hdr);
-               rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
+               rqpn = ib_get_sqpn(ohdr);
                svc_type = IB_CC_SVCTYPE_UD;
                is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
                        (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
@@ -461,7 +463,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
 
        bth1 = be32_to_cpu(ohdr->bth[1]);
        if (do_cnp && (bth1 & IB_FECN_SMASK)) {
-               u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]);
+               u16 pkey = ib_bth_get_pkey(ohdr);
 
                return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh);
        }
@@ -591,9 +593,10 @@ static void __prescan_rxq(struct hfi1_packet *packet)
 
                if (lnh == HFI1_LRH_BTH) {
                        packet->ohdr = &hdr->u.oth;
+                       packet->grh = NULL;
                } else if (lnh == HFI1_LRH_GRH) {
                        packet->ohdr = &hdr->u.l.oth;
-                       packet->rcv_flags |= HFI1_HAS_GRH;
+                       packet->grh = &hdr->u.l.grh;
                } else {
                        goto next; /* just in case */
                }
@@ -698,10 +701,8 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
 {
        int ret;
 
-       packet->hdr = hfi1_get_msgheader(packet->rcd->dd,
-                                        packet->rhf_addr);
-       packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
        packet->etype = rhf_rcv_type(packet->rhf);
+
        /* total length */
        packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */
        /* retrieve eager buffer details */
@@ -759,7 +760,7 @@ static inline void process_rcv_update(int last, struct hfi1_packet *packet)
                               packet->etail, 0, 0);
                packet->updegr = 0;
        }
-       packet->rcv_flags = 0;
+       packet->grh = NULL;
 }
 
 static inline void finish_packet(struct hfi1_packet *packet)
@@ -896,16 +897,21 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
                                      struct hfi1_devdata *dd)
 {
        struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
-       struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
-                                                  packet->rhf_addr);
        u8 etype = rhf_rcv_type(packet->rhf);
+       u8 sc = SC15_PACKET;
 
-       if (etype == RHF_RCV_TYPE_IB &&
-           hfi1_9B_get_sc5(hdr, packet->rhf) != 0xf) {
-               int hwstate = read_logical_state(dd);
+       if (etype == RHF_RCV_TYPE_IB) {
+               struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
+                                                          packet->rhf_addr);
+               sc = hfi1_9B_get_sc5(hdr, packet->rhf);
+       }
+       if (sc != SC15_PACKET) {
+               int hwstate = driver_lstate(rcd->ppd);
 
-               if (hwstate != LSTATE_ACTIVE) {
-                       dd_dev_info(dd, "Unexpected link state %d\n", hwstate);
+               if (hwstate != IB_PORT_ACTIVE) {
+                       dd_dev_info(dd,
+                                   "Unexpected link state %s\n",
+                                   opa_lstate_name(hwstate));
                        return 0;
                }
 
@@ -1321,6 +1327,58 @@ bail:
        return ret;
 }
 
+static inline void hfi1_setup_ib_header(struct hfi1_packet *packet)
+{
+       packet->hdr = (struct hfi1_ib_message_header *)
+                       hfi1_get_msgheader(packet->rcd->dd,
+                                          packet->rhf_addr);
+       packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
+}
+
+static int hfi1_setup_9B_packet(struct hfi1_packet *packet)
+{
+       struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
+       struct ib_header *hdr;
+       u8 lnh;
+
+       hfi1_setup_ib_header(packet);
+       hdr = packet->hdr;
+
+       lnh = ib_get_lnh(hdr);
+       if (lnh == HFI1_LRH_BTH) {
+               packet->ohdr = &hdr->u.oth;
+               packet->grh = NULL;
+       } else if (lnh == HFI1_LRH_GRH) {
+               u32 vtf;
+
+               packet->ohdr = &hdr->u.l.oth;
+               packet->grh = &hdr->u.l.grh;
+               if (packet->grh->next_hdr != IB_GRH_NEXT_HDR)
+                       goto drop;
+               vtf = be32_to_cpu(packet->grh->version_tclass_flow);
+               if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
+                       goto drop;
+       } else {
+               goto drop;
+       }
+
+       /* Query commonly used fields from packet header */
+       packet->opcode = ib_bth_get_opcode(packet->ohdr);
+       packet->slid = ib_get_slid(hdr);
+       packet->dlid = ib_get_dlid(hdr);
+       packet->sl = ib_get_sl(hdr);
+       packet->sc = hfi1_9B_get_sc5(hdr, packet->rhf);
+       packet->pad = ib_bth_get_pad(packet->ohdr);
+       packet->extra_byte = 0;
+       packet->fecn = ib_bth_get_fecn(packet->ohdr);
+       packet->becn = ib_bth_get_becn(packet->ohdr);
+
+       return 0;
+drop:
+       ibp->rvp.n_pkt_drops++;
+       return -EINVAL;
+}
+
 void handle_eflags(struct hfi1_packet *packet)
 {
        struct hfi1_ctxtdata *rcd = packet->rcd;
@@ -1351,6 +1409,9 @@ int process_receive_ib(struct hfi1_packet *packet)
        if (unlikely(hfi1_dbg_fault_packet(packet)))
                return RHF_RCV_CONTINUE;
 
+       if (hfi1_setup_9B_packet(packet))
+               return RHF_RCV_CONTINUE;
+
        trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
                          packet->rcd->ctxt,
                          rhf_err_flags(packet->rhf),
@@ -1422,6 +1483,7 @@ int process_receive_error(struct hfi1_packet *packet)
                 rhf_rcv_type_err(packet->rhf) == 3))
                return RHF_RCV_CONTINUE;
 
+       hfi1_setup_ib_header(packet);
        handle_eflags(packet);
 
        if (unlikely(rhf_err_flags(packet->rhf)))
@@ -1435,6 +1497,8 @@ int kdeth_process_expected(struct hfi1_packet *packet)
 {
        if (unlikely(hfi1_dbg_fault_packet(packet)))
                return RHF_RCV_CONTINUE;
+
+       hfi1_setup_ib_header(packet);
        if (unlikely(rhf_err_flags(packet->rhf)))
                handle_eflags(packet);
 
@@ -1445,6 +1509,7 @@ int kdeth_process_expected(struct hfi1_packet *packet)
 
 int kdeth_process_eager(struct hfi1_packet *packet)
 {
+       hfi1_setup_ib_header(packet);
        if (unlikely(rhf_err_flags(packet->rhf)))
                handle_eflags(packet);
        if (unlikely(hfi1_dbg_fault_packet(packet)))
index 26da124c88e2094e391e427ed45b233132a76f9d..d46b171079010d2cf7e42dd7557f0abc2bef1348 100644 (file)
@@ -250,7 +250,6 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
 {
        void *buffer;
        void *p;
-       u32 length;
        int ret;
 
        buffer = kmalloc(P1_SIZE, GFP_KERNEL);
@@ -265,13 +264,13 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
 
        /* scan for image magic that may trail the actual data */
        p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
-       if (p)
-               length = p - buffer;
-       else
-               length = P1_SIZE;
+       if (!p) {
+               kfree(buffer);
+               return -ENOENT;
+       }
 
        *data = buffer;
-       *size = length;
+       *size = p - buffer;
        return 0;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c
new file mode 100644 (file)
index 0000000..0af9167
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "exp_rcv.h"
+#include "trace.h"
+
+/**
+ * hfi1_exp_tid_group_init - initialize exp_tid_set
+ * @set: the set
+ */
+void hfi1_exp_tid_group_init(struct exp_tid_set *set)
+{
+       INIT_LIST_HEAD(&set->list);
+       set->count = 0;
+}
+
+/**
+ * hfi1_alloc_ctxt_rcv_groups - initialize expected receive groups
+ * @rcd: the context to add the groupings to
+ */
+int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
+{
+       struct hfi1_devdata *dd = rcd->dd;
+       u32 tidbase;
+       struct tid_group *grp;
+       int i;
+
+       tidbase = rcd->expected_base;
+       for (i = 0; i < rcd->expected_count /
+                    dd->rcv_entries.group_size; i++) {
+               grp = kzalloc(sizeof(*grp), GFP_KERNEL);
+               if (!grp)
+                       goto bail;
+               grp->size = dd->rcv_entries.group_size;
+               grp->base = tidbase;
+               tid_group_add_tail(grp, &rcd->tid_group_list);
+               tidbase += dd->rcv_entries.group_size;
+       }
+
+       return 0;
+bail:
+       hfi1_free_ctxt_rcv_groups(rcd);
+       return -ENOMEM;
+}
+
+/**
+ * hfi1_free_ctxt_rcv_groups - free expected receive groups
+ * @rcd: the context to free
+ *
+ * The routine dismantles the expected receive linked
+ * list and clears any tids associated with the receive
+ * context.
+ *
+ * This should only be called for kernel contexts and
+ * a base user context.
+ */
+void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
+{
+       struct tid_group *grp, *gptr;
+
+       WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_full_list));
+       WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_used_list));
+
+       list_for_each_entry_safe(grp, gptr, &rcd->tid_group_list.list, list) {
+               tid_group_remove(grp, &rcd->tid_group_list);
+               kfree(grp);
+       }
+
+       hfi1_clear_tids(rcd);
+}
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h
new file mode 100644 (file)
index 0000000..c7d02bc
--- /dev/null
@@ -0,0 +1,187 @@
+#ifndef _HFI1_EXP_RCV_H
+#define _HFI1_EXP_RCV_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "hfi.h"
+
+#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list))
+
+#define EXP_TID_TIDLEN_MASK   0x7FFULL
+#define EXP_TID_TIDLEN_SHIFT  0
+#define EXP_TID_TIDCTRL_MASK  0x3ULL
+#define EXP_TID_TIDCTRL_SHIFT 20
+#define EXP_TID_TIDIDX_MASK   0x3FFULL
+#define EXP_TID_TIDIDX_SHIFT  22
+#define EXP_TID_GET(tid, field)        \
+       (((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK)
+
+#define EXP_TID_SET(field, value)                      \
+       (((value) & EXP_TID_TID##field##_MASK) <<       \
+        EXP_TID_TID##field##_SHIFT)
+#define EXP_TID_CLEAR(tid, field) ({                                   \
+               (tid) &= ~(EXP_TID_TID##field##_MASK <<                 \
+                          EXP_TID_TID##field##_SHIFT);                 \
+               })
+#define EXP_TID_RESET(tid, field, value) do {                          \
+               EXP_TID_CLEAR(tid, field);                              \
+               (tid) |= EXP_TID_SET(field, (value));                   \
+       } while (0)
+
+/*
+ * Define fields in the KDETH header so we can update the header
+ * template.
+ */
+#define KDETH_OFFSET_SHIFT        0
+#define KDETH_OFFSET_MASK         0x7fff
+#define KDETH_OM_SHIFT            15
+#define KDETH_OM_MASK             0x1
+#define KDETH_TID_SHIFT           16
+#define KDETH_TID_MASK            0x3ff
+#define KDETH_TIDCTRL_SHIFT       26
+#define KDETH_TIDCTRL_MASK        0x3
+#define KDETH_INTR_SHIFT          28
+#define KDETH_INTR_MASK           0x1
+#define KDETH_SH_SHIFT            29
+#define KDETH_SH_MASK             0x1
+#define KDETH_KVER_SHIFT          30
+#define KDETH_KVER_MASK           0x3
+#define KDETH_JKEY_SHIFT          0x0
+#define KDETH_JKEY_MASK           0xff
+#define KDETH_HCRC_UPPER_SHIFT    16
+#define KDETH_HCRC_UPPER_MASK     0xff
+#define KDETH_HCRC_LOWER_SHIFT    24
+#define KDETH_HCRC_LOWER_MASK     0xff
+
+#define KDETH_GET(val, field)                                          \
+       (((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK)
+#define KDETH_SET(dw, field, val) do {                                 \
+               u32 dwval = le32_to_cpu(dw);                            \
+               dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \
+               dwval |= (((val) & KDETH_##field##_MASK) << \
+                         KDETH_##field##_SHIFT);                       \
+               dw = cpu_to_le32(dwval);                                \
+       } while (0)
+
+#define KDETH_RESET(dw, field, val) ({ dw = 0; KDETH_SET(dw, field, val); })
+
+/* KDETH OM multipliers and switch over point */
+#define KDETH_OM_SMALL     4
+#define KDETH_OM_SMALL_SHIFT     2
+#define KDETH_OM_LARGE     64
+#define KDETH_OM_LARGE_SHIFT     6
+#define KDETH_OM_MAX_SIZE  (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
+
+struct tid_group {
+       struct list_head list;
+       u32 base;
+       u8 size;
+       u8 used;
+       u8 map;
+};
+
+/*
+ * Write an "empty" RcvArray entry.
+ * This function exists so the TID registration code can use it
+ * to write to unused/unneeded entries and still take advantage
+ * of the WC performance improvements. The HFI will ignore this
+ * write to the RcvArray entry.
+ */
+static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
+{
+       /*
+        * Doing the WC fill writes only makes sense if the device is
+        * present and the RcvArray has been mapped as WC memory.
+        */
+       if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc)
+               writeq(0, dd->rcvarray_wc + (index * 8));
+}
+
+static inline void tid_group_add_tail(struct tid_group *grp,
+                                     struct exp_tid_set *set)
+{
+       list_add_tail(&grp->list, &set->list);
+       set->count++;
+}
+
+static inline void tid_group_remove(struct tid_group *grp,
+                                   struct exp_tid_set *set)
+{
+       list_del_init(&grp->list);
+       set->count--;
+}
+
+static inline void tid_group_move(struct tid_group *group,
+                                 struct exp_tid_set *s1,
+                                 struct exp_tid_set *s2)
+{
+       tid_group_remove(group, s1);
+       tid_group_add_tail(group, s2);
+}
+
+static inline struct tid_group *tid_group_pop(struct exp_tid_set *set)
+{
+       struct tid_group *grp =
+               list_first_entry(&set->list, struct tid_group, list);
+       list_del_init(&grp->list);
+       set->count--;
+       return grp;
+}
+
+static inline u32 rcventry2tidinfo(u32 rcventry)
+{
+       u32 pair = rcventry & ~0x1;
+
+       return EXP_TID_SET(IDX, pair >> 1) |
+               EXP_TID_SET(CTRL, 1 << (rcventry - pair));
+}
+
+int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd);
+void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd);
+void hfi1_exp_tid_group_init(struct exp_tid_set *set);
+
+#endif /* _HFI1_EXP_RCV_H */
index 3158128d57e8df4461256330db551a69b2d59b28..bbf80b1dd9d9672c00b94aa70f0176fd72214524 100644 (file)
@@ -774,6 +774,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
        *ev = 0;
 
        __clear_bit(fdata->subctxt, uctxt->in_use_ctxts);
+       fdata->uctxt = NULL;
+       hfi1_rcd_put(uctxt); /* fdata reference */
        if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
                mutex_unlock(&hfi1_mutex);
                goto done;
@@ -794,17 +796,16 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
        /* Clear the context's J_KEY */
        hfi1_clear_ctxt_jkey(dd, uctxt->ctxt);
        /*
-        * Reset context integrity checks to default.
-        * (writes to CSRs probably belong in chip.c)
+        * If a send context is allocated, reset context integrity
+        * checks to default and disable the send context.
         */
-       write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
-                       hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type));
-       sc_disable(uctxt->sc);
+       if (uctxt->sc) {
+               set_pio_integrity(uctxt->sc);
+               sc_disable(uctxt->sc);
+       }
        spin_unlock_irqrestore(&dd->uctxt_lock, flags);
 
-       dd->rcd[uctxt->ctxt] = NULL;
-
-       hfi1_user_exp_rcv_grp_free(uctxt);
+       hfi1_free_ctxt_rcv_groups(uctxt);
        hfi1_clear_ctxt_pkey(dd, uctxt);
 
        uctxt->rcvwait_to = 0;
@@ -816,8 +817,11 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
        hfi1_stats.sps_ctxts--;
        if (++dd->freectxts == dd->num_user_contexts)
                aspm_enable_all(dd);
+
+       /* _rcd_put() should be done after releasing mutex */
+       dd->rcd[uctxt->ctxt] = NULL;
        mutex_unlock(&hfi1_mutex);
-       hfi1_free_ctxtdata(dd, uctxt);
+       hfi1_rcd_put(uctxt);  /* dd reference */
 done:
        mmdrop(fdata->mm);
        kobject_put(&dd->kobj);
@@ -887,16 +891,17 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
                ret = wait_event_interruptible(fd->uctxt->wait, !test_bit(
                                               HFI1_CTXT_BASE_UNINIT,
                                               &fd->uctxt->event_flags));
-               if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) {
-                       clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
-                       return -ENOMEM;
-               }
+               if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags))
+                       ret = -ENOMEM;
+
                /* The only thing a sub context needs is the user_xxx stuff */
                if (!ret)
                        ret = init_user_ctxt(fd);
 
-               if (ret)
+               if (ret) {
                        clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
+                       hfi1_rcd_put(fd->uctxt);
+               }
        } else if (!ret) {
                ret = setup_base_ctxt(fd);
                if (fd->uctxt->subctxt_cnt) {
@@ -961,6 +966,8 @@ static int find_sub_ctxt(struct hfi1_filedata *fd,
 
                fd->uctxt = uctxt;
                fd->subctxt = subctxt;
+
+               hfi1_rcd_get(uctxt);
                __set_bit(fd->subctxt, uctxt->in_use_ctxts);
 
                return 1;
@@ -1069,11 +1076,14 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
                aspm_disable_all(dd);
        fd->uctxt = uctxt;
 
+       /* Count the reference for the fd */
+       hfi1_rcd_get(uctxt);
+
        return 0;
 
 ctxdata_free:
        dd->rcd[ctxt] = NULL;
-       hfi1_free_ctxtdata(dd, uctxt);
+       hfi1_rcd_put(uctxt);
        return ret;
 }
 
@@ -1260,7 +1270,7 @@ static int setup_base_ctxt(struct hfi1_filedata *fd)
        if (ret)
                goto setup_failed;
 
-       ret = hfi1_user_exp_rcv_grp_init(fd);
+       ret = hfi1_alloc_ctxt_rcv_groups(uctxt);
        if (ret)
                goto setup_failed;
 
@@ -1273,6 +1283,7 @@ static int setup_base_ctxt(struct hfi1_filedata *fd)
        return 0;
 
 setup_failed:
+       /* Call _free_ctxtdata, not _rcd_put().  We still need the context. */
        hfi1_free_ctxtdata(dd, uctxt);
        return ret;
 }
index 414a04a481c2abab3b3fe44d3949d591e684c065..1a33a5087734976e1ded340b04fba77674347fec 100644 (file)
@@ -213,11 +213,9 @@ struct hfi1_ctxtdata {
 
        /* dynamic receive available interrupt timeout */
        u32 rcvavail_timeout;
-       /*
-        * number of opens (including slave sub-contexts) on this instance
-        * (ignoring forks, dup, etc. for now)
-        */
-       int cnt;
+       /* Reference count the base context usage */
+       struct kref kref;
+
        /* Device context index */
        unsigned ctxt;
        /*
@@ -356,17 +354,26 @@ struct hfi1_packet {
        __le32 *rhf_addr;
        struct rvt_qp *qp;
        struct ib_other_headers *ohdr;
+       struct ib_grh *grh;
        u64 rhf;
        u32 maxcnt;
        u32 rhqoff;
+       u32 dlid;
+       u32 slid;
        u16 tlen;
        s16 etail;
        u8 hlen;
        u8 numpkt;
        u8 rsize;
        u8 updegr;
-       u8 rcv_flags;
        u8 etype;
+       u8 extra_byte;
+       u8 pad;
+       u8 sc;
+       u8 sl;
+       u8 opcode;
+       bool becn;
+       bool fecn;
 };
 
 struct rvt_sge_state;
@@ -512,7 +519,7 @@ static inline void incr_cntr32(u32 *cntr)
 #define MAX_NAME_SIZE 64
 struct hfi1_msix_entry {
        enum irq_type type;
-       struct msix_entry msix;
+       int irq;
        void *arg;
        char name[MAX_NAME_SIZE];
        cpumask_t mask;
@@ -654,7 +661,7 @@ struct hfi1_pportdata {
        u8 link_enabled;        /* link enabled? */
        u8 linkinit_reason;
        u8 local_tx_rate;       /* rate given to 8051 firmware */
-       u8 last_pstate;         /* info only */
+       u8 pstate;              /* physical state; see driver_pstate() */
        u8 qsfp_retry_count;
 
        /* placeholders for IB MAD packet settings */
@@ -1282,7 +1289,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
 void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
                         struct hfi1_devdata *dd, u8 hw_pidx, u8 port);
 void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
-
+int hfi1_rcd_put(struct hfi1_ctxtdata *rcd);
+void hfi1_rcd_get(struct hfi1_ctxtdata *rcd);
 int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
 int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
 int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
@@ -1321,6 +1329,22 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd)
                return ppd->lstate;
 }
 
+/*
+ * Return the driver's idea of the physical OPA port state.
+ *
+ * The driver still has work to do between the physical link reaching
+ * LINKUP and the point where the SM can be told about it.  During that
+ * window report IB_PORTPHYSSTATE_TRAINING; in every other case translate
+ * ppd->pstate with chip_to_opa_pstate().
+ */
+static inline u32 driver_pstate(struct hfi1_pportdata *ppd)
+{
+       bool sw_up = ppd->host_link_state & HLS_UP;
+
+       /* physically up, but the software state has not caught up yet */
+       if (ppd->pstate == PLS_LINKUP && !sw_up)
+               return IB_PORTPHYSSTATE_TRAINING;
+
+       return chip_to_opa_pstate(ppd->dd, ppd->pstate);
+}
+
 void receive_interrupt_work(struct work_struct *work);
 
 /* extract service channel from header and rhf */
@@ -1829,9 +1853,7 @@ void hfi1_pcie_cleanup(struct pci_dev *pdev);
 int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev);
 void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
 int pcie_speeds(struct hfi1_devdata *dd);
-void request_msix(struct hfi1_devdata *dd, u32 *nent,
-                 struct hfi1_msix_entry *entry);
-void hfi1_enable_intx(struct pci_dev *pdev);
+int request_msix(struct hfi1_devdata *dd, u32 msireq);
 void restore_pci_variables(struct hfi1_devdata *dd);
 int do_pcie_gen3_transition(struct hfi1_devdata *dd);
 int parse_platform_config(struct hfi1_devdata *dd);
@@ -2087,52 +2109,13 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
 #define DD_DEV_ENTRY(dd)       __string(dev, dev_name(&(dd)->pcidev->dev))
 #define DD_DEV_ASSIGN(dd)      __assign_str(dev, dev_name(&(dd)->pcidev->dev))
 
-#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype }
-#define show_packettype(etype)                  \
-__print_symbolic(etype,                         \
-       packettype_name(EXPECTED),              \
-       packettype_name(EAGER),                 \
-       packettype_name(IB),                    \
-       packettype_name(ERROR),                 \
-       packettype_name(BYPASS))
-
-#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode  }
-#define show_ib_opcode(opcode)                             \
-__print_symbolic(opcode,                                   \
-       ib_opcode_name(RC_SEND_FIRST),                     \
-       ib_opcode_name(RC_SEND_MIDDLE),                    \
-       ib_opcode_name(RC_SEND_LAST),                      \
-       ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE),       \
-       ib_opcode_name(RC_SEND_ONLY),                      \
-       ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE),       \
-       ib_opcode_name(RC_RDMA_WRITE_FIRST),               \
-       ib_opcode_name(RC_RDMA_WRITE_MIDDLE),              \
-       ib_opcode_name(RC_RDMA_WRITE_LAST),                \
-       ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
-       ib_opcode_name(RC_RDMA_WRITE_ONLY),                \
-       ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
-       ib_opcode_name(RC_RDMA_READ_REQUEST),              \
-       ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST),       \
-       ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE),      \
-       ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST),        \
-       ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY),        \
-       ib_opcode_name(RC_ACKNOWLEDGE),                    \
-       ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE),             \
-       ib_opcode_name(RC_COMPARE_SWAP),                   \
-       ib_opcode_name(RC_FETCH_ADD),                      \
-       ib_opcode_name(UC_SEND_FIRST),                     \
-       ib_opcode_name(UC_SEND_MIDDLE),                    \
-       ib_opcode_name(UC_SEND_LAST),                      \
-       ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE),       \
-       ib_opcode_name(UC_SEND_ONLY),                      \
-       ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE),       \
-       ib_opcode_name(UC_RDMA_WRITE_FIRST),               \
-       ib_opcode_name(UC_RDMA_WRITE_MIDDLE),              \
-       ib_opcode_name(UC_RDMA_WRITE_LAST),                \
-       ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
-       ib_opcode_name(UC_RDMA_WRITE_ONLY),                \
-       ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
-       ib_opcode_name(UD_SEND_ONLY),                      \
-       ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE),       \
-       ib_opcode_name(CNP))
+/*
+ * hfi1_check_mcast - Check if the given lid is in the IB multicast range.
+ *
+ * A lid qualifies when it is at or above IB_MULTICAST_LID_BASE and is not
+ * IB_LID_PERMISSIVE.  Both constants are passed through be16_to_cpu()
+ * before being compared with @lid.
+ */
+static inline bool hfi1_check_mcast(u16 lid)
+{
+       if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE))
+               return false;
+
+       return lid != be16_to_cpu(IB_LID_PERMISSIVE);
+}
 #endif                          /* _HFI1_KERNEL_H */
index 4a11d4da4c9201ce4ba8d8df12b7895ff75c07ec..dfdb4126ca05e937db795a8ce5c50fa64c6658ee 100644 (file)
@@ -67,6 +67,7 @@
 #include "aspm.h"
 #include "affinity.h"
 #include "vnic.h"
+#include "exp_rcv.h"
 
 #undef pr_fmt
 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -190,15 +191,45 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 nomem:
        ret = -ENOMEM;
 
-       if (dd->rcd) {
-               for (i = 0; i < dd->num_rcv_contexts; ++i)
-                       hfi1_free_ctxtdata(dd, dd->rcd[i]);
-       }
+       for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i)
+               hfi1_rcd_put(dd->rcd[i]);
+
+       /* All the contexts should be freed, free the array */
        kfree(dd->rcd);
        dd->rcd = NULL;
        return ret;
 }
 
+/*
+ * Helper routines for the receive context reference count (rcd and uctxt)
+ *
+ * hfi1_rcd_init() sets up the kref embedded in the context by handing it
+ * to kref_init(); hfi1_create_ctxtdata() calls it once the rest of the
+ * context has been set up.
+ */
+static void hfi1_rcd_init(struct hfi1_ctxtdata *rcd)
+{
+       kref_init(&rcd->kref);
+}
+
+/*
+ * kref release callback for a receive context: map the embedded kref back
+ * to its hfi1_ctxtdata, release the context's resources through
+ * hfi1_free_ctxtdata(), then free the structure itself.
+ */
+static void hfi1_rcd_free(struct kref *kref)
+{
+       struct hfi1_ctxtdata *ctxt;
+
+       ctxt = container_of(kref, struct hfi1_ctxtdata, kref);
+       hfi1_free_ctxtdata(ctxt->dd, ctxt);
+       kfree(ctxt);
+}
+
+/*
+ * Drop one reference on a receive context.
+ *
+ * A NULL @rcd is tolerated and yields 0.  Otherwise the value returned by
+ * kref_put() is passed back to the caller; hfi1_rcd_free() is registered
+ * as the release callback.
+ */
+int hfi1_rcd_put(struct hfi1_ctxtdata *rcd)
+{
+       if (!rcd)
+               return 0;
+
+       return kref_put(&rcd->kref, hfi1_rcd_free);
+}
+
+/*
+ * Take one more reference on a receive context.  Unlike hfi1_rcd_put(),
+ * @rcd is dereferenced unconditionally, so it must not be NULL.
+ */
+void hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
+{
+       kref_get(&rcd->kref);
+}
+
 /*
  * Common code for user and kernel context setup.
  */
@@ -221,6 +252,9 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
                hfi1_cdbg(PROC, "setting up context %u\n", ctxt);
 
                INIT_LIST_HEAD(&rcd->qp_wait_list);
+               hfi1_exp_tid_group_init(&rcd->tid_group_list);
+               hfi1_exp_tid_group_init(&rcd->tid_used_list);
+               hfi1_exp_tid_group_init(&rcd->tid_full_list);
                rcd->ppd = ppd;
                rcd->dd = dd;
                __set_bit(0, rcd->in_use_ctxts);
@@ -328,6 +362,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
                        if (!rcd->opstats)
                                goto bail;
                }
+
+               hfi1_rcd_init(rcd);
        }
        return rcd;
 bail:
@@ -927,14 +963,11 @@ static void shutdown_device(struct hfi1_devdata *dd)
  * @rcd: the ctxtdata structure
  *
  * free up any allocated data for a context
- * This should not touch anything that would affect a simultaneous
- * re-allocation of context data, because it is called after hfi1_mutex
- * is released (and can be called from reinit as well).
  * It should never change any chip state, or global driver state.
  */
 void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 {
-       unsigned e;
+       u32 e;
 
        if (!rcd)
                return;
@@ -953,6 +986,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 
        /* all the RcvArray entries should have been cleared by now */
        kfree(rcd->egrbufs.rcvtids);
+       rcd->egrbufs.rcvtids = NULL;
 
        for (e = 0; e < rcd->egrbufs.alloced; e++) {
                if (rcd->egrbufs.buffers[e].dma)
@@ -962,13 +996,21 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
                                          rcd->egrbufs.buffers[e].dma);
        }
        kfree(rcd->egrbufs.buffers);
+       rcd->egrbufs.alloced = 0;
+       rcd->egrbufs.buffers = NULL;
 
        sc_free(rcd->sc);
+       rcd->sc = NULL;
+
        vfree(rcd->subctxt_uregbase);
        vfree(rcd->subctxt_rcvegrbuf);
        vfree(rcd->subctxt_rcvhdr_base);
        kfree(rcd->opstats);
-       kfree(rcd);
+
+       rcd->subctxt_uregbase = NULL;
+       rcd->subctxt_rcvegrbuf = NULL;
+       rcd->subctxt_rcvhdr_base = NULL;
+       rcd->opstats = NULL;
 }
 
 /*
@@ -1362,7 +1404,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
                tmp[ctxt] = NULL; /* debugging paranoia */
                if (rcd) {
                        hfi1_clear_tids(rcd);
-                       hfi1_free_ctxtdata(dd, rcd);
+                       hfi1_rcd_put(rcd);
                }
        }
        kfree(tmp);
index 5977673a52d4b7477fa315396b6d43418ca2ceb4..a081a98d728aea38fbecef9ca736cc7f9dd8d967 100644 (file)
 #define OPA_LINK_WIDTH_RESET_OLD 0x0fff
 #define OPA_LINK_WIDTH_RESET 0xffff
 
+/*
+ * Check that a request of @request_len bytes is large enough for an
+ * attribute of @data_size bytes.  Returns 0 when it is, -EINVAL when the
+ * request is shorter than the attribute.
+ */
+static int smp_length_check(u32 data_size, u32 request_len)
+{
+       return unlikely(request_len < data_size) ? -EINVAL : 0;
+}
+
 static int reply(struct ib_mad_hdr *smp)
 {
        /*
@@ -105,7 +113,7 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
                return;
 
        /* o14-3.2.1 */
-       if (ppd_from_ibp(ibp)->lstate != IB_PORT_ACTIVE)
+       if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE)
                return;
 
        /* o14-2 */
@@ -172,10 +180,10 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
 }
 
 /*
- * Send a bad [PQ]_Key trap (ch. 14.3.8).
+ * Send a bad P_Key trap (ch. 14.3.8).
  */
-void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
-                   u32 qp1, u32 qp2, u16 lid1, u16 lid2)
+void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl,
+                  u32 qp1, u32 qp2, u16 lid1, u16 lid2)
 {
        struct opa_mad_notice_attr data;
        u32 lid = ppd_from_ibp(ibp)->lid;
@@ -183,17 +191,13 @@ void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
        u32 _lid2 = lid2;
 
        memset(&data, 0, sizeof(data));
-
-       if (trap_num == OPA_TRAP_BAD_P_KEY)
-               ibp->rvp.pkey_violations++;
-       else
-               ibp->rvp.qkey_violations++;
        ibp->rvp.n_pkt_drops++;
+       ibp->rvp.pkey_violations++;
 
        /* Send violation trap */
        data.generic_type = IB_NOTICE_TYPE_SECURITY;
        data.prod_type_lsb = IB_NOTICE_PROD_CA;
-       data.trap_num = trap_num;
+       data.trap_num = OPA_TRAP_BAD_P_KEY;
        data.issuer_lid = cpu_to_be32(lid);
        data.ntc_257_258.lid1 = cpu_to_be32(_lid1);
        data.ntc_257_258.lid2 = cpu_to_be32(_lid2);
@@ -260,6 +264,7 @@ void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
        data.issuer_lid = cpu_to_be32(lid);
        data.ntc_144.lid = data.issuer_lid;
        data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
+       data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags);
 
        send_trap(ibp, &data, sizeof(data));
 }
@@ -307,11 +312,11 @@ void hfi1_node_desc_chg(struct hfi1_ibport *ibp)
 
 static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
                                   u8 *data, struct ib_device *ibdev,
-                                  u8 port, u32 *resp_len)
+                                  u8 port, u32 *resp_len, u32 max_len)
 {
        struct opa_node_description *nd;
 
-       if (am) {
+       if (am || smp_length_check(sizeof(*nd), max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
@@ -328,7 +333,7 @@ static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
 
 static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
-                                  u32 *resp_len)
+                                  u32 *resp_len, u32 max_len)
 {
        struct opa_node_info *ni;
        struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -338,6 +343,7 @@ static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
 
        /* GUID 0 is illegal */
        if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 ||
+           smp_length_check(sizeof(*ni), max_len) ||
            get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
@@ -519,7 +525,7 @@ void read_ltp_rtt(struct hfi1_devdata *dd)
 
 static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
-                                  u32 *resp_len)
+                                  u32 *resp_len, u32 max_len)
 {
        int i;
        struct hfi1_devdata *dd;
@@ -535,7 +541,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
        u32 buffer_units;
        u64 tmp = 0;
 
-       if (num_ports != 1) {
+       if (num_ports != 1 || smp_length_check(sizeof(*pi), max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
@@ -605,7 +611,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
                ppd->offline_disabled_reason;
 
        pi->port_states.portphysstate_portstate =
-               (hfi1_ibphys_portstate(ppd) << 4) | state;
+               (driver_pstate(ppd) << 4) | state;
 
        pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
 
@@ -704,11 +710,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
        buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
        pi->buffer_units = cpu_to_be32(buffer_units);
 
-       pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported |
-                                      OPA_CAP_MASK3_IsEthOnFabricSupported);
-       /* Driver does not support mcast/collective configuration */
-       pi->opa_cap_mask &=
-               cpu_to_be16(~OPA_CAP_MASK3_IsAddrRangeConfigSupported);
+       pi->opa_cap_mask = cpu_to_be16(ibp->rvp.port_cap3_flags);
        pi->collectivemask_multicastmask = ((HFI1_COLLECTIVE_NR & 0x7)
                                            << 3 | (HFI1_MCAST_NR & 0x7));
 
@@ -748,7 +750,7 @@ static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
 
 static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
                                    struct ib_device *ibdev, u8 port,
-                                   u32 *resp_len)
+                                   u32 *resp_len, u32 max_len)
 {
        struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
        u32 n_blocks_req = OPA_AM_NBLK(am);
@@ -771,6 +773,11 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
 
        size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);
 
+       if (smp_length_check(size, max_len)) {
+               smp->status |= IB_SMP_INVALID_FIELD;
+               return reply((struct ib_mad_hdr *)smp);
+       }
+
        if (start_block + n_blocks_req > n_blocks_avail ||
            n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
                pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; "
@@ -1074,7 +1081,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
  */
 static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
-                                  u32 *resp_len)
+                                  u32 *resp_len, u32 max_len)
 {
        struct opa_port_info *pi = (struct opa_port_info *)data;
        struct ib_event event;
@@ -1095,7 +1102,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
        int ret, i, invalid = 0, call_set_mtu = 0;
        int call_link_downgrade_policy = 0;
 
-       if (num_ports != 1) {
+       if (num_ports != 1 ||
+           smp_length_check(sizeof(*pi), max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
@@ -1346,7 +1354,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
        if (ret)
                return ret;
 
-       ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
+       ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
+                                     max_len);
 
        /* restore re-reg bit per o14-12.2.1 */
        pi->clientrereg_subnettimeout |= clientrereg;
@@ -1363,7 +1372,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
        return ret;
 
 get_only:
-       return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
+       return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
+                                      max_len);
 }
 
 /**
@@ -1424,7 +1434,7 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
 
 static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
                                    struct ib_device *ibdev, u8 port,
-                                   u32 *resp_len)
+                                   u32 *resp_len, u32 max_len)
 {
        struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
        u32 n_blocks_sent = OPA_AM_NBLK(am);
@@ -1434,6 +1444,7 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
        int i;
        u16 n_blocks_avail;
        unsigned npkeys = hfi1_get_npkeys(dd);
+       u32 size = 0;
 
        if (n_blocks_sent == 0) {
                pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
@@ -1444,6 +1455,13 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
 
        n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
 
+       size = sizeof(u16) * (n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE);
+
+       if (smp_length_check(size, max_len)) {
+               smp->status |= IB_SMP_INVALID_FIELD;
+               return reply((struct ib_mad_hdr *)smp);
+       }
+
        if (start_block + n_blocks_sent > n_blocks_avail ||
            n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
                pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
@@ -1461,7 +1479,8 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
                return reply((struct ib_mad_hdr *)smp);
        }
 
-       return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len);
+       return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len,
+                                       max_len);
 }
 
 #define ILLEGAL_VL 12
@@ -1522,14 +1541,14 @@ static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
 
 static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
-                                  u32 *resp_len)
+                                  u32 *resp_len, u32 max_len)
 {
        struct hfi1_ibport *ibp = to_iport(ibdev, port);
        u8 *p = data;
        size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */
        unsigned i;
 
-       if (am) {
+       if (am || smp_length_check(size, max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
@@ -1545,14 +1564,15 @@ static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
 
 static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
-                                  u32 *resp_len)
+                                  u32 *resp_len, u32 max_len)
 {
        struct hfi1_ibport *ibp = to_iport(ibdev, port);
        u8 *p = data;
+       size_t size = ARRAY_SIZE(ibp->sl_to_sc);
        int i;
        u8 sc;
 
-       if (am) {
+       if (am || smp_length_check(size, max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
@@ -1567,19 +1587,20 @@ static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
                }
        }
 
-       return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len);
+       return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len,
+                                      max_len);
 }
 
 static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
-                                  u32 *resp_len)
+                                  u32 *resp_len, u32 max_len)
 {
        struct hfi1_ibport *ibp = to_iport(ibdev, port);
        u8 *p = data;
        size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */
        unsigned i;
 
-       if (am) {
+       if (am || smp_length_check(size, max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
@@ -1595,13 +1616,14 @@ static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
 
 static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
-                                  u32 *resp_len)
+                                  u32 *resp_len, u32 max_len)
 {
        struct hfi1_ibport *ibp = to_iport(ibdev, port);
+       size_t size = ARRAY_SIZE(ibp->sc_to_sl);
        u8 *p = data;
        int i;
 
-       if (am) {
+       if (am || smp_length_check(size, max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
@@ -1609,19 +1631,20 @@ static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
        for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
                ibp->sc_to_sl[i] = *p++;
 
-       return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len);
+       return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len,
+                                      max_len);
 }
 
 static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
                                    struct ib_device *ibdev, u8 port,
-                                   u32 *resp_len)
+                                   u32 *resp_len, u32 max_len)
 {
        u32 n_blocks = OPA_AM_NBLK(am);
        struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
        void *vp = (void *)data;
        size_t size = 4 * sizeof(u64);
 
-       if (n_blocks != 1) {
+       if (n_blocks != 1 || smp_length_check(size, max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
@@ -1636,7 +1659,7 @@ static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
 
 static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
                                    struct ib_device *ibdev, u8 port,
-                                   u32 *resp_len)
+                                   u32 *resp_len, u32 max_len)
 {
        u32 n_blocks = OPA_AM_NBLK(am);
        int async_update = OPA_AM_ASYNC(am);
@@ -1644,8 +1667,15 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
        void *vp = (void *)data;
        struct hfi1_pportdata *ppd;
        int lstate;
+       /*
+        * set_sc2vlt_tables writes the information contained in *data
+        * to four 64-bit registers SendSC2VLt[0-3]. We need to make
+        * sure max_len is not smaller than the total size of the four
+        * SendSC2VLt[0-3] registers.
+        */
+       size_t size = 4 * sizeof(u64);
 
-       if (n_blocks != 1 || async_update) {
+       if (n_blocks != 1 || async_update || smp_length_check(size, max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
@@ -1665,27 +1695,28 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
 
        set_sc2vlt_tables(dd, vp);
 
-       return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len);
+       return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len,
+                                       max_len);
 }
 
 static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
                                     struct ib_device *ibdev, u8 port,
-                                    u32 *resp_len)
+                                    u32 *resp_len, u32 max_len)
 {
        u32 n_blocks = OPA_AM_NPORT(am);
        struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
        struct hfi1_pportdata *ppd;
        void *vp = (void *)data;
-       int size;
+       int size = sizeof(struct sc2vlnt);
 
-       if (n_blocks != 1) {
+       if (n_blocks != 1 || smp_length_check(size, max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
 
        ppd = dd->pport + (port - 1);
 
-       size = fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
+       fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
 
        if (resp_len)
                *resp_len += size;
@@ -1695,15 +1726,16 @@ static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
 
 static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
                                     struct ib_device *ibdev, u8 port,
-                                    u32 *resp_len)
+                                    u32 *resp_len, u32 max_len)
 {
        u32 n_blocks = OPA_AM_NPORT(am);
        struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
        struct hfi1_pportdata *ppd;
        void *vp = (void *)data;
        int lstate;
+       int size = sizeof(struct sc2vlnt);
 
-       if (n_blocks != 1) {
+       if (n_blocks != 1 || smp_length_check(size, max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
@@ -1721,12 +1753,12 @@ static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
        fm_set_table(ppd, FM_TBL_SC2VLNT, vp);
 
        return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
-                                        resp_len);
+                                        resp_len, max_len);
 }
 
 static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
                              struct ib_device *ibdev, u8 port,
-                             u32 *resp_len)
+                             u32 *resp_len, u32 max_len)
 {
        u32 nports = OPA_AM_NPORT(am);
        u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
@@ -1735,7 +1767,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
        struct hfi1_pportdata *ppd;
        struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
 
-       if (nports != 1) {
+       if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
@@ -1755,7 +1787,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
                ppd->offline_disabled_reason;
 
        psi->port_states.portphysstate_portstate =
-               (hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf);
+               (driver_pstate(ppd) << 4) | (lstate & 0xf);
        psi->link_width_downgrade_tx_active =
                cpu_to_be16(ppd->link_width_downgrade_tx_active);
        psi->link_width_downgrade_rx_active =
@@ -1768,7 +1800,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
 
 static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
                              struct ib_device *ibdev, u8 port,
-                             u32 *resp_len)
+                             u32 *resp_len, u32 max_len)
 {
        u32 nports = OPA_AM_NPORT(am);
        u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
@@ -1779,7 +1811,7 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
        struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
        int ret, invalid = 0;
 
-       if (nports != 1) {
+       if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
@@ -1809,19 +1841,21 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
        if (invalid)
                smp->status |= IB_SMP_INVALID_FIELD;
 
-       return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len);
+       return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len,
+                                 max_len);
 }
 
 static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
                                     struct ib_device *ibdev, u8 port,
-                                    u32 *resp_len)
+                                    u32 *resp_len, u32 max_len)
 {
        struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
        u32 addr = OPA_AM_CI_ADDR(am);
        u32 len = OPA_AM_CI_LEN(am) + 1;
        int ret;
 
-       if (dd->pport->port_type != PORT_TYPE_QSFP) {
+       if (dd->pport->port_type != PORT_TYPE_QSFP ||
+           smp_length_check(len, max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
@@ -1864,21 +1898,22 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
 }
 
 static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
-                             struct ib_device *ibdev, u8 port, u32 *resp_len)
+                             struct ib_device *ibdev, u8 port, u32 *resp_len,
+                             u32 max_len)
 {
        u32 num_ports = OPA_AM_NPORT(am);
        struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
        struct hfi1_pportdata *ppd;
        struct buffer_control *p = (struct buffer_control *)data;
-       int size;
+       int size = sizeof(struct buffer_control);
 
-       if (num_ports != 1) {
+       if (num_ports != 1 || smp_length_check(size, max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
 
        ppd = dd->pport + (port - 1);
-       size = fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
+       fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
        trace_bct_get(dd, p);
        if (resp_len)
                *resp_len += size;
@@ -1887,14 +1922,15 @@ static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
 }
 
 static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
-                             struct ib_device *ibdev, u8 port, u32 *resp_len)
+                             struct ib_device *ibdev, u8 port, u32 *resp_len,
+                             u32 max_len)
 {
        u32 num_ports = OPA_AM_NPORT(am);
        struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
        struct hfi1_pportdata *ppd;
        struct buffer_control *p = (struct buffer_control *)data;
 
-       if (num_ports != 1) {
+       if (num_ports != 1 || smp_length_check(sizeof(*p), max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
@@ -1905,41 +1941,43 @@ static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
                return reply((struct ib_mad_hdr *)smp);
        }
 
-       return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len);
+       return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len,
+                                 max_len);
 }
 
 static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
                                 struct ib_device *ibdev, u8 port,
-                                u32 *resp_len)
+                                u32 *resp_len, u32 max_len)
 {
        struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
        u32 num_ports = OPA_AM_NPORT(am);
        u8 section = (am & 0x00ff0000) >> 16;
        u8 *p = data;
-       int size = 0;
+       int size = 256;
 
-       if (num_ports != 1) {
+       if (num_ports != 1 || smp_length_check(size, max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
 
        switch (section) {
        case OPA_VLARB_LOW_ELEMENTS:
-               size = fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
+               fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
                break;
        case OPA_VLARB_HIGH_ELEMENTS:
-               size = fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
+               fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
                break;
        case OPA_VLARB_PREEMPT_ELEMENTS:
-               size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
+               fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
                break;
        case OPA_VLARB_PREEMPT_MATRIX:
-               size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
+               fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
                break;
        default:
                pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n",
                        be32_to_cpu(smp->attr_mod));
                smp->status |= IB_SMP_INVALID_FIELD;
+               size = 0;
                break;
        }
 
@@ -1951,14 +1989,15 @@ static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
 
 static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
                                 struct ib_device *ibdev, u8 port,
-                                u32 *resp_len)
+                                u32 *resp_len, u32 max_len)
 {
        struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
        u32 num_ports = OPA_AM_NPORT(am);
        u8 section = (am & 0x00ff0000) >> 16;
        u8 *p = data;
+       int size = 256;
 
-       if (num_ports != 1) {
+       if (num_ports != 1 || smp_length_check(size, max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
@@ -1986,7 +2025,8 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
                break;
        }
 
-       return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len);
+       return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len,
+                                    max_len);
 }
 
 struct opa_pma_mad {
@@ -3282,13 +3322,18 @@ struct opa_congestion_info_attr {
 
 static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
                                    struct ib_device *ibdev, u8 port,
-                                   u32 *resp_len)
+                                   u32 *resp_len, u32 max_len)
 {
        struct opa_congestion_info_attr *p =
                (struct opa_congestion_info_attr *)data;
        struct hfi1_ibport *ibp = to_iport(ibdev, port);
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 
+       if (smp_length_check(sizeof(*p), max_len)) {
+               smp->status |= IB_SMP_INVALID_FIELD;
+               return reply((struct ib_mad_hdr *)smp);
+       }
+
        p->congestion_info = 0;
        p->control_table_cap = ppd->cc_max_table_entries;
        p->congestion_log_length = OPA_CONG_LOG_ELEMS;
@@ -3301,7 +3346,7 @@ static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
 
 static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
                                       u8 *data, struct ib_device *ibdev,
-                                      u8 port, u32 *resp_len)
+                                      u8 port, u32 *resp_len, u32 max_len)
 {
        int i;
        struct opa_congestion_setting_attr *p =
@@ -3311,6 +3356,11 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
        struct opa_congestion_setting_entry_shadow *entries;
        struct cc_state *cc_state;
 
+       if (smp_length_check(sizeof(*p), max_len)) {
+               smp->status |= IB_SMP_INVALID_FIELD;
+               return reply((struct ib_mad_hdr *)smp);
+       }
+
        rcu_read_lock();
 
        cc_state = get_cc_state(ppd);
@@ -3385,7 +3435,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd)
 
 static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
                                       struct ib_device *ibdev, u8 port,
-                                      u32 *resp_len)
+                                      u32 *resp_len, u32 max_len)
 {
        struct opa_congestion_setting_attr *p =
                (struct opa_congestion_setting_attr *)data;
@@ -3394,6 +3444,11 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
        struct opa_congestion_setting_entry_shadow *entries;
        int i;
 
+       if (smp_length_check(sizeof(*p), max_len)) {
+               smp->status |= IB_SMP_INVALID_FIELD;
+               return reply((struct ib_mad_hdr *)smp);
+       }
+
        /*
         * Save details from packet into the ppd.  Hold the cc_state_lock so
         * our information is consistent with anyone trying to apply the state.
@@ -3415,12 +3470,12 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
        apply_cc_state(ppd);
 
        return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
-                                          resp_len);
+                                          resp_len, max_len);
 }
 
 static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
                                        u8 *data, struct ib_device *ibdev,
-                                       u8 port, u32 *resp_len)
+                                       u8 port, u32 *resp_len, u32 max_len)
 {
        struct hfi1_ibport *ibp = to_iport(ibdev, port);
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -3428,7 +3483,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
        s64 ts;
        int i;
 
-       if (am != 0) {
+       if (am || smp_length_check(sizeof(*cong_log), max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
@@ -3486,7 +3541,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
 
 static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
-                                  u32 *resp_len)
+                                  u32 *resp_len, u32 max_len)
 {
        struct ib_cc_table_attr *cc_table_attr =
                (struct ib_cc_table_attr *)data;
@@ -3498,9 +3553,10 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
        int i, j;
        u32 sentry, eentry;
        struct cc_state *cc_state;
+       u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
 
        /* sanity check n_blocks, start_block */
-       if (n_blocks == 0 ||
+       if (n_blocks == 0 || smp_length_check(size, max_len) ||
            start_block + n_blocks > ppd->cc_max_table_entries) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
@@ -3530,14 +3586,14 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
        rcu_read_unlock();
 
        if (resp_len)
-               *resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
+               *resp_len += size;
 
        return reply((struct ib_mad_hdr *)smp);
 }
 
 static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
-                                  u32 *resp_len)
+                                  u32 *resp_len, u32 max_len)
 {
        struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
        struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -3548,9 +3604,10 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
        int i, j;
        u32 sentry, eentry;
        u16 ccti_limit;
+       u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
 
        /* sanity check n_blocks, start_block */
-       if (n_blocks == 0 ||
+       if (n_blocks == 0 || smp_length_check(size, max_len) ||
            start_block + n_blocks > ppd->cc_max_table_entries) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
@@ -3581,7 +3638,8 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
        /* now apply the information */
        apply_cc_state(ppd);
 
-       return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len);
+       return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len,
+                                      max_len);
 }
 
 struct opa_led_info {
@@ -3594,7 +3652,7 @@ struct opa_led_info {
 
 static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
-                                  u32 *resp_len)
+                                  u32 *resp_len, u32 max_len)
 {
        struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
        struct hfi1_pportdata *ppd = dd->pport;
@@ -3602,7 +3660,7 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
        u32 nport = OPA_AM_NPORT(am);
        u32 is_beaconing_active;
 
-       if (nport != 1) {
+       if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
@@ -3624,14 +3682,14 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
 
 static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
                                   struct ib_device *ibdev, u8 port,
-                                  u32 *resp_len)
+                                  u32 *resp_len, u32 max_len)
 {
        struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
        struct opa_led_info *p = (struct opa_led_info *)data;
        u32 nport = OPA_AM_NPORT(am);
        int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
 
-       if (nport != 1) {
+       if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
                smp->status |= IB_SMP_INVALID_FIELD;
                return reply((struct ib_mad_hdr *)smp);
        }
@@ -3641,12 +3699,13 @@ static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
        else
                shutdown_led_override(dd->pport);
 
-       return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len);
+       return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len,
+                                      max_len);
 }
 
 static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
                            u8 *data, struct ib_device *ibdev, u8 port,
-                           u32 *resp_len)
+                           u32 *resp_len, u32 max_len)
 {
        int ret;
        struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -3654,71 +3713,71 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
        switch (attr_id) {
        case IB_SMP_ATTR_NODE_DESC:
                ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port,
-                                             resp_len);
+                                             resp_len, max_len);
                break;
        case IB_SMP_ATTR_NODE_INFO:
                ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port,
-                                             resp_len);
+                                             resp_len, max_len);
                break;
        case IB_SMP_ATTR_PORT_INFO:
                ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port,
-                                             resp_len);
+                                             resp_len, max_len);
                break;
        case IB_SMP_ATTR_PKEY_TABLE:
                ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port,
-                                              resp_len);
+                                              resp_len, max_len);
                break;
        case OPA_ATTRIB_ID_SL_TO_SC_MAP:
                ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port,
-                                             resp_len);
+                                             resp_len, max_len);
                break;
        case OPA_ATTRIB_ID_SC_TO_SL_MAP:
                ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port,
-                                             resp_len);
+                                             resp_len, max_len);
                break;
        case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
                ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port,
-                                              resp_len);
+                                              resp_len, max_len);
                break;
        case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
                ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
-                                               resp_len);
+                                               resp_len, max_len);
                break;
        case OPA_ATTRIB_ID_PORT_STATE_INFO:
                ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
-                                        resp_len);
+                                        resp_len, max_len);
                break;
        case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
                ret = __subn_get_opa_bct(smp, am, data, ibdev, port,
-                                        resp_len);
+                                        resp_len, max_len);
                break;
        case OPA_ATTRIB_ID_CABLE_INFO:
                ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port,
-                                               resp_len);
+                                               resp_len, max_len);
                break;
        case IB_SMP_ATTR_VL_ARB_TABLE:
                ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port,
-                                           resp_len);
+                                           resp_len, max_len);
                break;
        case OPA_ATTRIB_ID_CONGESTION_INFO:
                ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port,
-                                              resp_len);
+                                              resp_len, max_len);
                break;
        case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
                ret = __subn_get_opa_cong_setting(smp, am, data, ibdev,
-                                                 port, resp_len);
+                                                 port, resp_len, max_len);
                break;
        case OPA_ATTRIB_ID_HFI_CONGESTION_LOG:
                ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev,
-                                                  port, resp_len);
+                                                  port, resp_len, max_len);
                break;
        case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
                ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port,
-                                             resp_len);
+                                             resp_len, max_len);
                break;
        case IB_SMP_ATTR_LED_INFO:
                ret = __subn_get_opa_led_info(smp, am, data, ibdev, port,
-                                             resp_len);
+                                             resp_len, max_len);
                break;
        case IB_SMP_ATTR_SM_INFO:
                if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
@@ -3736,7 +3795,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
 
 static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
                            u8 *data, struct ib_device *ibdev, u8 port,
-                           u32 *resp_len)
+                           u32 *resp_len, u32 max_len)
 {
        int ret;
        struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -3744,51 +3803,51 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
        switch (attr_id) {
        case IB_SMP_ATTR_PORT_INFO:
                ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port,
-                                             resp_len);
+                                             resp_len, max_len);
                break;
        case IB_SMP_ATTR_PKEY_TABLE:
                ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port,
-                                              resp_len);
+                                              resp_len, max_len);
                break;
        case OPA_ATTRIB_ID_SL_TO_SC_MAP:
                ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port,
-                                             resp_len);
+                                             resp_len, max_len);
                break;
        case OPA_ATTRIB_ID_SC_TO_SL_MAP:
                ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port,
-                                             resp_len);
+                                             resp_len, max_len);
                break;
        case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
                ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port,
-                                              resp_len);
+                                              resp_len, max_len);
                break;
        case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
                ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
-                                               resp_len);
+                                               resp_len, max_len);
                break;
        case OPA_ATTRIB_ID_PORT_STATE_INFO:
                ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
-                                        resp_len);
+                                        resp_len, max_len);
                break;
        case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
                ret = __subn_set_opa_bct(smp, am, data, ibdev, port,
-                                        resp_len);
+                                        resp_len, max_len);
                break;
        case IB_SMP_ATTR_VL_ARB_TABLE:
                ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port,
-                                           resp_len);
+                                           resp_len, max_len);
                break;
        case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
                ret = __subn_set_opa_cong_setting(smp, am, data, ibdev,
-                                                 port, resp_len);
+                                                 port, resp_len, max_len);
                break;
        case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
                ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port,
-                                             resp_len);
+                                             resp_len, max_len);
                break;
        case IB_SMP_ATTR_LED_INFO:
                ret = __subn_set_opa_led_info(smp, am, data, ibdev, port,
-                                             resp_len);
+                                             resp_len, max_len);
                break;
        case IB_SMP_ATTR_SM_INFO:
                if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
@@ -3844,7 +3903,10 @@ static int subn_get_opa_aggregate(struct opa_smp *smp,
                memset(next_smp + sizeof(*agg), 0, agg_data_len);
 
                (void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
-                                       ibdev, port, NULL);
+                                      ibdev, port, NULL, (u32)agg_data_len);
+
+               if (smp->status & IB_SMP_INVALID_FIELD)
+                       break;
                if (smp->status & ~IB_SMP_DIRECTION) {
                        set_aggr_error(agg);
                        return reply((struct ib_mad_hdr *)smp);
@@ -3887,7 +3949,9 @@ static int subn_set_opa_aggregate(struct opa_smp *smp,
                }
 
                (void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
-                                       ibdev, port, NULL);
+                                      ibdev, port, NULL, (u32)agg_data_len);
+               if (smp->status & IB_SMP_INVALID_FIELD)
+                       break;
                if (smp->status & ~IB_SMP_DIRECTION) {
                        set_aggr_error(agg);
                        return reply((struct ib_mad_hdr *)smp);
@@ -3997,12 +4061,13 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
        struct opa_smp *smp = (struct opa_smp *)out_mad;
        struct hfi1_ibport *ibp = to_iport(ibdev, port);
        u8 *data;
-       u32 am;
+       u32 am, data_size;
        __be16 attr_id;
        int ret;
 
        *out_mad = *in_mad;
        data = opa_get_smp_data(smp);
+       data_size = (u32)opa_get_smp_data_size(smp);
 
        am = be32_to_cpu(smp->attr_mod);
        attr_id = smp->attr_id;
@@ -4046,7 +4111,8 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
                default:
                        clear_opa_smp_data(smp);
                        ret = subn_get_opa_sma(attr_id, smp, am, data,
-                                              ibdev, port, resp_len);
+                                              ibdev, port, resp_len,
+                                              data_size);
                        break;
                case OPA_ATTRIB_ID_AGGREGATE:
                        ret = subn_get_opa_aggregate(smp, ibdev, port,
@@ -4058,7 +4124,8 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
                switch (attr_id) {
                default:
                        ret = subn_set_opa_sma(attr_id, smp, am, data,
-                                              ibdev, port, resp_len);
+                                              ibdev, port, resp_len,
+                                              data_size);
                        break;
                case OPA_ATTRIB_ID_AGGREGATE:
                        ret = subn_set_opa_aggregate(smp, ibdev, port,
index 5aa3fd1be6538e6b37842c3cd734dedaf18f8dc0..a4e2506bd5ca92753b4ff0dbb50666336637072f 100644 (file)
@@ -115,7 +115,7 @@ struct opa_mad_notice_attr {
                        __be32  lid;            /* LID where change occurred */
                        __be32  new_cap_mask;   /* new capability mask */
                        __be16  reserved2;
-                       __be16  cap_mask;
+                       __be16  cap_mask3;
                        __be16  change_flags;   /* low 4 bits only */
                } __packed ntc_144;
 
index ccbf52c8ff6f037a485060e9d78f66c3b7fe79e6..d41fd87a39f26a2d18070754d1b579039018463e 100644 (file)
@@ -217,21 +217,27 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
        return node;
 }
 
-struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler,
-                                       unsigned long addr, unsigned long len)
+bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
+                                    unsigned long addr, unsigned long len,
+                                    struct mmu_rb_node **rb_node)
 {
        struct mmu_rb_node *node;
        unsigned long flags;
+       bool ret = false;
 
        spin_lock_irqsave(&handler->lock, flags);
        node = __mmu_rb_search(handler, addr, len);
        if (node) {
+               if (node->addr == addr && node->len == len)
+                       goto unlock;
                __mmu_int_rb_remove(node, &handler->root);
                list_del(&node->list); /* remove from LRU list */
+               ret = true;
        }
+unlock:
        spin_unlock_irqrestore(&handler->lock, flags);
-
-       return node;
+       *rb_node = node;
+       return ret;
 }
 
 void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
index 754f6ebf13fb1ac61d42dee6f2a31c726d98d42e..f04cec1e99d11a2d1edb2640667a9156ccca3828 100644 (file)
@@ -81,7 +81,8 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
 void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg);
 void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
                        struct mmu_rb_node *mnode);
-struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler,
-                                       unsigned long addr, unsigned long len);
+bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
+                                    unsigned long addr, unsigned long len,
+                                    struct mmu_rb_node **rb_node);
 
 #endif /* _HFI1_MMU_RB_H */
index 6a9f6f9819e1a326b0ed5f15f831d10cec2620a6..f01841b519461221ef06cd51a6a5df7b8f1241d9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -240,50 +240,6 @@ void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd)
                iounmap(dd->piobase);
 }
 
-static void msix_setup(struct hfi1_devdata *dd, int pos, u32 *msixcnt,
-                      struct hfi1_msix_entry *hfi1_msix_entry)
-{
-       int ret;
-       int nvec = *msixcnt;
-       struct msix_entry *msix_entry;
-       int i;
-
-       /*
-        * We can't pass hfi1_msix_entry array to msix_setup
-        * so use a dummy msix_entry array and copy the allocated
-        * irq back to the hfi1_msix_entry array.
-        */
-       msix_entry = kmalloc_array(nvec, sizeof(*msix_entry), GFP_KERNEL);
-       if (!msix_entry) {
-               ret = -ENOMEM;
-               goto do_intx;
-       }
-
-       for (i = 0; i < nvec; i++)
-               msix_entry[i] = hfi1_msix_entry[i].msix;
-
-       ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec);
-       if (ret < 0)
-               goto free_msix_entry;
-       nvec = ret;
-
-       for (i = 0; i < nvec; i++)
-               hfi1_msix_entry[i].msix = msix_entry[i];
-
-       kfree(msix_entry);
-       *msixcnt = nvec;
-       return;
-
-free_msix_entry:
-       kfree(msix_entry);
-
-do_intx:
-       dd_dev_err(dd, "pci_enable_msix_range %d vectors failed: %d, falling back to INTx\n",
-                  nvec, ret);
-       *msixcnt = 0;
-       hfi1_enable_intx(dd->pcidev);
-}
-
 /* return the PCIe link speed from the given link status */
 static u32 extract_speed(u16 linkstat)
 {
@@ -364,33 +320,29 @@ int pcie_speeds(struct hfi1_devdata *dd)
 }
 
 /*
- * Returns in *nent:
- *     - actual number of interrupts allocated
+ * Returns:
+ *     - actual number of interrupts allocated or
  *     - 0 if fell back to INTx.
+ *      - error
  */
-void request_msix(struct hfi1_devdata *dd, u32 *nent,
-                 struct hfi1_msix_entry *entry)
+int request_msix(struct hfi1_devdata *dd, u32 msireq)
 {
-       int pos;
+       int nvec;
 
-       pos = dd->pcidev->msix_cap;
-       if (*nent && pos) {
-               msix_setup(dd, pos, nent, entry);
-               /* did it, either MSI-X or INTx */
-       } else {
-               *nent = 0;
-               hfi1_enable_intx(dd->pcidev);
+       nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq,
+                                    PCI_IRQ_MSIX | PCI_IRQ_LEGACY);
+       if (nvec < 0) {
+               dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec);
+               return nvec;
        }
 
        tune_pcie_caps(dd);
-}
 
-void hfi1_enable_intx(struct pci_dev *pdev)
-{
-       /* first, turn on INTx */
-       pci_intx(pdev, 1);
-       /* then turn off MSI-X */
-       pci_disable_msix(pdev);
+       /* check for legacy IRQ */
+       if (nvec == 1 && !dd->pcidev->msix_enabled)
+               return 0;
+
+       return nvec;
 }
 
 /* restore command and BARs after a reset has wiped them out */
index 838fe84e285aeef2e89805075ded337ac2502a63..41307e4745257264655d29a06e9991807f18334d 100644 (file)
@@ -136,7 +136,6 @@ static void save_platform_config_fields(struct hfi1_devdata *dd)
 void get_platform_config(struct hfi1_devdata *dd)
 {
        int ret = 0;
-       unsigned long size = 0;
        u8 *temp_platform_config = NULL;
        u32 esize;
 
@@ -160,15 +159,6 @@ void get_platform_config(struct hfi1_devdata *dd)
                        dd->platform_config.size = esize;
                        return;
                }
-               /* fail, try EFI variable */
-
-               ret = read_hfi1_efi_var(dd, "configuration", &size,
-                                       (void **)&temp_platform_config);
-               if (!ret) {
-                       dd->platform_config.data = temp_platform_config;
-                       dd->platform_config.size = size;
-                       return;
-               }
        }
        dd_dev_err(dd,
                   "%s: Failed to get platform config, falling back to sub-optimal default file\n",
@@ -242,7 +232,7 @@ static int qual_power(struct hfi1_pportdata *ppd)
 
        if (ppd->offline_disabled_reason ==
                        HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY)) {
-               dd_dev_info(
+               dd_dev_err(
                        ppd->dd,
                        "%s: Port disabled due to system power restrictions\n",
                        __func__);
@@ -268,7 +258,7 @@ static int qual_bitrate(struct hfi1_pportdata *ppd)
 
        if (ppd->offline_disabled_reason ==
                        HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY)) {
-               dd_dev_info(
+               dd_dev_err(
                        ppd->dd,
                        "%s: Cable failed bitrate check, disabling port\n",
                        __func__);
@@ -709,15 +699,15 @@ static void apply_tunings(
                ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS,
                                       GENERAL_CONFIG, config_data);
                if (ret != HCMD_SUCCESS)
-                       dd_dev_info(ppd->dd,
-                                   "%s: Failed set ext device config params\n",
-                                   __func__);
+                       dd_dev_err(ppd->dd,
+                                  "%s: Failed set ext device config params\n",
+                                  __func__);
        }
 
        if (tx_preset_index == OPA_INVALID_INDEX) {
                if (ppd->port_type == PORT_TYPE_QSFP && limiting_active)
-                       dd_dev_info(ppd->dd, "%s: Invalid Tx preset index\n",
-                                   __func__);
+                       dd_dev_err(ppd->dd, "%s: Invalid Tx preset index\n",
+                                  __func__);
                return;
        }
 
@@ -900,7 +890,7 @@ static int tune_qsfp(struct hfi1_pportdata *ppd,
        case 0xD: /* fallthrough */
        case 0xF:
        default:
-               dd_dev_info(ppd->dd, "%s: Unknown/unsupported cable\n",
+               dd_dev_warn(ppd->dd, "%s: Unknown/unsupported cable\n",
                            __func__);
                break;
        }
@@ -942,7 +932,7 @@ void tune_serdes(struct hfi1_pportdata *ppd)
        case PORT_TYPE_DISCONNECTED:
                ppd->offline_disabled_reason =
                        HFI1_ODR_MASK(OPA_LINKDOWN_REASON_DISCONNECTED);
-               dd_dev_info(dd, "%s: Port disconnected, disabling port\n",
+               dd_dev_warn(dd, "%s: Port disconnected, disabling port\n",
                            __func__);
                goto bail;
        case PORT_TYPE_FIXED:
@@ -1027,7 +1017,7 @@ void tune_serdes(struct hfi1_pportdata *ppd)
                }
                break;
        default:
-               dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__);
+               dd_dev_warn(ppd->dd, "%s: Unknown port type\n", __func__);
                ppd->port_type = PORT_TYPE_UNKNOWN;
                tuning_method = OPA_UNKNOWN_TUNING;
                total_atten = 0;
index 1a7af9f60c137f916a125588318a182cf535ac01..198c7b4f3c789309f8a765ac049271de36d075fe 100644 (file)
@@ -73,12 +73,6 @@ static void iowait_wakeup(struct iowait *wait, int reason);
 static void iowait_sdma_drained(struct iowait *wait);
 static void qp_pio_drain(struct rvt_qp *qp);
 
-static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
-                             struct rvt_qpn_map *map, unsigned off)
-{
-       return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
-}
-
 const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
 [IB_WR_RDMA_WRITE] = {
        .length = sizeof(struct ib_rdma_wr),
index 1080778a1f7c4a38816ce02058f63baae862d89e..baa67bf0772b297667d5caf7f31873133c073d19 100644 (file)
@@ -765,7 +765,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
                ohdr->u.aeth = rvt_compute_aeth(qp);
        sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
        /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
-       pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT);
+       pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
        lrh0 |= (sc5 & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr)
                                     & 0xf) << 4;
        hdr.lrh[0] = cpu_to_be16(lrh0);
@@ -798,7 +798,8 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
                goto queue_ack;
        }
 
-       trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr);
+       trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
+                              &hdr, ib_is_sc5(sc5));
 
        /* write the pbc and data */
        ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords);
@@ -1009,7 +1010,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
                return;
        }
 
-       psn = be32_to_cpu(ohdr->bth[2]);
+       psn = ib_bth_get_psn(ohdr);
        reset_sending_psn(qp, psn);
 
        /*
@@ -1915,17 +1916,16 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
 void hfi1_rc_rcv(struct hfi1_packet *packet)
 {
        struct hfi1_ctxtdata *rcd = packet->rcd;
-       struct ib_header *hdr = packet->hdr;
-       u32 rcv_flags = packet->rcv_flags;
        void *data = packet->ebuf;
        u32 tlen = packet->tlen;
        struct rvt_qp *qp = packet->qp;
        struct hfi1_ibport *ibp = rcd_to_iport(rcd);
        struct ib_other_headers *ohdr = packet->ohdr;
-       u32 bth0, opcode;
+       u32 bth0;
+       u32 opcode = packet->opcode;
        u32 hdrsize = packet->hlen;
        u32 psn;
-       u32 pad;
+       u32 pad = packet->pad;
        struct ib_wc wc;
        u32 pmtu = qp->pmtu;
        int diff;
@@ -1937,14 +1937,13 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
        u32 rkey;
 
        lockdep_assert_held(&qp->r_lock);
+
        bth0 = be32_to_cpu(ohdr->bth[0]);
-       if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0))
+       if (hfi1_ruc_check_hdr(ibp, packet))
                return;
 
        is_fecn = process_ecn(qp, packet, false);
-
-       psn = be32_to_cpu(ohdr->bth[2]);
-       opcode = ib_bth_get_opcode(ohdr);
+       psn = ib_bth_get_psn(ohdr);
 
        /*
         * Process responses (ACKs) before anything else.  Note that the
@@ -2074,8 +2073,6 @@ no_immediate_data:
                wc.wc_flags = 0;
                wc.ex.imm_data = 0;
 send_last:
-               /* Get the number of bytes the message was padded by. */
-               pad = ib_bth_get_pad(ohdr);
                /* Check for invalid length. */
                /* LAST len should be >= 1 */
                if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -2368,28 +2365,19 @@ send_ack:
 
 void hfi1_rc_hdrerr(
        struct hfi1_ctxtdata *rcd,
-       struct ib_header *hdr,
-       u32 rcv_flags,
+       struct hfi1_packet *packet,
        struct rvt_qp *qp)
 {
-       int has_grh = rcv_flags & HFI1_HAS_GRH;
-       struct ib_other_headers *ohdr;
        struct hfi1_ibport *ibp = rcd_to_iport(rcd);
        int diff;
        u32 opcode;
-       u32 psn, bth0;
-
-       /* Check for GRH */
-       ohdr = &hdr->u.oth;
-       if (has_grh)
-               ohdr = &hdr->u.l.oth;
+       u32 psn;
 
-       bth0 = be32_to_cpu(ohdr->bth[0]);
-       if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0))
+       if (hfi1_ruc_check_hdr(ibp, packet))
                return;
 
-       psn = be32_to_cpu(ohdr->bth[2]);
-       opcode = ib_bth_get_opcode(ohdr);
+       psn = ib_bth_get_psn(packet->ohdr);
+       opcode = ib_bth_get_opcode(packet->ohdr);
 
        /* Only deal with RDMA Writes for now */
        if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
index 3a17daba28a935535904ed3596e9745a9f6c763b..9cf506a9a79691417c2e13ae5d84642be5dbef04 100644 (file)
@@ -75,7 +75,7 @@ static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
                        continue;
                /* Check LKEY */
                if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
-                                &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+                                NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
                        goto bad_lkey;
                qp->r_len += wqe->sg_list[i].length;
                j++;
@@ -214,100 +214,95 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
  *
  * The s_lock will be acquired around the hfi1_migrate_qp() call.
  */
-int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
-                      int has_grh, struct rvt_qp *qp, u32 bth0)
+int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet)
 {
        __be64 guid;
        unsigned long flags;
+       struct rvt_qp *qp = packet->qp;
        u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
-
-       if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
-               if (!has_grh) {
+       u32 dlid = packet->dlid;
+       u32 slid = packet->slid;
+       u32 sl = packet->sl;
+       int migrated;
+       u32 bth0, bth1;
+
+       bth0 = be32_to_cpu(packet->ohdr->bth[0]);
+       bth1 = be32_to_cpu(packet->ohdr->bth[1]);
+       migrated = bth0 & IB_BTH_MIG_REQ;
+
+       if (qp->s_mig_state == IB_MIG_ARMED && migrated) {
+               if (!packet->grh) {
                        if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
                            IB_AH_GRH)
-                               goto err;
+                               return 1;
                } else {
                        const struct ib_global_route *grh;
 
                        if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
                              IB_AH_GRH))
-                               goto err;
+                               return 1;
                        grh = rdma_ah_read_grh(&qp->alt_ah_attr);
                        guid = get_sguid(ibp, grh->sgid_index);
-                       if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
+                       if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix,
                                    guid))
-                               goto err;
+                               return 1;
                        if (!gid_ok(
-                               &hdr->u.l.grh.sgid,
+                               &packet->grh->sgid,
                                grh->dgid.global.subnet_prefix,
                                grh->dgid.global.interface_id))
-                               goto err;
+                               return 1;
                }
-               if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
-                                           ib_get_slid(hdr)))) {
-                       hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
-                                      (u16)bth0,
-                                      ib_get_sl(hdr),
-                                      0, qp->ibqp.qp_num,
-                                      ib_get_slid(hdr),
-                                      ib_get_dlid(hdr));
-                       goto err;
+               if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
+                                           sc5, slid))) {
+                       hfi1_bad_pkey(ibp, (u16)bth0, sl,
+                                     0, qp->ibqp.qp_num, slid, dlid);
+                       return 1;
                }
                /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
-               if (ib_get_slid(hdr) !=
-                       rdma_ah_get_dlid(&qp->alt_ah_attr) ||
+               if (slid != rdma_ah_get_dlid(&qp->alt_ah_attr) ||
                    ppd_from_ibp(ibp)->port !=
                        rdma_ah_get_port_num(&qp->alt_ah_attr))
-                       goto err;
+                       return 1;
                spin_lock_irqsave(&qp->s_lock, flags);
                hfi1_migrate_qp(qp);
                spin_unlock_irqrestore(&qp->s_lock, flags);
        } else {
-               if (!has_grh) {
+               if (!packet->grh) {
                        if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
                                                 IB_AH_GRH)
-                               goto err;
+                               return 1;
                } else {
                        const struct ib_global_route *grh;
 
                        if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
                                                   IB_AH_GRH))
-                               goto err;
+                               return 1;
                        grh = rdma_ah_read_grh(&qp->remote_ah_attr);
                        guid = get_sguid(ibp, grh->sgid_index);
-                       if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
+                       if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix,
                                    guid))
-                               goto err;
+                               return 1;
                        if (!gid_ok(
-                            &hdr->u.l.grh.sgid,
+                            &packet->grh->sgid,
                             grh->dgid.global.subnet_prefix,
                             grh->dgid.global.interface_id))
-                               goto err;
+                               return 1;
                }
-               if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
-                                           ib_get_slid(hdr)))) {
-                       hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
-                                      (u16)bth0,
-                                      ib_get_sl(hdr),
-                                      0, qp->ibqp.qp_num,
-                                      ib_get_slid(hdr),
-                                      ib_get_dlid(hdr));
-                       goto err;
+               if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
+                                           sc5, slid))) {
+                       hfi1_bad_pkey(ibp, (u16)bth0, sl,
+                                     0, qp->ibqp.qp_num, slid, dlid);
+                       return 1;
                }
                /* Validate the SLID. See Ch. 9.6.1.5 */
-               if (ib_get_slid(hdr) !=
-                       rdma_ah_get_dlid(&qp->remote_ah_attr) ||
+               if ((slid != rdma_ah_get_dlid(&qp->remote_ah_attr)) ||
                    ppd_from_ibp(ibp)->port != qp->port_num)
-                       goto err;
-               if (qp->s_mig_state == IB_MIG_REARM &&
-                   !(bth0 & IB_BTH_MIG_REQ))
+                       return 1;
+               if (qp->s_mig_state == IB_MIG_REARM && !migrated)
                        qp->s_mig_state = IB_MIG_ARMED;
        }
 
        return 0;
-
-err:
-       return 1;
 }
 
 /**
index bfd0d5187e9b003f676327cf3d906acdf8bf676f..d82ff57214c54902a6ff6ea48f9fde96e556e8e4 100644 (file)
@@ -1340,10 +1340,8 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
  * @dd: hfi1_devdata
  * @port: port number (currently only zero)
  *
- * sdma_init initializes the specified number of engines.
- *
- * The code initializes each sde, its csrs.  Interrupts
- * are not required to be enabled.
+ * Initializes each sde and its csrs.
+ * Interrupts are not required to be enabled.
  *
  * Returns:
  * 0 - success, -errno on failure
index eafae487face7783dc9123425f7ac342da51f1de..b80b74d0c252056e7399a70f89d565192cc51739 100644 (file)
@@ -47,7 +47,7 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
-u8 ibhdr_exhdr_len(struct ib_header *hdr)
+u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr)
 {
        struct ib_other_headers *ohdr;
        u8 opcode;
@@ -61,13 +61,18 @@ u8 ibhdr_exhdr_len(struct ib_header *hdr)
               0 : hdr_len_by_opcode[opcode] - (12 + 8);
 }
 
-#define IMM_PRN  "imm %d"
-#define RETH_PRN "reth vaddr 0x%.16llx rkey 0x%.8x dlen 0x%.8x"
-#define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x"
-#define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x"
-#define IETH_PRN "ieth rkey 0x%.8x"
-#define ATOMICACKETH_PRN "origdata %llx"
-#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %llx cdata %llx"
+const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet)
+{
+       return "IB";
+}
+
+#define IMM_PRN  "imm:%d"
+#define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x"
+#define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x"
+#define DETH_PRN "deth qkey:0x%.8x sqpn:0x%.6x"
+#define IETH_PRN "ieth rkey:0x%.8x"
+#define ATOMICACKETH_PRN "origdata:%llx"
+#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx"
 
 #define OP(transport, op) IB_OPCODE_## transport ## _ ## op
 
@@ -84,6 +89,43 @@ static const char *parse_syndrome(u8 syndrome)
        return "";
 }
 
+void hfi1_trace_parse_bth(struct ib_other_headers *ohdr,
+                         u8 *ack, u8 *becn, u8 *fecn, u8 *mig,
+                         u8 *se, u8 *pad, u8 *opcode, u8 *tver,
+                         u16 *pkey, u32 *psn, u32 *qpn)
+{
+       *ack = ib_bth_get_ackreq(ohdr);
+       *becn = ib_bth_get_becn(ohdr);
+       *fecn = ib_bth_get_fecn(ohdr);
+       *mig = ib_bth_get_migreq(ohdr);
+       *se = ib_bth_get_se(ohdr);
+       *pad = ib_bth_get_pad(ohdr);
+       *opcode = ib_bth_get_opcode(ohdr);
+       *tver = ib_bth_get_tver(ohdr);
+       *pkey = ib_bth_get_pkey(ohdr);
+       *psn = ib_bth_get_psn(ohdr);
+       *qpn = ib_bth_get_qpn(ohdr);
+}
+
+void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5,
+                            struct ib_other_headers **ohdr,
+                            u8 *lnh, u8 *lver, u8 *sl, u8 *sc,
+                            u16 *len, u32 *dlid, u32 *slid)
+{
+       *lnh = ib_get_lnh(hdr);
+       *lver = ib_get_lver(hdr);
+       *sl = ib_get_sl(hdr);
+       *sc = ib_get_sc(hdr) | (sc5 << 4);
+       *len = ib_get_len(hdr);
+       *dlid = ib_get_dlid(hdr);
+       *slid = ib_get_slid(hdr);
+
+       if (*lnh == HFI1_LRH_BTH)
+               *ohdr = &hdr->u.oth;
+       else
+               *ohdr = &hdr->u.l.oth;
+}
+
 const char *parse_everbs_hdrs(
        struct trace_seq *p,
        u8 opcode,
index 090f6b506953128d17f1aa32e61710122777b47b..0f2d2da057ecffa34a441d43437218dd4fc36855 100644 (file)
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM hfi1_ibhdrs
 
-u8 ibhdr_exhdr_len(struct ib_header *hdr);
+#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode  }
+#define show_ib_opcode(opcode)                             \
+__print_symbolic(opcode,                                   \
+       ib_opcode_name(RC_SEND_FIRST),                     \
+       ib_opcode_name(RC_SEND_MIDDLE),                    \
+       ib_opcode_name(RC_SEND_LAST),                      \
+       ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE),       \
+       ib_opcode_name(RC_SEND_ONLY),                      \
+       ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE),       \
+       ib_opcode_name(RC_RDMA_WRITE_FIRST),               \
+       ib_opcode_name(RC_RDMA_WRITE_MIDDLE),              \
+       ib_opcode_name(RC_RDMA_WRITE_LAST),                \
+       ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
+       ib_opcode_name(RC_RDMA_WRITE_ONLY),                \
+       ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
+       ib_opcode_name(RC_RDMA_READ_REQUEST),              \
+       ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST),       \
+       ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE),      \
+       ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST),        \
+       ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY),        \
+       ib_opcode_name(RC_ACKNOWLEDGE),                    \
+       ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE),             \
+       ib_opcode_name(RC_COMPARE_SWAP),                   \
+       ib_opcode_name(RC_FETCH_ADD),                      \
+       ib_opcode_name(UC_SEND_FIRST),                     \
+       ib_opcode_name(UC_SEND_MIDDLE),                    \
+       ib_opcode_name(UC_SEND_LAST),                      \
+       ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE),       \
+       ib_opcode_name(UC_SEND_ONLY),                      \
+       ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE),       \
+       ib_opcode_name(UC_RDMA_WRITE_FIRST),               \
+       ib_opcode_name(UC_RDMA_WRITE_MIDDLE),              \
+       ib_opcode_name(UC_RDMA_WRITE_LAST),                \
+       ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
+       ib_opcode_name(UC_RDMA_WRITE_ONLY),                \
+       ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
+       ib_opcode_name(UD_SEND_ONLY),                      \
+       ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE),       \
+       ib_opcode_name(CNP))
+
 const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
+u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr);
+const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet);
+void hfi1_trace_parse_bth(struct ib_other_headers *ohdr,
+                         u8 *ack, u8 *becn, u8 *fecn, u8 *mig,
+                         u8 *se, u8 *pad, u8 *opcode, u8 *tver,
+                         u16 *pkey, u32 *psn, u32 *qpn);
+void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5,
+                            struct ib_other_headers **ohdr,
+                            u8 *lnh, u8 *lver, u8 *sl, u8 *sc,
+                            u16 *len, u32 *dlid, u32 *slid);
 
 #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
 
@@ -66,139 +115,198 @@ __print_symbolic(lrh,                    \
        lrh_name(LRH_BTH),               \
        lrh_name(LRH_GRH))
 
-#define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x"
+#define LRH_PRN "len:%d sc:%d dlid:0x%.4x slid:0x%.4x"
+#define LRH_9B_PRN "lnh:%d,%s lver:%d sl:%d "
 #define BTH_PRN \
-       "op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \
-       "f %d b %d qpn 0x%.6x a %d psn 0x%.8x"
-#define EHDR_PRN "%s"
+       "op:0x%.2x,%s se:%d m:%d pad:%d tver:%d pkey:0x%.4x " \
+       "f:%d b:%d qpn:0x%.6x a:%d psn:0x%.8x"
+#define EHDR_PRN "hlen:%d %s"
 
-DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
+DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template,
                    TP_PROTO(struct hfi1_devdata *dd,
-                            struct ib_header *hdr),
-                   TP_ARGS(dd, hdr),
+                            struct hfi1_packet *packet,
+                            bool sc5),
+                   TP_ARGS(dd, packet, sc5),
                    TP_STRUCT__entry(
                        DD_DEV_ENTRY(dd)
-                       /* LRH */
-                       __field(u8, vl)
+                       __field(u8, lnh)
                        __field(u8, lver)
                        __field(u8, sl)
+                       __field(u16, len)
+                       __field(u32, dlid)
+                       __field(u8, sc)
+                       __field(u32, slid)
+                       __field(u8, opcode)
+                       __field(u8, se)
+                       __field(u8, mig)
+                       __field(u8, pad)
+                       __field(u8, tver)
+                       __field(u16, pkey)
+                       __field(u8, fecn)
+                       __field(u8, becn)
+                       __field(u32, qpn)
+                       __field(u8, ack)
+                       __field(u32, psn)
+                       /* extended headers */
+                       __dynamic_array(u8, ehdrs,
+                                       hfi1_trace_ib_hdr_len(packet->hdr))
+                       ),
+                   TP_fast_assign(
+                          struct ib_other_headers *ohdr;
+
+                          DD_DEV_ASSIGN(dd);
+
+                          hfi1_trace_parse_9b_hdr(packet->hdr, sc5,
+                                                  &ohdr,
+                                                  &__entry->lnh,
+                                                  &__entry->lver,
+                                                  &__entry->sl,
+                                                  &__entry->sc,
+                                                  &__entry->len,
+                                                  &__entry->dlid,
+                                                  &__entry->slid);
+
+                         hfi1_trace_parse_bth(ohdr, &__entry->ack,
+                                              &__entry->becn, &__entry->fecn,
+                                              &__entry->mig, &__entry->se,
+                                              &__entry->pad, &__entry->opcode,
+                                              &__entry->tver, &__entry->pkey,
+                                              &__entry->psn, &__entry->qpn);
+                         /* extended headers */
+                         memcpy(__get_dynamic_array(ehdrs), &ohdr->u,
+                                __get_dynamic_array_len(ehdrs));
+                        ),
+                   TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " "
+                             BTH_PRN " " EHDR_PRN,
+                             __get_str(dev),
+                             __entry->len,
+                             __entry->sc,
+                             __entry->dlid,
+                             __entry->slid,
+                             __entry->lnh, show_lnh(__entry->lnh),
+                             __entry->lver,
+                             __entry->sl,
+                             /* BTH */
+                             __entry->opcode, show_ib_opcode(__entry->opcode),
+                             __entry->se,
+                             __entry->mig,
+                             __entry->pad,
+                             __entry->tver,
+                             __entry->pkey,
+                             __entry->fecn,
+                             __entry->becn,
+                             __entry->qpn,
+                             __entry->ack,
+                             __entry->psn,
+                             /* extended headers */
+                             __get_dynamic_array_len(ehdrs),
+                             __parse_ib_ehdrs(
+                                       __entry->opcode,
+                                       (void *)__get_dynamic_array(ehdrs))
+                            )
+);
+
+DEFINE_EVENT(hfi1_input_ibhdr_template, input_ibhdr,
+            TP_PROTO(struct hfi1_devdata *dd,
+                     struct hfi1_packet *packet, bool sc5),
+            TP_ARGS(dd, packet, sc5));
+
+DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template,
+                   TP_PROTO(struct hfi1_devdata *dd,
+                            struct ib_header *hdr,
+                            bool sc5),
+                   TP_ARGS(dd, hdr, sc5),
+                   TP_STRUCT__entry(
+                       DD_DEV_ENTRY(dd)
                        __field(u8, lnh)
-                       __field(u16, dlid)
+                       __field(u8, lver)
+                       __field(u8, sl)
                        __field(u16, len)
-                       __field(u16, slid)
-                       /* BTH */
+                       __field(u32, dlid)
+                       __field(u8, sc)
+                       __field(u32, slid)
                        __field(u8, opcode)
                        __field(u8, se)
-                       __field(u8, m)
+                       __field(u8, mig)
                        __field(u8, pad)
                        __field(u8, tver)
                        __field(u16, pkey)
-                       __field(u8, f)
-                       __field(u8, b)
+                       __field(u8, fecn)
+                       __field(u8, becn)
                        __field(u32, qpn)
-                       __field(u8, a)
+                       __field(u8, ack)
                        __field(u32, psn)
                        /* extended headers */
-                       __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
+                       __dynamic_array(u8, ehdrs,
+                                       hfi1_trace_ib_hdr_len(hdr))
                        ),
-                     TP_fast_assign(
+                   TP_fast_assign(
                        struct ib_other_headers *ohdr;
 
                        DD_DEV_ASSIGN(dd);
-                       /* LRH */
-                       __entry->vl =
-                       (u8)(be16_to_cpu(hdr->lrh[0]) >> 12);
-                       __entry->lver =
-                       (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf;
-                       __entry->sl =
-                       (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
-                       __entry->lnh =
-                       (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
-                       __entry->dlid =
-                       be16_to_cpu(hdr->lrh[1]);
-                       /* allow for larger len */
-                       __entry->len =
-                       be16_to_cpu(hdr->lrh[2]);
-                       __entry->slid =
-                       be16_to_cpu(hdr->lrh[3]);
-                       /* BTH */
-                       if (__entry->lnh == HFI1_LRH_BTH)
-                       ohdr = &hdr->u.oth;
-                       else
-                       ohdr = &hdr->u.l.oth;
-                       __entry->opcode =
-                       (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
-                       __entry->se =
-                       (be32_to_cpu(ohdr->bth[0]) >> 23) & 1;
-                       __entry->m =
-                       (be32_to_cpu(ohdr->bth[0]) >> 22) & 1;
-                       __entry->pad =
-                       (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-                       __entry->tver =
-                       (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf;
-                       __entry->pkey =
-                       be32_to_cpu(ohdr->bth[0]) & 0xffff;
-                       __entry->f =
-                       (be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) &
-                       IB_FECN_MASK;
-                       __entry->b =
-                       (be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) &
-                       IB_BECN_MASK;
-                       __entry->qpn =
-                       be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
-                       __entry->a =
-                       (be32_to_cpu(ohdr->bth[2]) >> 31) & 1;
-                       /* allow for larger PSN */
-                       __entry->psn =
-                       be32_to_cpu(ohdr->bth[2]) & 0x7fffffff;
+
+                       hfi1_trace_parse_9b_hdr(hdr, sc5,
+                                               &ohdr, &__entry->lnh,
+                                               &__entry->lver, &__entry->sl,
+                                               &__entry->sc, &__entry->len,
+                                               &__entry->dlid, &__entry->slid);
+
+                       hfi1_trace_parse_bth(ohdr, &__entry->ack,
+                                            &__entry->becn, &__entry->fecn,
+                                            &__entry->mig, &__entry->se,
+                                            &__entry->pad, &__entry->opcode,
+                                            &__entry->tver, &__entry->pkey,
+                                            &__entry->psn, &__entry->qpn);
+
                        /* extended headers */
-                       memcpy(__get_dynamic_array(ehdrs), &ohdr->u,
-                              ibhdr_exhdr_len(hdr));
-                       ),
-               TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN,
-                         __get_str(dev),
-                         /* LRH */
-                         __entry->vl,
-                         __entry->lver,
-                         __entry->sl,
-                         __entry->lnh, show_lnh(__entry->lnh),
-                         __entry->dlid,
-                         __entry->len,
-                         __entry->slid,
-                         /* BTH */
-                         __entry->opcode, show_ib_opcode(__entry->opcode),
-                         __entry->se,
-                         __entry->m,
-                         __entry->pad,
-                         __entry->tver,
-                         __entry->pkey,
-                         __entry->f,
-                         __entry->b,
-                         __entry->qpn,
-                         __entry->a,
-                         __entry->psn,
-                         /* extended headers */
-                         __parse_ib_ehdrs(
-                               __entry->opcode,
-                               (void *)__get_dynamic_array(ehdrs))
-                       )
+                       memcpy(__get_dynamic_array(ehdrs),
+                              &ohdr->u, __get_dynamic_array_len(ehdrs));
+                   ),
+                   TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " "
+                             BTH_PRN " " EHDR_PRN,
+                             __get_str(dev),
+                             __entry->len,
+                             __entry->sc,
+                             __entry->dlid,
+                             __entry->slid,
+                             __entry->lnh, show_lnh(__entry->lnh),
+                             __entry->lver,
+                             __entry->sl,
+                             /* BTH */
+                             __entry->opcode, show_ib_opcode(__entry->opcode),
+                             __entry->se,
+                             __entry->mig,
+                             __entry->pad,
+                             __entry->tver,
+                             __entry->pkey,
+                             __entry->fecn,
+                             __entry->becn,
+                             __entry->qpn,
+                             __entry->ack,
+                             __entry->psn,
+                             /* extended headers */
+                             __get_dynamic_array_len(ehdrs),
+                             __parse_ib_ehdrs(
+                                       __entry->opcode,
+                                       (void *)__get_dynamic_array(ehdrs))
+                            )
 );
 
-DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr,
-            TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
-            TP_ARGS(dd, hdr));
+DEFINE_EVENT(hfi1_output_ibhdr_template, pio_output_ibhdr,
+            TP_PROTO(struct hfi1_devdata *dd,
+                     struct ib_header *hdr, bool sc5),
+            TP_ARGS(dd, hdr, sc5));
 
-DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr,
-            TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
-            TP_ARGS(dd, hdr));
+DEFINE_EVENT(hfi1_output_ibhdr_template, ack_output_ibhdr,
+            TP_PROTO(struct hfi1_devdata *dd,
+                     struct ib_header *hdr, bool sc5),
+            TP_ARGS(dd, hdr, sc5));
 
-DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr,
-            TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
-            TP_ARGS(dd, hdr));
+DEFINE_EVENT(hfi1_output_ibhdr_template, sdma_output_ibhdr,
+            TP_PROTO(struct hfi1_devdata *dd,
+                     struct ib_header *hdr, bool sc5),
+            TP_ARGS(dd, hdr, sc5));
 
-DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr,
-            TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
-            TP_ARGS(dd, hdr));
 
 #endif /* __HFI1_TRACE_IBHDRS_H */
 
index deac77ddaeab644ce9a74d819f88a40de6220548..8db2253523ffeb2e8791c5cf3f1f8e161b38554b 100644 (file)
@@ -72,6 +72,26 @@ TRACE_EVENT(hfi1_interrupt,
                      __entry->src)
 );
 
+DECLARE_EVENT_CLASS( /* event class recording an __iomem address and the u64 value paired with it */
+       hfi1_csr_template,
+       TP_PROTO(void __iomem *addr, u64 value),
+       TP_ARGS(addr, value),
+       TP_STRUCT__entry(
+               __field(void __iomem *, addr)
+               __field(u64, value)
+       ),
+       TP_fast_assign(
+               __entry->addr = addr;
+               __entry->value = value;
+       ),
+       TP_printk("addr %p value %llx", __entry->addr, __entry->value) /* value is printed in hex, without a 0x prefix */
+);
+
+DEFINE_EVENT( /* hfi1_write_rcvarray: instance of hfi1_csr_template (addr, value) */
+       hfi1_csr_template, hfi1_write_rcvarray,
+       TP_PROTO(void __iomem *addr, u64 value),
+       TP_ARGS(addr, value));
+
 #ifdef CONFIG_FAULT_INJECTION
 TRACE_EVENT(hfi1_fault_opcode,
            TP_PROTO(struct rvt_qp *qp, u8 opcode),
index f77e59fb43fee7934fc15096b703cfb962f89f5a..84929578cfe62e8105d1762eebb2f48dc7f850e9 100644 (file)
 
 #include "hfi.h"
 
+#define tidtype_name(type) { PT_##type, #type } /* { PT_<type>, "<type>" } table entry */
+#define show_tidtype(type) /* print a PT_* TID type by its symbolic name */ \
+__print_symbolic(type,                       \
+       tidtype_name(EXPECTED),              \
+       tidtype_name(EAGER),                 \
+       tidtype_name(INVALID))
+
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM hfi1_rx
 
+#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype } /* { RHF_RCV_TYPE_<etype>, "<etype>" } table entry */
+#define show_packettype(etype)                  \
+__print_symbolic(etype,                         \
+       packettype_name(EXPECTED),              \
+       packettype_name(EAGER),                 \
+       packettype_name(IB),                    \
+       packettype_name(ERROR),                 \
+       packettype_name(BYPASS))
+
 TRACE_EVENT(hfi1_rcvhdr,
            TP_PROTO(struct hfi1_devdata *dd,
                     u32 ctxt,
@@ -129,7 +145,8 @@ TRACE_EVENT(hfi1_receive_interrupt,
                      )
 );
 
-TRACE_EVENT(hfi1_exp_tid_reg,
+DECLARE_EVENT_CLASS(
+           hfi1_exp_tid_reg_unreg,
            TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr,
                     u32 npages, unsigned long va, unsigned long pa,
                     dma_addr_t dma),
@@ -163,38 +180,45 @@ TRACE_EVENT(hfi1_exp_tid_reg,
                      )
        );
 
-TRACE_EVENT(hfi1_exp_tid_unreg,
-           TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
-                    unsigned long va, unsigned long pa, dma_addr_t dma),
-           TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
-           TP_STRUCT__entry(
-                            __field(unsigned int, ctxt)
-                            __field(u16, subctxt)
-                            __field(u32, rarr)
-                            __field(u32, npages)
-                            __field(unsigned long, va)
-                            __field(unsigned long, pa)
-                            __field(dma_addr_t, dma)
-                            ),
-           TP_fast_assign(
-                          __entry->ctxt = ctxt;
-                          __entry->subctxt = subctxt;
-                          __entry->rarr = rarr;
-                          __entry->npages = npages;
-                          __entry->va = va;
-                          __entry->pa = pa;
-                          __entry->dma = dma;
-                          ),
-           TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
-                     __entry->ctxt,
-                     __entry->subctxt,
-                     __entry->rarr,
-                     __entry->npages,
-                     __entry->pa,
-                     __entry->va,
-                     __entry->dma
-                     )
-       );
+DEFINE_EVENT( /* hfi1_exp_tid_unreg: instance of the shared hfi1_exp_tid_reg_unreg class */
+       hfi1_exp_tid_reg_unreg, hfi1_exp_tid_unreg,
+       TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
+                unsigned long va, unsigned long pa, dma_addr_t dma),
+       TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma));
+
+DEFINE_EVENT( /* hfi1_exp_tid_reg: instance of the shared hfi1_exp_tid_reg_unreg class */
+       hfi1_exp_tid_reg_unreg, hfi1_exp_tid_reg,
+       TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
+                unsigned long va, unsigned long pa, dma_addr_t dma),
+       TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma));
+
+TRACE_EVENT( /* records device name, TID type, pa, index and order */
+       hfi1_put_tid,
+       TP_PROTO(struct hfi1_devdata *dd,
+                u32 index, u32 type, unsigned long pa, u16 order),
+       TP_ARGS(dd, index, type, pa, order),
+       TP_STRUCT__entry( /* field macros take no trailing ';', as in the other events here */
+               DD_DEV_ENTRY(dd)
+               __field(unsigned long, pa)
+               __field(u32, index)
+               __field(u32, type)
+               __field(u16, order)
+       ),
+       TP_fast_assign(
+               DD_DEV_ASSIGN(dd);
+               __entry->pa = pa;
+               __entry->index = index;
+               __entry->type = type;
+               __entry->order = order;
+       ),
+       TP_printk("[%s] type %s pa %lx index %u order %u",
+                 __get_str(dev),
+                 show_tidtype(__entry->type), /* symbolic PT_* name */
+                 __entry->pa,
+                 __entry->index,
+                 __entry->order
+       )
+);
 
 TRACE_EVENT(hfi1_exp_tid_inval,
            TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr,
index 5da1e4546543710a9b11a84c13cf2e368f7d0365..76c2451a53d7b72ef7ef3963049da388c0444348 100644 (file)
@@ -297,31 +297,25 @@ bail_no_tx:
 void hfi1_uc_rcv(struct hfi1_packet *packet)
 {
        struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
-       struct ib_header *hdr = packet->hdr;
-       u32 rcv_flags = packet->rcv_flags;
        void *data = packet->ebuf;
        u32 tlen = packet->tlen;
        struct rvt_qp *qp = packet->qp;
        struct ib_other_headers *ohdr = packet->ohdr;
-       u32 bth0, opcode;
+       u32 opcode = packet->opcode;
        u32 hdrsize = packet->hlen;
        u32 psn;
-       u32 pad;
+       u32 pad = packet->pad;
        struct ib_wc wc;
        u32 pmtu = qp->pmtu;
        struct ib_reth *reth;
-       int has_grh = rcv_flags & HFI1_HAS_GRH;
        int ret;
 
-       bth0 = be32_to_cpu(ohdr->bth[0]);
-       if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0))
+       if (hfi1_ruc_check_hdr(ibp, packet))
                return;
 
        process_ecn(qp, packet, true);
 
-       psn = be32_to_cpu(ohdr->bth[2]);
-       opcode = ib_bth_get_opcode(ohdr);
-
+       psn = ib_bth_get_psn(ohdr);
        /* Compare the PSN versus the expected PSN. */
        if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) {
                /*
@@ -432,8 +426,6 @@ no_immediate_data:
                wc.ex.imm_data = 0;
                wc.wc_flags = 0;
 send_last:
-               /* Get the number of bytes the message was padded by. */
-               pad = ib_bth_get_pad(ohdr);
                /* Check for invalid length. */
                /* LAST len should be >= 1 */
                if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -527,8 +519,6 @@ rdma_first:
 rdma_last_imm:
                wc.wc_flags = IB_WC_WITH_IMM;
 
-               /* Get the number of bytes the message was padded by. */
-               pad = ib_bth_get_pad(ohdr);
                /* Check for invalid length. */
                /* LAST len should be >= 1 */
                if (unlikely(tlen < (hdrsize + pad + 4)))
index 6a4e95cefae5f64c337b69f75f9187c48a6e40f6..6bf7a1b08491219a1a6c6d3d9a0d72dc5961eb5e 100644 (file)
@@ -110,10 +110,10 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
                                   ((1 << ppd->lmc) - 1));
                if (unlikely(ingress_pkey_check(ppd, pkey, sc5,
                                                qp->s_pkey_index, slid))) {
-                       hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, pkey,
-                                      rdma_ah_get_sl(ah_attr),
-                                      sqp->ibqp.qp_num, qp->ibqp.qp_num,
-                                      slid, rdma_ah_get_dlid(ah_attr));
+                       hfi1_bad_pkey(ibp, pkey,
+                                     rdma_ah_get_sl(ah_attr),
+                                     sqp->ibqp.qp_num, qp->ibqp.qp_num,
+                                     slid, rdma_ah_get_dlid(ah_attr));
                        goto drop;
                }
        }
@@ -128,18 +128,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
 
                qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
                        sqp->qkey : swqe->ud_wr.remote_qkey;
-               if (unlikely(qkey != qp->qkey)) {
-                       u16 lid;
-
-                       lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
-                                         ((1 << ppd->lmc) - 1));
-                       hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey,
-                                      rdma_ah_get_sl(ah_attr),
-                                      sqp->ibqp.qp_num, qp->ibqp.qp_num,
-                                      lid,
-                                      rdma_ah_get_dlid(ah_attr));
-                       goto drop;
-               }
+               if (unlikely(qkey != qp->qkey))
+                       goto drop; /* silently drop per IBTA spec */
        }
 
        /*
@@ -549,7 +539,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
        hdr.lrh[3] = cpu_to_be16(slid);
 
        plen = 2 /* PBC */ + hwords;
-       pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+       pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
        vl = sc_to_vlt(ppd->dd, sc5);
        pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
        if (ctxt) {
@@ -668,36 +658,31 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
 void hfi1_ud_rcv(struct hfi1_packet *packet)
 {
        struct ib_other_headers *ohdr = packet->ohdr;
-       int opcode;
        u32 hdrsize = packet->hlen;
        struct ib_wc wc;
        u32 qkey;
        u32 src_qp;
-       u16 dlid, pkey;
+       u16 pkey;
        int mgmt_pkey_idx = -1;
        struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
        struct ib_header *hdr = packet->hdr;
-       u32 rcv_flags = packet->rcv_flags;
        void *data = packet->ebuf;
        u32 tlen = packet->tlen;
        struct rvt_qp *qp = packet->qp;
-       bool has_grh = rcv_flags & HFI1_HAS_GRH;
        u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf);
        u32 bth1;
-       u8 sl_from_sc, sl;
-       u16 slid;
-       u8 extra_bytes;
+       u8 sl_from_sc;
+       u8 extra_bytes = packet->pad;
+       u8 opcode = packet->opcode;
+       u8 sl = packet->sl;
+       u32 dlid = packet->dlid;
+       u32 slid = packet->slid;
 
-       qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
-       src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
-       dlid = ib_get_dlid(hdr);
        bth1 = be32_to_cpu(ohdr->bth[1]);
-       slid = ib_get_slid(hdr);
+       qkey = ib_get_qkey(ohdr);
+       src_qp = ib_get_sqpn(ohdr);
        pkey = ib_bth_get_pkey(ohdr);
-       opcode = ib_bth_get_opcode(ohdr);
-       sl = ib_get_sl(hdr);
-       extra_bytes = ib_bth_get_pad(ohdr);
        extra_bytes += (SIZE_OF_CRC << 2);
        sl_from_sc = ibp->sc_to_sl[sc5];
 
@@ -727,10 +712,10 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
                                 * for invalid pkeys is optional according to
                                 * IB spec (release 1.3, section 10.9.4)
                                 */
-                               hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
-                                              pkey, sl,
-                                              src_qp, qp->ibqp.qp_num,
-                                              slid, dlid);
+                               hfi1_bad_pkey(ibp,
+                                             pkey, sl,
+                                             src_qp, qp->ibqp.qp_num,
+                                             slid, dlid);
                                return;
                        }
                } else {
@@ -739,12 +724,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
                        if (mgmt_pkey_idx < 0)
                                goto drop;
                }
-               if (unlikely(qkey != qp->qkey)) {
-                       hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, sl,
-                                      src_qp, qp->ibqp.qp_num,
-                                      slid, dlid);
+               if (unlikely(qkey != qp->qkey)) /* Silent drop */
                        return;
-               }
+
                /* Drop invalid MAD packets (see 13.5.3.1). */
                if (unlikely(qp->ibqp.qp_num == 1 &&
                             (tlen > 2048 || (sc5 == 0xF))))
@@ -811,7 +793,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
                qp->r_flags |= RVT_R_REUSE_SGE;
                goto drop;
        }
-       if (has_grh) {
+       if (packet->grh) {
                hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh,
                              sizeof(struct ib_grh), true, false);
                wc.wc_flags |= IB_WC_GRH;
index a8f0aa4722f6adfb1e045fc800a5e999ce18a2a3..6318e6ca1b188dc9c97d781c044a8262edcbfe90 100644 (file)
 #include "trace.h"
 #include "mmu_rb.h"
 
-struct tid_group {
-       struct list_head list;
-       u32 base;
-       u8 size;
-       u8 used;
-       u8 map;
-};
-
 struct tid_rb_node {
        struct mmu_rb_node mmu;
        unsigned long phys;
@@ -75,8 +67,6 @@ struct tid_pageset {
        u16 count;
 };
 
-#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list))
-
 #define num_user_pages(vaddr, len)                                    \
        (1 + (((((unsigned long)(vaddr) +                              \
                 (unsigned long)(len) - 1) & PAGE_MASK) -              \
@@ -109,88 +99,6 @@ static struct mmu_rb_ops tid_rb_ops = {
        .invalidate = tid_rb_invalidate
 };
 
-static inline u32 rcventry2tidinfo(u32 rcventry)
-{
-       u32 pair = rcventry & ~0x1;
-
-       return EXP_TID_SET(IDX, pair >> 1) |
-               EXP_TID_SET(CTRL, 1 << (rcventry - pair));
-}
-
-static inline void exp_tid_group_init(struct exp_tid_set *set)
-{
-       INIT_LIST_HEAD(&set->list);
-       set->count = 0;
-}
-
-static inline void tid_group_remove(struct tid_group *grp,
-                                   struct exp_tid_set *set)
-{
-       list_del_init(&grp->list);
-       set->count--;
-}
-
-static inline void tid_group_add_tail(struct tid_group *grp,
-                                     struct exp_tid_set *set)
-{
-       list_add_tail(&grp->list, &set->list);
-       set->count++;
-}
-
-static inline struct tid_group *tid_group_pop(struct exp_tid_set *set)
-{
-       struct tid_group *grp =
-               list_first_entry(&set->list, struct tid_group, list);
-       list_del_init(&grp->list);
-       set->count--;
-       return grp;
-}
-
-static inline void tid_group_move(struct tid_group *group,
-                                 struct exp_tid_set *s1,
-                                 struct exp_tid_set *s2)
-{
-       tid_group_remove(group, s1);
-       tid_group_add_tail(group, s2);
-}
-
-int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd)
-{
-       struct hfi1_ctxtdata *uctxt = fd->uctxt;
-       struct hfi1_devdata *dd = fd->dd;
-       u32 tidbase;
-       u32 i;
-       struct tid_group *grp, *gptr;
-
-       exp_tid_group_init(&uctxt->tid_group_list);
-       exp_tid_group_init(&uctxt->tid_used_list);
-       exp_tid_group_init(&uctxt->tid_full_list);
-
-       tidbase = uctxt->expected_base;
-       for (i = 0; i < uctxt->expected_count /
-                    dd->rcv_entries.group_size; i++) {
-               grp = kzalloc(sizeof(*grp), GFP_KERNEL);
-               if (!grp)
-                       goto grp_failed;
-
-               grp->size = dd->rcv_entries.group_size;
-               grp->base = tidbase;
-               tid_group_add_tail(grp, &uctxt->tid_group_list);
-               tidbase += dd->rcv_entries.group_size;
-       }
-
-       return 0;
-
-grp_failed:
-       list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
-                                list) {
-               list_del_init(&grp->list);
-               kfree(grp);
-       }
-
-       return -ENOMEM;
-}
-
 /*
  * Initialize context and file private data needed for Expected
  * receive caching. This needs to be done after the context has
@@ -266,18 +174,6 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd)
        return ret;
 }
 
-void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt)
-{
-       struct tid_group *grp, *gptr;
-
-       list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
-                                list) {
-               list_del_init(&grp->list);
-               kfree(grp);
-       }
-       hfi1_clear_tids(uctxt);
-}
-
 void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
 {
        struct hfi1_ctxtdata *uctxt = fd->uctxt;
@@ -302,23 +198,6 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
        fd->entry_to_rb = NULL;
 }
 
-/*
- * Write an "empty" RcvArray entry.
- * This function exists so the TID registaration code can use it
- * to write to unused/unneeded entries and still take advantage
- * of the WC performance improvements. The HFI will ignore this
- * write to the RcvArray entry.
- */
-static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
-{
-       /*
-        * Doing the WC fill writes only makes sense if the device is
-        * present and the RcvArray has been mapped as WC memory.
-        */
-       if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc)
-               writeq(0, dd->rcvarray_wc + (index * 8));
-}
-
 /*
  * RcvArray entry allocation for Expected Receives is done by the
  * following algorithm:
index 5250c897298ded7b0004b09d07cc138182f7822d..1bdc61be53cb751d2b2e5759911c9768c685cf58 100644 (file)
 
 #include "hfi.h"
 
-#define EXP_TID_TIDLEN_MASK   0x7FFULL
-#define EXP_TID_TIDLEN_SHIFT  0
-#define EXP_TID_TIDCTRL_MASK  0x3ULL
-#define EXP_TID_TIDCTRL_SHIFT 20
-#define EXP_TID_TIDIDX_MASK   0x3FFULL
-#define EXP_TID_TIDIDX_SHIFT  22
-#define EXP_TID_GET(tid, field)        \
-       (((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK)
+#include "exp_rcv.h"
 
-#define EXP_TID_SET(field, value)                      \
-       (((value) & EXP_TID_TID##field##_MASK) <<       \
-        EXP_TID_TID##field##_SHIFT)
-#define EXP_TID_CLEAR(tid, field) ({                                   \
-               (tid) &= ~(EXP_TID_TID##field##_MASK <<                 \
-                          EXP_TID_TID##field##_SHIFT);                 \
-               })
-#define EXP_TID_RESET(tid, field, value) do {                          \
-               EXP_TID_CLEAR(tid, field);                              \
-               (tid) |= EXP_TID_SET(field, (value));                   \
-       } while (0)
-
-void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt);
-int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd);
 int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd);
 void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd);
 int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
index d55339f5d73ba67b83feeead1a5a5d27d63b064e..8f7cfdd9e9ecfa7d0401f1d48e4fc9d06ed53730 100644 (file)
@@ -94,43 +94,13 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
 /* Number of BTH.PSN bits used for sequence number in expected rcvs */
 #define BTH_SEQ_MASK 0x7ffull
 
-/*
- * Define fields in the KDETH header so we can update the header
- * template.
- */
-#define KDETH_OFFSET_SHIFT        0
-#define KDETH_OFFSET_MASK         0x7fff
-#define KDETH_OM_SHIFT            15
-#define KDETH_OM_MASK             0x1
-#define KDETH_TID_SHIFT           16
-#define KDETH_TID_MASK            0x3ff
-#define KDETH_TIDCTRL_SHIFT       26
-#define KDETH_TIDCTRL_MASK        0x3
-#define KDETH_INTR_SHIFT          28
-#define KDETH_INTR_MASK           0x1
-#define KDETH_SH_SHIFT            29
-#define KDETH_SH_MASK             0x1
-#define KDETH_HCRC_UPPER_SHIFT    16
-#define KDETH_HCRC_UPPER_MASK     0xff
-#define KDETH_HCRC_LOWER_SHIFT    24
-#define KDETH_HCRC_LOWER_MASK     0xff
-
 #define AHG_KDETH_INTR_SHIFT 12
 #define AHG_KDETH_SH_SHIFT   13
+#define AHG_KDETH_ARRAY_SIZE  9
 
 #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
 #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
 
-#define KDETH_GET(val, field)                                          \
-       (((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK)
-#define KDETH_SET(dw, field, val) do {                                 \
-               u32 dwval = le32_to_cpu(dw);                            \
-               dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \
-               dwval |= (((val) & KDETH_##field##_MASK) << \
-                         KDETH_##field##_SHIFT);                       \
-               dw = cpu_to_le32(dwval);                                \
-       } while (0)
-
 #define AHG_HEADER_SET(arr, idx, dw, bit, width, value)                        \
        do {                                                            \
                if ((idx) < ARRAY_SIZE((arr)))                          \
@@ -141,23 +111,10 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
                        return -ERANGE;                                 \
        } while (0)
 
-/* KDETH OM multipliers and switch over point */
-#define KDETH_OM_SMALL     4
-#define KDETH_OM_SMALL_SHIFT     2
-#define KDETH_OM_LARGE     64
-#define KDETH_OM_LARGE_SHIFT     6
-#define KDETH_OM_MAX_SIZE  (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
-
 /* Tx request flag bits */
 #define TXREQ_FLAGS_REQ_ACK   BIT(0)      /* Set the ACK bit in the header */
 #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
 
-/* SDMA request flag bits */
-#define SDMA_REQ_FOR_THREAD 1
-#define SDMA_REQ_SEND_DONE  2
-#define SDMA_REQ_HAS_ERROR  3
-#define SDMA_REQ_DONE_ERROR 4
-
 #define SDMA_PKT_Q_INACTIVE BIT(0)
 #define SDMA_PKT_Q_ACTIVE   BIT(1)
 #define SDMA_PKT_Q_DEFERRED BIT(2)
@@ -204,25 +161,41 @@ struct evict_data {
 };
 
 struct user_sdma_request {
-       struct sdma_req_info info;
-       struct hfi1_user_sdma_pkt_q *pq;
-       struct hfi1_user_sdma_comp_q *cq;
        /* This is the original header from user space */
        struct hfi1_pkt_header hdr;
+
+       /* Read mostly fields */
+       struct hfi1_user_sdma_pkt_q *pq ____cacheline_aligned_in_smp;
+       struct hfi1_user_sdma_comp_q *cq;
        /*
         * Pointer to the SDMA engine for this request.
         * Since different request could be on different VLs,
         * each request will need its own engine pointer.
         */
        struct sdma_engine *sde;
-       s8 ahg_idx;
-       u32 ahg[9];
+       struct sdma_req_info info;
+       /* TID array values copied from the tid_iov vector */
+       u32 *tids;
+       /* total length of the data in the request */
+       u32 data_len;
+       /* number of elements copied to the tids array */
+       u16 n_tids;
        /*
-        * KDETH.Offset (Eager) field
-        * We need to remember the initial value so the headers
-        * can be updated properly.
+        * We copy the iovs for this request (based on
+        * info.iovcnt). These are only the data vectors
         */
-       u32 koffset;
+       u8 data_iovs;
+       s8 ahg_idx;
+
+       /* Writeable fields shared with interrupt */
+       u64 seqcomp ____cacheline_aligned_in_smp;
+       u64 seqsubmitted;
+       /* status of the last txreq completed */
+       int status;
+
+       /* Send side fields */
+       struct list_head txps ____cacheline_aligned_in_smp;
+       u64 seqnum;
        /*
         * KDETH.OFFSET (TID) field
         * The offset can cover multiple packets, depending on the
@@ -230,29 +203,21 @@ struct user_sdma_request {
         */
        u32 tidoffset;
        /*
-        * We copy the iovs for this request (based on
-        * info.iovcnt). These are only the data vectors
+        * KDETH.Offset (Eager) field
+        * We need to remember the initial value so the headers
+        * can be updated properly.
         */
-       unsigned data_iovs;
-       /* total length of the data in the request */
-       u32 data_len;
+       u32 koffset;
+       u32 sent;
+       /* TID index copied from the tid_iov vector */
+       u16 tididx;
        /* progress index moving along the iovs array */
-       unsigned iov_idx;
+       u8 iov_idx;
+       u8 done;
+       u8 has_error;
+
        struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
-       /* number of elements copied to the tids array */
-       u16 n_tids;
-       /* TID array values copied from the tid_iov vector */
-       u32 *tids;
-       u16 tididx;
-       u32 sent;
-       u64 seqnum;
-       u64 seqcomp;
-       u64 seqsubmitted;
-       struct list_head txps;
-       unsigned long flags;
-       /* status of the last txreq completed */
-       int status;
-};
+} ____cacheline_aligned_in_smp;
 
 /*
  * A single txreq could span up to 3 physical pages when the MTU
@@ -607,12 +572,20 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
        hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit,
                  uctxt->ctxt, fd->subctxt, info.comp_idx);
        req = pq->reqs + info.comp_idx;
-       memset(req, 0, sizeof(*req));
        req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
+       req->data_len  = 0;
        req->pq = pq;
        req->cq = cq;
        req->status = -1;
        req->ahg_idx = -1;
+       req->iov_idx = 0;
+       req->sent = 0;
+       req->seqnum = 0;
+       req->seqcomp = 0;
+       req->seqsubmitted = 0;
+       req->tids = NULL;
+       req->done = 0;
+       req->has_error = 0;
        INIT_LIST_HEAD(&req->txps);
 
        memcpy(&req->info, &info, sizeof(info));
@@ -701,12 +674,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
 
        /* Save all the IO vector structures */
        for (i = 0; i < req->data_iovs; i++) {
+               req->iovs[i].offset = 0;
                INIT_LIST_HEAD(&req->iovs[i].list);
                memcpy(&req->iovs[i].iov,
                       iovec + idx++,
                       sizeof(req->iovs[i].iov));
                ret = pin_vector_pages(req, &req->iovs[i]);
                if (ret) {
+                       req->data_iovs = i;
                        req->status = ret;
                        goto free_req;
                }
@@ -749,6 +724,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
                }
                req->tids = tmp;
                req->n_tids = ntids;
+               req->tididx = 0;
                idx++;
        }
 
@@ -791,12 +767,12 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
         * request have been submitted to the SDMA engine. However, it
         * will not wait for send completions.
         */
-       while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
+       while (req->seqsubmitted != req->info.npkts) {
                ret = user_sdma_send_pkts(req, pcount);
                if (ret < 0) {
                        if (ret != -EBUSY) {
                                req->status = ret;
-                               set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+                               WRITE_ONCE(req->has_error, 1);
                                if (ACCESS_ONCE(req->seqcomp) ==
                                    req->seqsubmitted - 1)
                                        goto free_req;
@@ -898,10 +874,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
        pq = req->pq;
 
        /* If tx completion has reported an error, we are done. */
-       if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
-               set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+       if (READ_ONCE(req->has_error))
                return -EFAULT;
-       }
 
        /*
         * Check if we might have sent the entire request already
@@ -924,10 +898,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
                 * with errors. If so, we are not going to process any
                 * more packets from this request.
                 */
-               if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
-                       set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+               if (READ_ONCE(req->has_error))
                        return -EFAULT;
-               }
 
                tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
                if (!tx)
@@ -1024,11 +996,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
                                                               datalen);
                                if (changes < 0)
                                        goto free_tx;
-                               sdma_txinit_ahg(&tx->txreq,
-                                               SDMA_TXREQ_F_USE_AHG,
-                                               datalen, req->ahg_idx, changes,
-                                               req->ahg, sizeof(req->hdr),
-                                               user_sdma_txreq_cb);
                        }
                } else {
                        ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
@@ -1105,7 +1072,7 @@ dosend:
        ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
        req->seqsubmitted += count;
        if (req->seqsubmitted == req->info.npkts) {
-               set_bit(SDMA_REQ_SEND_DONE, &req->flags);
+               WRITE_ONCE(req->done, 1);
                /*
                 * The txreq has already been submitted to the HW queue
                 * so we can free the AHG entry now. Corruption will not
@@ -1155,14 +1122,23 @@ static int pin_vector_pages(struct user_sdma_request *req,
        struct hfi1_user_sdma_pkt_q *pq = req->pq;
        struct sdma_mmu_node *node = NULL;
        struct mmu_rb_node *rb_node;
-
-       rb_node = hfi1_mmu_rb_extract(pq->handler,
-                                     (unsigned long)iovec->iov.iov_base,
-                                     iovec->iov.iov_len);
-       if (rb_node)
+       bool extracted;
+
+       extracted =
+               hfi1_mmu_rb_remove_unless_exact(pq->handler,
+                                               (unsigned long)
+                                               iovec->iov.iov_base,
+                                               iovec->iov.iov_len, &rb_node);
+       if (rb_node) {
                node = container_of(rb_node, struct sdma_mmu_node, rb);
-       else
-               rb_node = NULL;
+               if (!extracted) {
+                       atomic_inc(&node->refcount);
+                       iovec->pages = node->pages;
+                       iovec->npages = node->npages;
+                       iovec->node = node;
+                       return 0;
+               }
+       }
 
        if (!node) {
                node = kzalloc(sizeof(*node), GFP_KERNEL);
@@ -1423,21 +1399,22 @@ done:
 }
 
 static int set_txreq_header_ahg(struct user_sdma_request *req,
-                               struct user_sdma_txreq *tx, u32 len)
+                               struct user_sdma_txreq *tx, u32 datalen)
 {
+       u32 ahg[AHG_KDETH_ARRAY_SIZE];
        int diff = 0;
        u8 omfactor; /* KDETH.OM */
        struct hfi1_user_sdma_pkt_q *pq = req->pq;
        struct hfi1_pkt_header *hdr = &req->hdr;
        u16 pbclen = le16_to_cpu(hdr->pbc[0]);
-       u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(len));
+       u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
 
        if (PBC2LRH(pbclen) != lrhlen) {
                /* PBC.PbcLengthDWs */
-               AHG_HEADER_SET(req->ahg, diff, 0, 0, 12,
+               AHG_HEADER_SET(ahg, diff, 0, 0, 12,
                               cpu_to_le16(LRH2PBC(lrhlen)));
                /* LRH.PktLen (we need the full 16 bits due to byte swap) */
-               AHG_HEADER_SET(req->ahg, diff, 3, 0, 16,
+               AHG_HEADER_SET(ahg, diff, 3, 0, 16,
                               cpu_to_be16(lrhlen >> 2));
        }
 
@@ -1449,13 +1426,12 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
                (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff);
        if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
                val32 |= 1UL << 31;
-       AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
-       AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
+       AHG_HEADER_SET(ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
+       AHG_HEADER_SET(ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
        /* KDETH.Offset */
-       AHG_HEADER_SET(req->ahg, diff, 15, 0, 16,
+       AHG_HEADER_SET(ahg, diff, 15, 0, 16,
                       cpu_to_le16(req->koffset & 0xffff));
-       AHG_HEADER_SET(req->ahg, diff, 15, 16, 16,
-                      cpu_to_le16(req->koffset >> 16));
+       AHG_HEADER_SET(ahg, diff, 15, 16, 16, cpu_to_le16(req->koffset >> 16));
        if (req_opcode(req->info.ctrl) == EXPECTED) {
                __le16 val;
 
@@ -1473,9 +1449,8 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
                         * we have to check again.
                         */
                        if (++req->tididx > req->n_tids - 1 ||
-                           !req->tids[req->tididx]) {
+                           !req->tids[req->tididx])
                                return -EINVAL;
-                       }
                        tidval = req->tids[req->tididx];
                }
                omfactor = ((EXP_TID_GET(tidval, LEN) *
@@ -1483,7 +1458,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
                                 KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
                                 KDETH_OM_SMALL_SHIFT;
                /* KDETH.OM and KDETH.OFFSET (TID) */
-               AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
+               AHG_HEADER_SET(ahg, diff, 7, 0, 16,
                               ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
                                ((req->tidoffset >> omfactor)
                                 & 0x7fff)));
@@ -1503,12 +1478,20 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
                                             AHG_KDETH_INTR_SHIFT));
                }
 
-               AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
+               AHG_HEADER_SET(ahg, diff, 7, 16, 14, val);
        }
+       if (diff < 0)
+               return diff;
 
        trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
                                        req->info.comp_idx, req->sde->this_idx,
-                                       req->ahg_idx, req->ahg, diff, tidval);
+                                       req->ahg_idx, ahg, diff, tidval);
+       sdma_txinit_ahg(&tx->txreq,
+                       SDMA_TXREQ_F_USE_AHG,
+                       datalen, req->ahg_idx, diff,
+                       ahg, sizeof(req->hdr),
+                       user_sdma_txreq_cb);
+
        return diff;
 }
 
@@ -1537,7 +1520,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
        if (status != SDMA_TXREQ_S_OK) {
                SDMA_DBG(req, "SDMA completion with error %d",
                         status);
-               set_bit(SDMA_REQ_HAS_ERROR, &req->flags);
+               WRITE_ONCE(req->has_error, 1);
        }
 
        req->seqcomp = tx->seqnum;
@@ -1556,8 +1539,8 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
                if (status != SDMA_TXREQ_S_OK)
                        req->status = status;
                if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) &&
-                   (test_bit(SDMA_REQ_SEND_DONE, &req->flags) ||
-                    test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) {
+                   (READ_ONCE(req->done) ||
+                    READ_ONCE(req->has_error))) {
                        user_sdma_free_request(req, false);
                        pq_update(pq);
                        set_comp_state(pq, cq, idx, ERROR, req->status);
index 2d19f9bb434de5a0396f31996f20f4fd01eec179..9c9ded643ed44bee0ace051fe559a13a1fd17098 100644 (file)
@@ -508,13 +508,14 @@ again:
 /*
  * Make sure the QP is ready and able to accept the given opcode.
  */
-static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
+static inline opcode_handler qp_ok(struct hfi1_packet *packet)
 {
        if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
                return NULL;
-       if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) ||
-           (opcode == IB_OPCODE_CNP))
-               return opcode_handler_tbl[opcode];
+       if (((packet->opcode & RVT_OPCODE_QP_MASK) ==
+            packet->qp->allowed_ops) ||
+           (packet->opcode == IB_OPCODE_CNP))
+               return opcode_handler_tbl[packet->opcode];
 
        return NULL;
 }
@@ -548,69 +549,34 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
        return pbc;
 }
 
-/**
- * hfi1_ib_rcv - process an incoming packet
- * @packet: data packet information
- *
- * This is called to process an incoming packet at interrupt level.
- *
- * Tlen is the length of the header + data + CRC in bytes.
- */
-void hfi1_ib_rcv(struct hfi1_packet *packet)
+static inline void hfi1_handle_packet(struct hfi1_packet *packet,
+                                     bool is_mcast)
 {
+       u32 qp_num;
        struct hfi1_ctxtdata *rcd = packet->rcd;
-       struct ib_header *hdr = packet->hdr;
-       u32 tlen = packet->tlen;
        struct hfi1_pportdata *ppd = rcd->ppd;
        struct hfi1_ibport *ibp = rcd_to_iport(rcd);
        struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
        opcode_handler packet_handler;
        unsigned long flags;
-       u32 qp_num;
-       int lnh;
-       u8 opcode;
-       u16 lid;
-
-       /* Check for GRH */
-       lnh = ib_get_lnh(hdr);
-       if (lnh == HFI1_LRH_BTH) {
-               packet->ohdr = &hdr->u.oth;
-       } else if (lnh == HFI1_LRH_GRH) {
-               u32 vtf;
-
-               packet->ohdr = &hdr->u.l.oth;
-               if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
-                       goto drop;
-               vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
-               if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
-                       goto drop;
-               packet->rcv_flags |= HFI1_HAS_GRH;
-       } else {
-               goto drop;
-       }
 
-       trace_input_ibhdr(rcd->dd, hdr);
+       inc_opstats(packet->tlen, &rcd->opstats->stats[packet->opcode]);
 
-       opcode = ib_bth_get_opcode(packet->ohdr);
-       inc_opstats(tlen, &rcd->opstats->stats[opcode]);
-
-       /* Get the destination QP number. */
-       qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK;
-       lid = ib_get_dlid(hdr);
-       if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
-                    (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) {
+       if (unlikely(is_mcast)) {
                struct rvt_mcast *mcast;
                struct rvt_mcast_qp *p;
 
-               if (lnh != HFI1_LRH_GRH)
+               if (!packet->grh)
                        goto drop;
-               mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid);
+               mcast = rvt_mcast_find(&ibp->rvp,
+                                      &packet->grh->dgid,
+                                      packet->dlid);
                if (!mcast)
                        goto drop;
                list_for_each_entry_rcu(p, &mcast->qp_list, list) {
                        packet->qp = p->qp;
                        spin_lock_irqsave(&packet->qp->r_lock, flags);
-                       packet_handler = qp_ok(opcode, packet);
+                       packet_handler = qp_ok(packet);
                        if (likely(packet_handler))
                                packet_handler(packet);
                        else
@@ -624,19 +590,21 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
                if (atomic_dec_return(&mcast->refcount) <= 1)
                        wake_up(&mcast->wait);
        } else {
+               /* Get the destination QP number. */
+               qp_num = ib_bth_get_qpn(packet->ohdr);
                rcu_read_lock();
                packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
                if (!packet->qp) {
                        rcu_read_unlock();
                        goto drop;
                }
-               if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode,
+               if (unlikely(hfi1_dbg_fault_opcode(packet->qp, packet->opcode,
                                                   true))) {
                        rcu_read_unlock();
                        goto drop;
                }
                spin_lock_irqsave(&packet->qp->r_lock, flags);
-               packet_handler = qp_ok(opcode, packet);
+               packet_handler = qp_ok(packet);
                if (likely(packet_handler))
                        packet_handler(packet);
                else
@@ -645,11 +613,29 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
                rcu_read_unlock();
        }
        return;
-
 drop:
        ibp->rvp.n_pkt_drops++;
 }
 
+/**
+ * hfi1_ib_rcv - process an incoming packet
+ * @packet: data packet information
+ *
+ * This is called to process an incoming packet at interrupt level.
+ */
+void hfi1_ib_rcv(struct hfi1_packet *packet)
+{
+       struct hfi1_ctxtdata *rcd = packet->rcd;
+       bool is_mcast = false;
+
+       if (unlikely(hfi1_check_mcast(packet->dlid)))
+               is_mcast = true;
+
+       trace_input_ibhdr(rcd->dd, packet,
+                         !!(packet->rhf & RHF_DC_INFO_SMASK));
+       hfi1_handle_packet(packet, is_mcast);
+}
+
 /*
  * This is called from a timer to check for QPs
  * which need kernel memory in order to send a packet.
@@ -863,7 +849,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 
                        /* No vl15 here */
                        /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
-                       pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+                       pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
 
                        if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
                                pbc = hfi1_fault_tx(qp, opcode, pbc);
@@ -885,7 +871,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
                return ret;
        }
        trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
-                               &ps->s_txreq->phdr.hdr);
+                               &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));
        return ret;
 
 bail_ecomm:
@@ -999,7 +985,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
                u8 opcode = get_opcode(&tx->phdr.hdr);
 
                /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
-               pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+               pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
                if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
                        pbc = hfi1_fault_tx(qp, opcode, pbc);
                pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
@@ -1058,7 +1044,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
        }
 
        trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
-                              &ps->s_txreq->phdr.hdr);
+                              &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));
 
 pio_bail:
        if (qp->s_wqe) {
@@ -1368,7 +1354,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
        props->lmc = ppd->lmc;
        /* OPA logical states match IB logical states */
        props->state = driver_lstate(ppd);
-       props->phys_state = hfi1_ibphys_portstate(ppd);
+       props->phys_state = driver_pstate(ppd);
        props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
        props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
        /* see rate_show() in ib core/sysfs.c */
@@ -1551,9 +1537,13 @@ static void init_ibport(struct hfi1_pportdata *ppd)
        /* Set the prefix to the default value (see ch. 4.1.1) */
        ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
        ibp->rvp.sm_lid = 0;
-       /* Below should only set bits defined in OPA PortInfo.CapabilityMask */
+       /*
+        * Below should only set bits defined in OPA PortInfo.CapabilityMask
+        * and PortInfo.CapabilityMask3
+        */
        ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP |
                IB_PORT_CAP_MASK_NOTICE_SUP;
+       ibp->rvp.port_cap3_flags = OPA_CAP_MASK3_IsSharedSpaceSupported;
        ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
        ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
        ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
index cd635d0c1d3b32eb8fbd3a0303e03f532eca4a09..fdf1e1fb880c401b8b5b36c69bddd41e6ae9cafd 100644 (file)
@@ -236,8 +236,8 @@ static inline int hfi1_send_ok(struct rvt_qp *qp)
 /*
  * This must be called with s_lock held.
  */
-void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
-                   u32 qp1, u32 qp2, u16 lid1, u16 lid2);
+void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl,
+                  u32 qp1, u32 qp2, u16 lid1, u16 lid2);
 void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num);
 void hfi1_sys_guid_chg(struct hfi1_ibport *ibp);
 void hfi1_node_desc_chg(struct hfi1_ibport *ibp);
@@ -307,8 +307,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet);
 
 void hfi1_rc_hdrerr(
        struct hfi1_ctxtdata *rcd,
-       struct ib_header *hdr,
-       u32 rcv_flags,
+       struct hfi1_packet *packet,
        struct rvt_qp *qp);
 
 u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
@@ -346,8 +345,7 @@ static inline u8 get_opcode(struct ib_header *h)
                return be32_to_cpu(h->u.l.oth.bth[0]) >> 24;
 }
 
-int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
-                      int has_grh, struct rvt_qp *qp, u32 bth0);
+int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet);
 
 u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
                  const struct ib_global_route *grh, u32 hwords, u32 nwords);
index 339f0cdd56d6bb30f1910245d0b4b493a0090e23..5a3f80ba9752d7c4edf6ec943af413d97c310c8e 100644 (file)
@@ -156,11 +156,11 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
        return ret;
 bail:
        /*
-        * hfi1_free_ctxtdata() also releases send_context
-        * structure if uctxt->sc is not null
+        * hfi1_rcd_put() will call hfi1_free_ctxtdata(), which will
+        * release send_context structure if uctxt->sc is not null
         */
        dd->rcd[uctxt->ctxt] = NULL;
-       hfi1_free_ctxtdata(dd, uctxt);
+       hfi1_rcd_put(uctxt);
        dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret);
        return ret;
 }
@@ -208,7 +208,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
        hfi1_clear_ctxt_pkey(dd, uctxt);
 
        hfi1_stats.sps_ctxts--;
-       hfi1_free_ctxtdata(dd, uctxt);
+       hfi1_rcd_put(uctxt);
 }
 
 void hfi1_vnic_setup(struct hfi1_devdata *dd)
@@ -751,6 +751,7 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
                rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
                if (rc)
                        break;
+               hfi1_rcd_get(dd->vnic.ctxt[i]);
                dd->vnic.ctxt[i]->vnic_q_idx = i;
        }
 
@@ -762,6 +763,7 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
                 */
                while (i-- > dd->vnic.num_ctxt) {
                        deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
+                       hfi1_rcd_put(dd->vnic.ctxt[i]);
                        dd->vnic.ctxt[i] = NULL;
                }
                goto alloc_fail;
@@ -791,6 +793,7 @@ static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
        if (--dd->vnic.num_vports == 0) {
                for (i = 0; i < dd->vnic.num_ctxt; i++) {
                        deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
+                       hfi1_rcd_put(dd->vnic.ctxt[i]);
                        dd->vnic.ctxt[i] = NULL;
                }
                hfi1_deinit_vnic_rsm(dd);
index a3e21a25cea5b38c1723ebbafec21be0ea1c69fa..f9e1c69603a5626c5f091bd687c0947e9e67d9ac 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _QIB_KERNEL_H
 #define _QIB_KERNEL_H
 /*
- * Copyright (c) 2012, 2013 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2012 - 2017 Intel Corporation.  All rights reserved.
  * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
@@ -443,7 +443,7 @@ struct qib_irq_notify;
 #endif
 
 struct qib_msix_entry {
-       struct msix_entry msix;
+       int irq;
        void *arg;
 #ifdef CONFIG_INFINIBAND_QIB_DCA
        int dca;
@@ -1433,9 +1433,9 @@ int qib_pcie_init(struct pci_dev *, const struct pci_device_id *);
 int qib_pcie_ddinit(struct qib_devdata *, struct pci_dev *,
                    const struct pci_device_id *);
 void qib_pcie_ddcleanup(struct qib_devdata *);
-int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct qib_msix_entry *);
+int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent);
 int qib_reinit_intr(struct qib_devdata *);
-void qib_enable_intx(struct pci_dev *);
+void qib_enable_intx(struct qib_devdata *dd);
 void qib_nomsi(struct qib_devdata *);
 void qib_nomsix(struct qib_devdata *);
 void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *);
index e423b71e6ea06fc86af5385fc1b27b0a5d5ad06b..46045fc28fa0188c2ff6b6762eca757d87f49091 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2013 - 2017 Intel Corporation. All rights reserved.
  * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
  * All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
@@ -1838,7 +1838,7 @@ static int qib_6120_setup_reset(struct qib_devdata *dd)
 
 bail:
        if (ret) {
-               if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL))
+               if (qib_pcie_params(dd, dd->lbus_width, NULL))
                        qib_dev_err(dd,
                                "Reset failed to setup PCIe or interrupts; continuing anyway\n");
                /* clear the reset error, init error/hwerror mask */
@@ -3562,7 +3562,7 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev,
        if (qib_mini_init)
                goto bail;
 
-       if (qib_pcie_params(dd, 8, NULL, NULL))
+       if (qib_pcie_params(dd, 8, NULL))
                qib_dev_err(dd,
                        "Failed to setup PCIe or interrupts; continuing anyway\n");
        dd->cspec->irq = pdev->irq; /* save IRQ */
index c3679c48e61cafc689a6ff7a215212f26a581e4c..49cd6e3beb7274db40e9342f9b6853f03de12a0c 100644 (file)
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2011 - 2017 Intel Corporation.  All rights reserved.
  * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
  * All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
@@ -2148,7 +2149,7 @@ static int qib_setup_7220_reset(struct qib_devdata *dd)
 
 bail:
        if (ret) {
-               if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL))
+               if (qib_pcie_params(dd, dd->lbus_width, NULL))
                        qib_dev_err(dd,
                                "Reset failed to setup PCIe or interrupts; continuing anyway\n");
 
@@ -3309,7 +3310,7 @@ static int qib_7220_intr_fallback(struct qib_devdata *dd)
        qib_devinfo(dd->pcidev,
                "MSI interrupt not detected, trying INTx interrupts\n");
        qib_7220_free_irq(dd);
-       qib_enable_intx(dd->pcidev);
+       qib_enable_intx(dd);
        /*
         * Some newer kernels require free_irq before disable_msi,
         * and irq can be changed during disable and INTx enable
@@ -4619,7 +4620,7 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev,
                minwidth = 8; /* x8 capable boards */
                break;
        }
-       if (qib_pcie_params(dd, minwidth, NULL, NULL))
+       if (qib_pcie_params(dd, minwidth, NULL))
                qib_dev_err(dd,
                        "Failed to setup PCIe or interrupts; continuing anyway\n");
 
index bb2439fff8fa6bc7b7dfad3253a9a56af44c2b26..2653064ce9e91a0dc094f14243a891bd80d58320 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2012 - 2017 Intel Corporation.  All rights reserved.
  * Copyright (c) 2008 - 2012 QLogic Corporation. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -2841,10 +2841,10 @@ static void qib_7322_nomsix(struct qib_devdata *dd)
                        reset_dca_notifier(dd, &dd->cspec->msix_entries[i]);
 #endif
                        irq_set_affinity_hint(
-                         dd->cspec->msix_entries[i].msix.vector, NULL);
+                               dd->cspec->msix_entries[i].irq, NULL);
                        free_cpumask_var(dd->cspec->msix_entries[i].mask);
-                       free_irq(dd->cspec->msix_entries[i].msix.vector,
-                          dd->cspec->msix_entries[i].arg);
+                       free_irq(dd->cspec->msix_entries[i].irq,
+                                dd->cspec->msix_entries[i].arg);
                }
                qib_nomsix(dd);
        }
@@ -3336,9 +3336,9 @@ static void reset_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m)
        qib_devinfo(dd->pcidev,
                "Disabling notifier on HCA %d irq %d\n",
                dd->unit,
-               m->msix.vector);
+               m->irq);
        irq_set_affinity_notifier(
-               m->msix.vector,
+               m->irq,
                NULL);
        m->notifier = NULL;
 }
@@ -3354,7 +3354,7 @@ static void setup_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m)
                int ret;
 
                m->notifier = n;
-               n->notify.irq = m->msix.vector;
+               n->notify.irq = m->irq;
                n->notify.notify = qib_irq_notifier_notify;
                n->notify.release = qib_irq_notifier_release;
                n->arg = m->arg;
@@ -3500,10 +3500,21 @@ try_intx:
                                 - 1,
                                QIB_DRV_NAME "%d (kctx)", dd->unit);
                }
-               ret = request_irq(
-                       dd->cspec->msix_entries[msixnum].msix.vector,
-                       handler, 0, dd->cspec->msix_entries[msixnum].name,
-                       arg);
+
+               dd->cspec->msix_entries[msixnum].irq = pci_irq_vector(
+                       dd->pcidev, msixnum);
+               if (dd->cspec->msix_entries[msixnum].irq < 0) {
+                       qib_dev_err(dd,
+                                   "Couldn't get MSIx irq (vec=%d): %d\n",
+                                   msixnum,
+                                   dd->cspec->msix_entries[msixnum].irq);
+                       qib_7322_nomsix(dd);
+                       goto try_intx;
+               }
+               ret = request_irq(dd->cspec->msix_entries[msixnum].irq,
+                                 handler, 0,
+                                 dd->cspec->msix_entries[msixnum].name,
+                                 arg);
                if (ret) {
                        /*
                         * Shouldn't happen since the enable said we could
@@ -3512,7 +3523,7 @@ try_intx:
                        qib_dev_err(dd,
                                "Couldn't setup MSIx interrupt (vec=%d, irq=%d): %d\n",
                                msixnum,
-                               dd->cspec->msix_entries[msixnum].msix.vector,
+                               dd->cspec->msix_entries[msixnum].irq,
                                ret);
                        qib_7322_nomsix(dd);
                        goto try_intx;
@@ -3548,7 +3559,7 @@ try_intx:
                                        dd->cspec->msix_entries[msixnum].mask);
                        }
                        irq_set_affinity_hint(
-                               dd->cspec->msix_entries[msixnum].msix.vector,
+                               dd->cspec->msix_entries[msixnum].irq,
                                dd->cspec->msix_entries[msixnum].mask);
                }
                msixnum++;
@@ -3744,7 +3755,6 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
        if (msix_entries) {
                /* restore the MSIx vector address and data if saved above */
                for (i = 0; i < msix_entries; i++) {
-                       dd->cspec->msix_entries[i].msix.entry = i;
                        if (!msix_vecsave || !msix_vecsave[2 * i])
                                continue;
                        qib_write_kreg(dd, 2 * i +
@@ -3762,8 +3772,7 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
        write_7322_initregs(dd);
 
        if (qib_pcie_params(dd, dd->lbus_width,
-                           &dd->cspec->num_msix_entries,
-                           dd->cspec->msix_entries))
+                           &dd->cspec->num_msix_entries))
                qib_dev_err(dd,
                        "Reset failed to setup PCIe or interrupts; continuing anyway\n");
 
@@ -5195,7 +5204,7 @@ static int qib_7322_intr_fallback(struct qib_devdata *dd)
        qib_devinfo(dd->pcidev,
                "MSIx interrupt not detected, trying INTx interrupts\n");
        qib_7322_nomsix(dd);
-       qib_enable_intx(dd->pcidev);
+       qib_enable_intx(dd);
        qib_setup_7322_interrupt(dd, 0);
        return 1;
 }
@@ -7327,10 +7336,7 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
        if (!dd->cspec->msix_entries)
                tabsize = 0;
 
-       for (i = 0; i < tabsize; i++)
-               dd->cspec->msix_entries[i].msix.entry = i;
-
-       if (qib_pcie_params(dd, 8, &tabsize, dd->cspec->msix_entries))
+       if (qib_pcie_params(dd, 8, &tabsize))
                qib_dev_err(dd,
                        "Failed to setup PCIe or interrupts; continuing anyway\n");
        /* may be less than we wanted, if not enough available */
index da295e0392ed8446db60a7666a4faa54d234baf9..a4a7f2a76f24ec747e3307c12ac511d1c7463bb2 100644 (file)
@@ -134,24 +134,21 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
 }
 
 /*
- * Send a bad [PQ]_Key trap (ch. 14.3.8).
+ * Send a bad P_Key trap (ch. 14.3.8).
  */
-void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
-                  u32 qp1, u32 qp2, __be16 lid1, __be16 lid2)
+void qib_bad_pkey(struct qib_ibport *ibp, u32 key, u32 sl,
+                 u32 qp1, u32 qp2, __be16 lid1, __be16 lid2)
 {
        struct ib_mad_notice_attr data;
 
-       if (trap_num == IB_NOTICE_TRAP_BAD_PKEY)
-               ibp->rvp.pkey_violations++;
-       else
-               ibp->rvp.qkey_violations++;
        ibp->rvp.n_pkt_drops++;
+       ibp->rvp.pkey_violations++;
 
        /* Send violation trap */
        data.generic_type = IB_NOTICE_TYPE_SECURITY;
        data.prod_type_msb = 0;
        data.prod_type_lsb = IB_NOTICE_PROD_CA;
-       data.trap_num = trap_num;
+       data.trap_num = IB_NOTICE_TRAP_BAD_PKEY;
        data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid);
        data.toggle_count = 0;
        memset(&data.details, 0, sizeof(data.details));
index c379b8342a0909ec9a3f086cd96297b220e2a2a5..d90403e31a9d3f8ed0e79636830ac68546b2a48e 100644 (file)
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2010 - 2017 Intel Corporation.  All rights reserved.
  * Copyright (c) 2008, 2009 QLogic Corporation. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -187,112 +188,84 @@ void qib_pcie_ddcleanup(struct qib_devdata *dd)
        pci_set_drvdata(dd->pcidev, NULL);
 }
 
-static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt,
-                          struct qib_msix_entry *qib_msix_entry)
-{
-       int ret;
-       int nvec = *msixcnt;
-       struct msix_entry *msix_entry;
-       int i;
-
-       ret = pci_msix_vec_count(dd->pcidev);
-       if (ret < 0)
-               goto do_intx;
-
-       nvec = min(nvec, ret);
-
-       /* We can't pass qib_msix_entry array to qib_msix_setup
-        * so use a dummy msix_entry array and copy the allocated
-        * irq back to the qib_msix_entry array. */
-       msix_entry = kcalloc(nvec, sizeof(*msix_entry), GFP_KERNEL);
-       if (!msix_entry)
-               goto do_intx;
-
-       for (i = 0; i < nvec; i++)
-               msix_entry[i] = qib_msix_entry[i].msix;
-
-       ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec);
-       if (ret < 0)
-               goto free_msix_entry;
-       else
-               nvec = ret;
-
-       for (i = 0; i < nvec; i++)
-               qib_msix_entry[i].msix = msix_entry[i];
-
-       kfree(msix_entry);
-       *msixcnt = nvec;
-       return;
-
-free_msix_entry:
-       kfree(msix_entry);
-
-do_intx:
-       qib_dev_err(
-               dd,
-               "pci_enable_msix_range %d vectors failed: %d, falling back to INTx\n",
-               nvec, ret);
-       *msixcnt = 0;
-       qib_enable_intx(dd->pcidev);
-}
-
 /**
  * We save the msi lo and hi values, so we can restore them after
  * chip reset (the kernel PCI infrastructure doesn't yet handle that
  * correctly.
  */
-static int qib_msi_setup(struct qib_devdata *dd, int pos)
+static void qib_msi_setup(struct qib_devdata *dd, int pos)
 {
        struct pci_dev *pdev = dd->pcidev;
        u16 control;
-       int ret;
 
-       ret = pci_enable_msi(pdev);
-       if (ret)
-               qib_dev_err(dd,
-                       "pci_enable_msi failed: %d, interrupts may not work\n",
-                       ret);
-       /* continue even if it fails, we may still be OK... */
-
-       pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO,
-                             &dd->msi_lo);
-       pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_HI,
-                             &dd->msi_hi);
+       pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO, &dd->msi_lo);
+       pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_HI, &dd->msi_hi);
        pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &control);
+
        /* now save the data (vector) info */
-       pci_read_config_word(pdev, pos + ((control & PCI_MSI_FLAGS_64BIT)
-                                   ? 12 : 8),
+       pci_read_config_word(pdev,
+                            pos + ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
                             &dd->msi_data);
-       return ret;
 }
 
-int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent,
-                   struct qib_msix_entry *entry)
+static int qib_allocate_irqs(struct qib_devdata *dd, u32 maxvec)
+{
+       unsigned int flags = PCI_IRQ_LEGACY;
+
+       /* Check our capabilities */
+       if (dd->pcidev->msix_cap) {
+               flags |= PCI_IRQ_MSIX;
+       } else {
+               if (dd->pcidev->msi_cap) {
+                       flags |= PCI_IRQ_MSI;
+                       /* Get msi_lo and msi_hi */
+                       qib_msi_setup(dd, dd->pcidev->msi_cap);
+               }
+       }
+
+       if (!(flags & (PCI_IRQ_MSIX | PCI_IRQ_MSI)))
+               qib_dev_err(dd, "No PCI MSI or MSIx capability!\n");
+
+       return pci_alloc_irq_vectors(dd->pcidev, 1, maxvec, flags);
+}
+
+int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent)
 {
        u16 linkstat, speed;
-       int pos = 0, ret = 1;
+       int nvec;
+       int maxvec;
+       int ret = 0;
 
        if (!pci_is_pcie(dd->pcidev)) {
                qib_dev_err(dd, "Can't find PCI Express capability!\n");
                /* set up something... */
                dd->lbus_width = 1;
                dd->lbus_speed = 2500; /* Gen1, 2.5GHz */
+               ret = -1;
                goto bail;
        }
 
-       pos = dd->pcidev->msix_cap;
-       if (nent && *nent && pos) {
-               qib_msix_setup(dd, pos, nent, entry);
-               ret = 0; /* did it, either MSIx or INTx */
-       } else {
-               pos = dd->pcidev->msi_cap;
-               if (pos)
-                       ret = qib_msi_setup(dd, pos);
-               else
-                       qib_dev_err(dd, "No PCI MSI or MSIx capability!\n");
+       maxvec = (nent && *nent) ? *nent : 1;
+       nvec = qib_allocate_irqs(dd, maxvec);
+       if (nvec < 0) {
+               ret = nvec;
+               goto bail;
+       }
+
+       /*
+        * If nent exists, make sure to record how many vectors were allocated
+        */
+       if (nent) {
+               *nent = nvec;
+
+               /*
+                * If we requested (nent) MSIX, but msix_enabled is not set,
+                * pci_alloc_irq_vectors() enabled INTx.
+                */
+               if (!dd->pcidev->msix_enabled)
+                       qib_dev_err(dd,
+                                   "no msix vectors allocated, using INTx\n");
        }
-       if (!pos)
-               qib_enable_intx(dd->pcidev);
 
        pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat);
        /*
@@ -379,7 +352,7 @@ int qib_reinit_intr(struct qib_devdata *dd)
        ret = 1;
 bail:
        if (!ret && (dd->flags & QIB_HAS_INTX)) {
-               qib_enable_intx(dd->pcidev);
+               qib_enable_intx(dd);
                ret = 1;
        }
 
@@ -397,7 +370,7 @@ bail:
 void qib_nomsi(struct qib_devdata *dd)
 {
        dd->msi_lo = 0;
-       pci_disable_msi(dd->pcidev);
+       pci_free_irq_vectors(dd->pcidev);
 }
 
 /*
@@ -405,23 +378,21 @@ void qib_nomsi(struct qib_devdata *dd)
  */
 void qib_nomsix(struct qib_devdata *dd)
 {
-       pci_disable_msix(dd->pcidev);
+       pci_free_irq_vectors(dd->pcidev);
 }
 
 /*
  * Similar to pci_intx(pdev, 1), except that we make sure
  * msi(x) is off.
  */
-void qib_enable_intx(struct pci_dev *pdev)
+void qib_enable_intx(struct qib_devdata *dd)
 {
        u16 cw, new;
        int pos;
+       struct pci_dev *pdev = dd->pcidev;
 
-       /* first, turn on INTx */
-       pci_read_config_word(pdev, PCI_COMMAND, &cw);
-       new = cw & ~PCI_COMMAND_INTX_DISABLE;
-       if (new != cw)
-               pci_write_config_word(pdev, PCI_COMMAND, new);
+       if (pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_LEGACY) < 0)
+               qib_dev_err(dd, "Failed to enable INTx\n");
 
        pos = pdev->msi_cap;
        if (pos) {
index bd09de7c6e56c023fb2b93b1d7a8210887e62526..28528459a0524724cb64405bac1ef0d5f51a4b2b 100644 (file)
@@ -59,7 +59,7 @@ static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
                        continue;
                /* Check LKEY */
                if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
-                                &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+                                NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
                        goto bad_lkey;
                qp->r_len += wqe->sg_list[i].length;
                j++;
@@ -256,11 +256,11 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
                }
                if (!qib_pkey_ok((u16)bth0,
                                 qib_get_pkey(ibp, qp->s_alt_pkey_index))) {
-                       qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY,
-                                     (u16)bth0,
-                                     (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
-                                     0, qp->ibqp.qp_num,
-                                     hdr->lrh[3], hdr->lrh[1]);
+                       qib_bad_pkey(ibp,
+                                    (u16)bth0,
+                                    (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+                                    0, qp->ibqp.qp_num,
+                                    hdr->lrh[3], hdr->lrh[1]);
                        goto err;
                }
                /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
@@ -295,11 +295,11 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
                }
                if (!qib_pkey_ok((u16)bth0,
                                 qib_get_pkey(ibp, qp->s_pkey_index))) {
-                       qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY,
-                                     (u16)bth0,
-                                     (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
-                                     0, qp->ibqp.qp_num,
-                                     hdr->lrh[3], hdr->lrh[1]);
+                       qib_bad_pkey(ibp,
+                                    (u16)bth0,
+                                    (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+                                    0, qp->ibqp.qp_num,
+                                    hdr->lrh[3], hdr->lrh[1]);
                        goto err;
                }
                /* Validate the SLID. See Ch. 9.6.1.5 */
index 341a123ee95ca5afaaef9ff0ab7cb6a6513cbec4..be4907453ac4d031f2b1e7d8ed5ddd09235b0837 100644 (file)
@@ -66,8 +66,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
        qp = rvt_lookup_qpn(rdi, &ibp->rvp, swqe->ud_wr.remote_qpn);
        if (!qp) {
                ibp->rvp.n_pkt_drops++;
-               rcu_read_unlock();
-               return;
+               goto drop;
        }
 
        sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ?
@@ -94,11 +93,11 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
                if (unlikely(!qib_pkey_ok(pkey1, pkey2))) {
                        lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
                                          ((1 << ppd->lmc) - 1));
-                       qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, pkey1,
-                                     rdma_ah_get_sl(ah_attr),
-                                     sqp->ibqp.qp_num, qp->ibqp.qp_num,
-                                     cpu_to_be16(lid),
-                                     cpu_to_be16(rdma_ah_get_dlid(ah_attr)));
+                       qib_bad_pkey(ibp, pkey1,
+                                    rdma_ah_get_sl(ah_attr),
+                                    sqp->ibqp.qp_num, qp->ibqp.qp_num,
+                                    cpu_to_be16(lid),
+                                    cpu_to_be16(rdma_ah_get_dlid(ah_attr)));
                        goto drop;
                }
        }
@@ -113,18 +112,8 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
 
                qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
                        sqp->qkey : swqe->ud_wr.remote_qkey;
-               if (unlikely(qkey != qp->qkey)) {
-                       u16 lid;
-
-                       lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
-                                         ((1 << ppd->lmc) - 1));
-                       qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey,
-                                     rdma_ah_get_sl(ah_attr),
-                                     sqp->ibqp.qp_num, qp->ibqp.qp_num,
-                                     cpu_to_be16(lid),
-                                     cpu_to_be16(rdma_ah_get_dlid(ah_attr)));
+               if (unlikely(qkey != qp->qkey))
                        goto drop;
-               }
        }
 
        /*
@@ -487,22 +476,18 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
                        pkey1 = be32_to_cpu(ohdr->bth[0]);
                        pkey2 = qib_get_pkey(ibp, qp->s_pkey_index);
                        if (unlikely(!qib_pkey_ok(pkey1, pkey2))) {
-                               qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY,
-                                             pkey1,
-                                             (be16_to_cpu(hdr->lrh[0]) >> 4) &
+                               qib_bad_pkey(ibp,
+                                            pkey1,
+                                            (be16_to_cpu(hdr->lrh[0]) >> 4) &
                                                0xF,
-                                             src_qp, qp->ibqp.qp_num,
-                                             hdr->lrh[3], hdr->lrh[1]);
+                                            src_qp, qp->ibqp.qp_num,
+                                            hdr->lrh[3], hdr->lrh[1]);
                                return;
                        }
                }
-               if (unlikely(qkey != qp->qkey)) {
-                       qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey,
-                                     (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
-                                     src_qp, qp->ibqp.qp_num,
-                                     hdr->lrh[3], hdr->lrh[1]);
+               if (unlikely(qkey != qp->qkey))
                        return;
-               }
+
                /* Drop invalid MAD packets (see 13.5.3.1). */
                if (unlikely(qp->ibqp.qp_num == 1 &&
                             (tlen != 256 ||
index a52fc67b40d73ab3e7e3c6659bf050cd16a0035d..95e370192948b462213e5a46ad15b46b714370f4 100644 (file)
@@ -241,8 +241,8 @@ static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
        return p1 && p1 == p2 && ((__s16)pkey1 < 0 || (__s16)pkey2 < 0);
 }
 
-void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
-                  u32 qp1, u32 qp2, __be16 lid1, __be16 lid2);
+void qib_bad_pkey(struct qib_ibport *ibp, u32 key, u32 sl,
+                 u32 qp1, u32 qp2, __be16 lid1, __be16 lid2);
 void qib_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num);
 void qib_sys_guid_chg(struct qib_ibport *ibp);
 void qib_node_desc_chg(struct qib_ibport *ibp);
index aa5f9ea318e45ab0ecd7169c0f14a675ffc121a3..ea95672d967515337d3ab02044a7421a78d221a6 100644 (file)
@@ -777,24 +777,55 @@ out:
        return ret;
 }
 
+/**
+ * rvt_sge_adjacent - try to fold sge into the previously written SGE
+ * @isge: outgoing internal SGE (not referenced by this helper)
+ * @last_sge: last outgoing SGE written, may be NULL
+ * @sge: incoming IB SGE to check
+ *
+ * On a match grows last_sge->sge_length (and ->length when lkey is 0).
+ *
+ * Return: true if sge was merged into last_sge, false otherwise
+ */
+static inline bool rvt_sge_adjacent(struct rvt_sge *isge,
+                                   struct rvt_sge *last_sge,
+                                   struct ib_sge *sge)
+{
+       if (last_sge && sge->lkey == last_sge->mr->lkey &&
+           ((uint64_t)(last_sge->vaddr + last_sge->length) == sge->addr)) {
+               if (sge->lkey) { /* non-zero lkey: merged range must fit the MR */
+                       if (unlikely((sge->addr - last_sge->mr->user_base +
+                             sge->length > last_sge->mr->length)))
+                               return false; /* overrun, caller will catch */
+               } else {
+                       last_sge->length += sge->length;
+               }
+               last_sge->sge_length += sge->length;
+               trace_rvt_sge_adjacent(last_sge, sge);
+               return true;
+       }
+       return false; /* no previous SGE, different lkey, or not contiguous */
+}
+
 /**
  * rvt_lkey_ok - check IB SGE for validity and initialize
  * @rkt: table containing lkey to check SGE against
  * @pd: protection domain
  * @isge: outgoing internal SGE
+ * @last_sge: last outgoing SGE written
  * @sge: SGE to check
  * @acc: access flags
  *
  * Check the IB SGE for validity and initialize our internal version
  * of it.
  *
- * Return: 1 if valid and successful, otherwise returns 0.
- *
- * increments the reference count upon success
+ * Increments the reference count when a new sge is stored.
  *
+ * Return: 0 if compressed, 1 if added, otherwise returns -errno.
  */
 int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
-               struct rvt_sge *isge, struct ib_sge *sge, int acc)
+               struct rvt_sge *isge, struct rvt_sge *last_sge,
+               struct ib_sge *sge, int acc)
 {
        struct rvt_mregion *mr;
        unsigned n, m;
@@ -804,12 +835,14 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
         * We use LKEY == zero for kernel virtual addresses
         * (see rvt_get_dma_mr() and dma_virt_ops).
         */
-       rcu_read_lock();
        if (sge->lkey == 0) {
                struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
 
                if (pd->user)
-                       goto bail;
+                       return -EINVAL;
+               if (rvt_sge_adjacent(isge, last_sge, sge))
+                       return 0;
+               rcu_read_lock();
                mr = rcu_dereference(dev->dma_mr);
                if (!mr)
                        goto bail;
@@ -824,6 +857,9 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
                isge->n = 0;
                goto ok;
        }
+       if (rvt_sge_adjacent(isge, last_sge, sge))
+               return 0;
+       rcu_read_lock();
        mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
        if (!mr)
                goto bail;
@@ -874,12 +910,13 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
        isge->m = m;
        isge->n = n;
 ok:
+       trace_rvt_sge_new(isge, sge);
        return 1;
 bail_unref:
        rvt_put_mr(mr);
 bail:
        rcu_read_unlock();
-       return 0;
+       return -EINVAL;
 }
 EXPORT_SYMBOL(rvt_lkey_ok);
 
index 8876ee7bc326c9d5a05f438830ec1ba0740eebef..740611e4692ab100fdb70eecd054967385ee73bd 100644 (file)
@@ -421,15 +421,6 @@ bail:
        return ret;
 }
 
-static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
-{
-       struct rvt_qpn_map *map;
-
-       map = qpt->map + qpn / RVT_BITS_PER_PAGE;
-       if (map->page)
-               clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
-}
-
 /**
  * rvt_clear_mr_refs - Drop help mr refs
  * @qp: rvt qp data structure
@@ -645,6 +636,19 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
        lockdep_assert_held(&qp->s_lock);
 }
 
+/** rvt_free_qpn - clear a qpn's bit in the QPN bitmap
+ * @qpt: QP table holding the bitmap pages
+ * @qpn: queue pair number to free; masked with RVT_QPN_MASK to find its page
+ */
+static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
+{
+       struct rvt_qpn_map *map;
+
+       map = qpt->map + (qpn & RVT_QPN_MASK) / RVT_BITS_PER_PAGE;
+       if (map->page) /* nothing to clear when the page pointer is NULL */
+               clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
+}
+
 /**
  * rvt_create_qp - create a queue pair for a device
  * @ibpd: the protection domain who's device we create the queue pair for
@@ -914,7 +918,7 @@ bail_ip:
                kref_put(&qp->ip->ref, rvt_release_mmap_info);
 
 bail_qpn:
-       free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
+       rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
 
 bail_rq_wq:
        if (!qp->ip)
@@ -1301,19 +1305,6 @@ inval:
        return -EINVAL;
 }
 
-/** rvt_free_qpn - Free a qpn from the bit map
- * @qpt: QP table
- * @qpn: queue pair number to free
- */
-static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
-{
-       struct rvt_qpn_map *map;
-
-       map = qpt->map + qpn / RVT_BITS_PER_PAGE;
-       if (map->page)
-               clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
-}
-
 /**
  * rvt_destroy_qp - destroy a queue pair
  * @ibqp: the queue pair to destroy
@@ -1622,7 +1613,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
        struct rvt_pd *pd;
        struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
        u8 log_pmtu;
-       int ret;
+       int ret, incr;
        size_t cplen;
        bool reserved_op;
        int local_ops_delayed = 0;
@@ -1695,22 +1686,23 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
        wqe->length = 0;
        j = 0;
        if (wr->num_sge) {
+               struct rvt_sge *last_sge = NULL;
+
                acc = wr->opcode >= IB_WR_RDMA_READ ?
                        IB_ACCESS_LOCAL_WRITE : 0;
                for (i = 0; i < wr->num_sge; i++) {
                        u32 length = wr->sg_list[i].length;
-                       int ok;
 
                        if (length == 0)
                                continue;
-                       ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j],
-                                        &wr->sg_list[i], acc);
-                       if (!ok) {
-                               ret = -EINVAL;
-                               goto bail_inval_free;
-                       }
+                       incr = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], last_sge,
+                                          &wr->sg_list[i], acc);
+                       if (unlikely(incr < 0))
+                               goto bail_lkey_error;
                        wqe->length += length;
-                       j++;
+                       if (incr)
+                               last_sge = &wqe->sg_list[j];
+                       j += incr;
                }
                wqe->wr.num_sge = j;
        }
@@ -1757,12 +1749,14 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
                wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED;
                qp->s_avail--;
        }
-       trace_rvt_post_one_wr(qp, wqe);
+       trace_rvt_post_one_wr(qp, wqe, wr->num_sge);
        smp_wmb(); /* see request builders */
        qp->s_head = next;
 
        return 0;
 
+bail_lkey_error:
+       ret = incr;
 bail_inval_free:
        /* release mr holds */
        while (j) {
index 3318a6c36373358f74a1582d9e156ed69ed807bb..976e482930a3f5e6cc6f4006093088b1f74fc85e 100644 (file)
@@ -103,6 +103,68 @@ DEFINE_EVENT(
        TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
        TP_ARGS(mr, m, n, v, len));
 
+DECLARE_EVENT_CLASS( /* layout shared by rvt_sge_adjacent and rvt_sge_new */
+       rvt_sge_template,
+       TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge),
+       TP_ARGS(sge, isge),
+       TP_STRUCT__entry(
+               RDI_DEV_ENTRY(ib_to_rvt(sge->mr->pd->device))
+               __field(struct rvt_mregion *, mr)
+               __field(struct rvt_sge *, sge)
+               __field(struct ib_sge *, isge)
+               __field(void *, vaddr)
+               __field(u64, ivaddr)
+               __field(u32, lkey)
+               __field(u32, sge_length)
+               __field(u32, length)
+               __field(u32, ilength)
+               __field(int, user)
+               __field(u16, m)
+               __field(u16, n)
+       ),
+       TP_fast_assign(
+               RDI_DEV_ASSIGN(ib_to_rvt(sge->mr->pd->device));
+               __entry->mr = sge->mr;
+               __entry->sge = sge;
+               __entry->isge = isge;
+               __entry->vaddr = sge->vaddr;
+               __entry->ivaddr = isge->addr; /* i* fields come from the ib_sge */
+               __entry->lkey = sge->mr->lkey;
+               __entry->sge_length = sge->sge_length;
+               __entry->length = sge->length;
+               __entry->ilength = isge->length;
+               __entry->m = sge->m;
+               __entry->n = sge->n; /* the n field itself, not a second copy of m */
+               __entry->user = ibpd_to_rvtpd(sge->mr->pd)->user;
+       ),
+       TP_printk(
+               "[%s] mr %p sge %p isge %p vaddr %p ivaddr %llx lkey %x sge_length %u length %u ilength %u m %u n %u user %u",
+               __get_str(dev),
+               __entry->mr,
+               __entry->sge,
+               __entry->isge,
+               __entry->vaddr,
+               __entry->ivaddr,
+               __entry->lkey,
+               __entry->sge_length,
+               __entry->length,
+               __entry->ilength,
+               __entry->m,
+               __entry->n,
+               __entry->user
+       )
+);
+
+DEFINE_EVENT(
+       rvt_sge_template, rvt_sge_adjacent,
+       TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge),
+       TP_ARGS(sge, isge));
+
+DEFINE_EVENT(
+       rvt_sge_template, rvt_sge_new,
+       TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge),
+       TP_ARGS(sge, isge));
+
 #endif /* __RVT_TRACE_MR_H */
 
 #undef TRACE_INCLUDE_PATH
index a613a22237510744a266a7c0fcd1213150b2f345..0ef25fc49f25a2c027daf911a06df0be3dfa0d43 100644 (file)
@@ -84,12 +84,12 @@ __print_symbolic(opcode,                                   \
        wr_opcode_name(RESERVED10))
 
 #define POS_PRN \
-"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u"
+"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u wr_num_sge %u"
 
 TRACE_EVENT(
        rvt_post_one_wr,
-       TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe),
-       TP_ARGS(qp, wqe),
+       TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, int wr_num_sge),
+       TP_ARGS(qp, wqe, wr_num_sge),
        TP_STRUCT__entry(
                RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
                __field(u64, wr_id)
@@ -108,6 +108,7 @@ TRACE_EVENT(
                __field(int, send_flags)
                __field(pid_t, pid)
                __field(int, num_sge)
+               __field(int, wr_num_sge)
        ),
        TP_fast_assign(
                RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
@@ -127,6 +128,7 @@ TRACE_EVENT(
                __entry->ssn = wqe->ssn;
                __entry->send_flags = wqe->wr.send_flags;
                __entry->num_sge = wqe->wr.num_sge;
+               __entry->wr_num_sge = wr_num_sge;
        ),
        TP_printk(
                POS_PRN,
@@ -146,7 +148,8 @@ TRACE_EVENT(
                __entry->head,
                __entry->last,
                __entry->pid,
-               __entry->num_sge
+               __entry->num_sge,
+               __entry->wr_num_sge
        )
 );
 
index 0d7c6bb551d924ea76a05c512bc966c40ceb56dc..64bdd442078a13b32a77a03e04c8a87724de275f 100644 (file)
@@ -202,8 +202,13 @@ static int rvt_modify_port(struct ib_device *ibdev, u8 port_num,
                return -EINVAL;
 
        rvp = rdi->ports[port_index];
-       rvp->port_cap_flags |= props->set_port_cap_mask;
-       rvp->port_cap_flags &= ~props->clr_port_cap_mask;
+       if (port_modify_mask & IB_PORT_OPA_MASK_CHG) {
+               rvp->port_cap3_flags |= props->set_port_cap_mask;
+               rvp->port_cap3_flags &= ~props->clr_port_cap_mask;
+       } else {
+               rvp->port_cap_flags |= props->set_port_cap_mask;
+               rvp->port_cap_flags &= ~props->clr_port_cap_mask;
+       }
 
        if (props->set_port_cap_mask || props->clr_port_cap_mask)
                rdi->driver_f.cap_mask_chg(rdi, port_num);
index cf768dd78d1b4b8f6be52e083c4e148be78be6cc..249d15f13ab8fbdc1ff9b503fc2069fdc4965c7c 100644 (file)
@@ -52,7 +52,9 @@
 
 #include <linux/module.h>
 #include <rdma/ib_addr.h>
-#include <rdma/ib_smi.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/opa_smi.h>
+#include <rdma/opa_port_info.h>
 
 #include "opa_vnic_internal.h"
 
@@ -979,6 +981,27 @@ static int vema_register(struct opa_vnic_ctrl_port *cport)
        return 0;
 }
 
+/**
+ * opa_vnic_ctrl_config_dev - toggle OPA_CAP_MASK3_IsEthOnFabricSupported
+ * on ports 1..num_ports through ib_modify_port(IB_PORT_OPA_MASK_CHG); the
+ * result of each call is ignored. Intended to send a trap to the EM.
+ * @cport: pointer to control port
+ * @en: true to set the capability bit, false to clear it
+ */
+static void opa_vnic_ctrl_config_dev(struct opa_vnic_ctrl_port *cport, bool en)
+{
+       struct ib_port_modify pm = { 0 };
+       int i;
+
+       if (en)
+               pm.set_port_cap_mask = OPA_CAP_MASK3_IsEthOnFabricSupported;
+       else
+               pm.clr_port_cap_mask = OPA_CAP_MASK3_IsEthOnFabricSupported;
+
+       for (i = 1; i <= cport->num_ports; i++) /* port numbers start at 1 */
+               ib_modify_port(cport->ibdev, i, IB_PORT_OPA_MASK_CHG, &pm);
+}
+
 /**
  * opa_vnic_vema_add_one -- Handle new ib device
  * @device: ib device pointer
@@ -1007,6 +1030,7 @@ static void opa_vnic_vema_add_one(struct ib_device *device)
                c_info("VNIC client initialized\n");
 
        ib_set_client_data(device, &opa_vnic_client, cport);
+       opa_vnic_ctrl_config_dev(cport, true);
 }
 
 /**
@@ -1025,6 +1049,7 @@ static void opa_vnic_vema_rem_one(struct ib_device *device,
                return;
 
        c_info("removing VNIC client\n");
+       opa_vnic_ctrl_config_dev(cport, false);
        vema_unregister(cport);
        kfree(cport);
 }
index 5519f31f043a45b086e098407c4a15a2a7d02ca4..c124d515f7d545951dfc105d95ba1794a987e2bf 100644 (file)
@@ -193,8 +193,12 @@ static inline void put_ib_ateth_compare(u64 val, struct ib_atomic_eth *ateth)
 #define IB_LNH_MASK            3
 #define IB_SC_MASK             0xf
 #define IB_SC_SHIFT            12
+#define IB_SC5_MASK            0x10
 #define IB_SL_MASK             0xf
 #define IB_SL_SHIFT            4
+#define IB_SL_SHIFT            4
+#define IB_LVER_MASK   0xf
+#define IB_LVER_SHIFT  8
 
 static inline u8 ib_get_lnh(struct ib_header *hdr)
 {
@@ -206,6 +210,11 @@ static inline u8 ib_get_sc(struct ib_header *hdr)
        return ((be16_to_cpu(hdr->lrh[0]) >> IB_SC_SHIFT) & IB_SC_MASK);
 }
 
+static inline bool ib_is_sc5(u16 sc5)
+{
+       return (sc5 & IB_SC5_MASK) != 0; /* true when the IB_SC5_MASK bit is set */
+}
+
 static inline u8 ib_get_sl(struct ib_header *hdr)
 {
        return ((be16_to_cpu(hdr->lrh[0]) >> IB_SL_SHIFT) & IB_SL_MASK);
@@ -221,6 +230,27 @@ static inline u16 ib_get_slid(struct ib_header *hdr)
        return (be16_to_cpu(hdr->lrh[3]));
 }
 
+static inline u8 ib_get_lver(struct ib_header *hdr)
+{
+       /* IB_LVER_MASK bits of lrh[0], starting at bit IB_LVER_SHIFT */
+       return (be16_to_cpu(hdr->lrh[0]) >> IB_LVER_SHIFT) & IB_LVER_MASK;
+}
+
+static inline u16 ib_get_len(struct ib_header *hdr)
+{
+       return be16_to_cpu(hdr->lrh[2]); /* whole lrh[2] word, CPU byte order */
+}
+
+static inline u32 ib_get_qkey(struct ib_other_headers *ohdr)
+{
+       return (u32)be32_to_cpu(ohdr->u.ud.deth[0]); /* all of deth[0] */
+}
+
+static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr)
+{
+       return be32_to_cpu(ohdr->u.ud.deth[1]) & IB_QPN_MASK; /* deth[1] QPN bits */
+}
+
 /*
  * BTH
  */
@@ -229,6 +259,14 @@ static inline u16 ib_get_slid(struct ib_header *hdr)
 #define IB_BTH_PAD_MASK        3
 #define IB_BTH_PKEY_MASK       0xffff
 #define IB_BTH_PAD_SHIFT       20
+#define IB_BTH_A_MASK          1
+#define IB_BTH_A_SHIFT         31
+#define IB_BTH_M_MASK          1
+#define IB_BTH_M_SHIFT         22
+#define IB_BTH_SE_MASK         1
+#define IB_BTH_SE_SHIFT        23
+#define IB_BTH_TVER_MASK       0xf
+#define IB_BTH_TVER_SHIFT      16
 
 static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr)
 {
@@ -247,4 +285,50 @@ static inline u8 ib_bth_get_opcode(struct ib_other_headers *ohdr)
                   IB_BTH_OPCODE_MASK);
 }
 
+static inline u8 ib_bth_get_ackreq(struct ib_other_headers *ohdr)
+{
+       /* single bit of bth[2] at IB_BTH_A_SHIFT */
+       return (be32_to_cpu(ohdr->bth[2]) >> IB_BTH_A_SHIFT) & IB_BTH_A_MASK;
+}
+
+static inline u8 ib_bth_get_migreq(struct ib_other_headers *ohdr)
+{
+       /* single bit of bth[0] at IB_BTH_M_SHIFT */
+       return (be32_to_cpu(ohdr->bth[0]) >> IB_BTH_M_SHIFT) & IB_BTH_M_MASK;
+}
+
+static inline u8 ib_bth_get_se(struct ib_other_headers *ohdr)
+{
+       /* single bit of bth[0] at IB_BTH_SE_SHIFT */
+       return (be32_to_cpu(ohdr->bth[0]) >> IB_BTH_SE_SHIFT) & IB_BTH_SE_MASK;
+}
+
+static inline u32 ib_bth_get_psn(struct ib_other_headers *ohdr)
+{
+       return be32_to_cpu(ohdr->bth[2]); /* full bth[2] word, no masking here */
+}
+
+static inline u32 ib_bth_get_qpn(struct ib_other_headers *ohdr)
+{
+       return be32_to_cpu(ohdr->bth[1]) & IB_QPN_MASK; /* bth[1] QPN bits */
+}
+
+static inline u8 ib_bth_get_becn(struct ib_other_headers *ohdr)
+{
+       /* IB_BECN_MASK bits of bth[1] at IB_BECN_SHIFT */
+       return (be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) & IB_BECN_MASK;
+}
+
+static inline u8 ib_bth_get_fecn(struct ib_other_headers *ohdr)
+{
+       /* IB_FECN_MASK bits of bth[1] at IB_FECN_SHIFT */
+       return (be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) & IB_FECN_MASK;
+}
+
+static inline u8 ib_bth_get_tver(struct ib_other_headers *ohdr)
+{
+       u32 bth0 = be32_to_cpu(ohdr->bth[0]); /* bth[0] in CPU byte order */
+       return (bth0 >> IB_BTH_TVER_SHIFT) & IB_BTH_TVER_MASK;
+}
+
 #endif                          /* IB_HDRS_H */
index b5732432bb297dbf6067ae34b16c00073cb2949d..593ad2640d2f9da5c54424e856fd8569b5a9471c 100644 (file)
@@ -577,7 +577,8 @@ struct ib_device_modify {
 enum ib_port_modify_flags {
        IB_PORT_SHUTDOWN                = 1,
        IB_PORT_INIT_TYPE               = (1<<2),
-       IB_PORT_RESET_QKEY_CNTR         = (1<<3)
+       IB_PORT_RESET_QKEY_CNTR         = (1<<3),
+       IB_PORT_OPA_MASK_CHG            = (1<<4)
 };
 
 struct ib_port_modify {
@@ -664,6 +665,8 @@ union rdma_network_hdr {
        };
 };
 
+#define IB_QPN_MASK            0xFFFFFF
+
 enum {
        IB_MULTICAST_QPN = 0xffffff
 };
index 55af692710539d6555c072a0c2a290aca7bfba42..22fb15ff5e8b7e9f83672ac054ccd796cb9dc074 100644 (file)
@@ -75,6 +75,7 @@ struct rvt_ibport {
        __be64 mkey;
        u64 tid;
        u32 port_cap_flags;
+       u16 port_cap3_flags;
        u32 pma_sample_start;
        u32 pma_sample_interval;
        __be16 pma_counter_select[5];
@@ -514,7 +515,8 @@ int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey);
 int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
                u32 len, u64 vaddr, u32 rkey, int acc);
 int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
-               struct rvt_sge *isge, struct ib_sge *sge, int acc);
+               struct rvt_sge *isge, struct rvt_sge *last_sge,
+               struct ib_sge *sge, int acc);
 struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid,
                                 u16 lid);
 
index d664d2e762808321d7b2aece6d3146b713b51631..07e2fffa6de686fbdcb6569d28db2cd9d329f4cf 100644 (file)
@@ -396,7 +396,7 @@ struct rvt_srq {
 #define RVT_QPNMAP_ENTRIES          (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
 #define RVT_BITS_PER_PAGE           (PAGE_SIZE * BITS_PER_BYTE)
 #define RVT_BITS_PER_PAGE_MASK      (RVT_BITS_PER_PAGE - 1)
-#define RVT_QPN_MASK               0xFFFFFF
+#define RVT_QPN_MASK               IB_QPN_MASK
 
 /*
  * QPN-map pages start out as NULL, they get allocated upon