Merge branch 'hfi1' into k.o/for-4.14
authorDoug Ledford <dledford@redhat.com>
Mon, 24 Jul 2017 12:33:43 +0000 (08:33 -0400)
committerDoug Ledford <dledford@redhat.com>
Mon, 24 Jul 2017 12:33:43 +0000 (08:33 -0400)
1  2 
drivers/infiniband/hw/hfi1/qp.c
drivers/infiniband/hw/hfi1/verbs.c
drivers/infiniband/hw/hfi1/vnic_main.c
drivers/infiniband/hw/qib/qib_verbs.h
drivers/infiniband/sw/rdmavt/qp.c
drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
include/rdma/ib_verbs.h
include/rdma/rdma_vt.h
include/rdma/rdmavt_qp.h

index 1a7af9f60c137f916a125588318a182cf535ac01,e91be05062e63c9dca036be118b1ecad1c3a935f..198c7b4f3c789309f8a765ac049271de36d075fe
@@@ -73,12 -73,6 +73,6 @@@ static void iowait_wakeup(struct iowai
  static void iowait_sdma_drained(struct iowait *wait);
  static void qp_pio_drain(struct rvt_qp *qp);
  
- static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
-                             struct rvt_qpn_map *map, unsigned off)
- {
-       return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
- }
  const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
  [IB_WR_RDMA_WRITE] = {
        .length = sizeof(struct ib_rdma_wr),
@@@ -647,17 -641,18 +641,17 @@@ void qp_iter_print(struct seq_file *s, 
                   qp->pid);
  }
  
 -void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 -                  gfp_t gfp)
 +void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
  {
        struct hfi1_qp_priv *priv;
  
 -      priv = kzalloc_node(sizeof(*priv), gfp, rdi->dparms.node);
 +      priv = kzalloc_node(sizeof(*priv), GFP_KERNEL, rdi->dparms.node);
        if (!priv)
                return ERR_PTR(-ENOMEM);
  
        priv->owner = qp;
  
 -      priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), gfp,
 +      priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), GFP_KERNEL,
                                   rdi->dparms.node);
        if (!priv->s_ahg) {
                kfree(priv);
index 2d19f9bb434de5a0396f31996f20f4fd01eec179,5b53faf470424b8eb72c804c597e2aa3babc9042..9c9ded643ed44bee0ace051fe559a13a1fd17098
@@@ -508,13 -508,14 +508,14 @@@ again
  /*
   * Make sure the QP is ready and able to accept the given opcode.
   */
- static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
+ static inline opcode_handler qp_ok(struct hfi1_packet *packet)
  {
        if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
                return NULL;
-       if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) ||
-           (opcode == IB_OPCODE_CNP))
-               return opcode_handler_tbl[opcode];
+       if (((packet->opcode & RVT_OPCODE_QP_MASK) ==
+            packet->qp->allowed_ops) ||
+           (packet->opcode == IB_OPCODE_CNP))
+               return opcode_handler_tbl[packet->opcode];
  
        return NULL;
  }
@@@ -548,69 -549,34 +549,34 @@@ static u64 hfi1_fault_tx(struct rvt_qp 
        return pbc;
  }
  
- /**
-  * hfi1_ib_rcv - process an incoming packet
-  * @packet: data packet information
-  *
-  * This is called to process an incoming packet at interrupt level.
-  *
-  * Tlen is the length of the header + data + CRC in bytes.
-  */
- void hfi1_ib_rcv(struct hfi1_packet *packet)
+ static inline void hfi1_handle_packet(struct hfi1_packet *packet,
+                                     bool is_mcast)
  {
+       u32 qp_num;
        struct hfi1_ctxtdata *rcd = packet->rcd;
-       struct ib_header *hdr = packet->hdr;
-       u32 tlen = packet->tlen;
        struct hfi1_pportdata *ppd = rcd->ppd;
        struct hfi1_ibport *ibp = rcd_to_iport(rcd);
        struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
        opcode_handler packet_handler;
        unsigned long flags;
-       u32 qp_num;
-       int lnh;
-       u8 opcode;
-       u16 lid;
-       /* Check for GRH */
-       lnh = ib_get_lnh(hdr);
-       if (lnh == HFI1_LRH_BTH) {
-               packet->ohdr = &hdr->u.oth;
-       } else if (lnh == HFI1_LRH_GRH) {
-               u32 vtf;
-               packet->ohdr = &hdr->u.l.oth;
-               if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
-                       goto drop;
-               vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
-               if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
-                       goto drop;
-               packet->rcv_flags |= HFI1_HAS_GRH;
-       } else {
-               goto drop;
-       }
  
-       trace_input_ibhdr(rcd->dd, hdr);
+       inc_opstats(packet->tlen, &rcd->opstats->stats[packet->opcode]);
  
-       opcode = ib_bth_get_opcode(packet->ohdr);
-       inc_opstats(tlen, &rcd->opstats->stats[opcode]);
-       /* Get the destination QP number. */
-       qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK;
-       lid = ib_get_dlid(hdr);
-       if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
-                    (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) {
+       if (unlikely(is_mcast)) {
                struct rvt_mcast *mcast;
                struct rvt_mcast_qp *p;
  
-               if (lnh != HFI1_LRH_GRH)
+               if (!packet->grh)
                        goto drop;
-               mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid);
+               mcast = rvt_mcast_find(&ibp->rvp,
+                                      &packet->grh->dgid,
+                                      packet->dlid);
                if (!mcast)
                        goto drop;
                list_for_each_entry_rcu(p, &mcast->qp_list, list) {
                        packet->qp = p->qp;
                        spin_lock_irqsave(&packet->qp->r_lock, flags);
-                       packet_handler = qp_ok(opcode, packet);
+                       packet_handler = qp_ok(packet);
                        if (likely(packet_handler))
                                packet_handler(packet);
                        else
                if (atomic_dec_return(&mcast->refcount) <= 1)
                        wake_up(&mcast->wait);
        } else {
+               /* Get the destination QP number. */
+               qp_num = ib_bth_get_qpn(packet->ohdr);
                rcu_read_lock();
                packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
                if (!packet->qp) {
                        rcu_read_unlock();
                        goto drop;
                }
-               if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode,
+               if (unlikely(hfi1_dbg_fault_opcode(packet->qp, packet->opcode,
                                                   true))) {
                        rcu_read_unlock();
                        goto drop;
                }
                spin_lock_irqsave(&packet->qp->r_lock, flags);
-               packet_handler = qp_ok(opcode, packet);
+               packet_handler = qp_ok(packet);
                if (likely(packet_handler))
                        packet_handler(packet);
                else
                rcu_read_unlock();
        }
        return;
  drop:
        ibp->rvp.n_pkt_drops++;
  }
  
+ /**
+  * hfi1_ib_rcv - process an incoming packet
+  * @packet: data packet information
+  *
+  * This is called to process an incoming packet at interrupt level.
+  */
+ void hfi1_ib_rcv(struct hfi1_packet *packet)
+ {
+       struct hfi1_ctxtdata *rcd = packet->rcd;
+       bool is_mcast = false;
+       if (unlikely(hfi1_check_mcast(packet->dlid)))
+               is_mcast = true;
+       trace_input_ibhdr(rcd->dd, packet,
+                         !!(packet->rhf & RHF_DC_INFO_SMASK));
+       hfi1_handle_packet(packet, is_mcast);
+ }
  /*
   * This is called from a timer to check for QPs
   * which need kernel memory in order to send a packet.
@@@ -863,7 -849,7 +849,7 @@@ int hfi1_verbs_send_dma(struct rvt_qp *
  
                        /* No vl15 here */
                        /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
-                       pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+                       pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
  
                        if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
                                pbc = hfi1_fault_tx(qp, opcode, pbc);
                return ret;
        }
        trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
-                               &ps->s_txreq->phdr.hdr);
+                               &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));
        return ret;
  
  bail_ecomm:
@@@ -999,7 -985,7 +985,7 @@@ int hfi1_verbs_send_pio(struct rvt_qp *
                u8 opcode = get_opcode(&tx->phdr.hdr);
  
                /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
-               pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+               pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
                if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
                        pbc = hfi1_fault_tx(qp, opcode, pbc);
                pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
        }
  
        trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
-                              &ps->s_txreq->phdr.hdr);
+                              &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));
  
  pio_bail:
        if (qp->s_wqe) {
@@@ -1368,7 -1354,7 +1354,7 @@@ static int query_port(struct rvt_dev_in
        props->lmc = ppd->lmc;
        /* OPA logical states match IB logical states */
        props->state = driver_lstate(ppd);
-       props->phys_state = hfi1_ibphys_portstate(ppd);
+       props->phys_state = driver_pstate(ppd);
        props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
        props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
        /* see rate_show() in ib core/sysfs.c */
@@@ -1551,9 -1537,13 +1537,13 @@@ static void init_ibport(struct hfi1_ppo
        /* Set the prefix to the default value (see ch. 4.1.1) */
        ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
        ibp->rvp.sm_lid = 0;
-       /* Below should only set bits defined in OPA PortInfo.CapabilityMask */
+       /*
+        * Below should only set bits defined in OPA PortInfo.CapabilityMask
+        * and PortInfo.CapabilityMask3
+        */
        ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP |
                IB_PORT_CAP_MASK_NOTICE_SUP;
+       ibp->rvp.port_cap3_flags = OPA_CAP_MASK3_IsSharedSpaceSupported;
        ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
        ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
        ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
@@@ -1779,6 -1769,7 +1769,6 @@@ int hfi1_register_ib_device(struct hfi1
        ibdev->alloc_hw_stats = alloc_hw_stats;
        ibdev->get_hw_stats = get_hw_stats;
        ibdev->alloc_rdma_netdev = hfi1_vnic_alloc_rn;
 -      ibdev->free_rdma_netdev = hfi1_vnic_free_rn;
  
        /* keep process mad in the driver */
        ibdev->process_mad = hfi1_process_mad;
index 339f0cdd56d6bb30f1910245d0b4b493a0090e23,950c1b4df442879c72d87b56b30d486b90b72b2c..5a3f80ba9752d7c4edf6ec943af413d97c310c8e
@@@ -156,11 -156,11 +156,11 @@@ static int allocate_vnic_ctxt(struct hf
        return ret;
  bail:
        /*
-        * hfi1_free_ctxtdata() also releases send_context
-        * structure if uctxt->sc is not null
+        * hfi1_rcd_put() will call hfi1_free_ctxtdata(), which will
+        * release send_context structure if uctxt->sc is not null
         */
        dd->rcd[uctxt->ctxt] = NULL;
-       hfi1_free_ctxtdata(dd, uctxt);
+       hfi1_rcd_put(uctxt);
        dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret);
        return ret;
  }
@@@ -208,7 -208,7 +208,7 @@@ static void deallocate_vnic_ctxt(struc
        hfi1_clear_ctxt_pkey(dd, uctxt);
  
        hfi1_stats.sps_ctxts--;
-       hfi1_free_ctxtdata(dd, uctxt);
+       hfi1_rcd_put(uctxt);
  }
  
  void hfi1_vnic_setup(struct hfi1_devdata *dd)
@@@ -751,6 -751,7 +751,7 @@@ static int hfi1_vnic_init(struct hfi1_v
                rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
                if (rc)
                        break;
+               hfi1_rcd_get(dd->vnic.ctxt[i]);
                dd->vnic.ctxt[i]->vnic_q_idx = i;
        }
  
                 */
                while (i-- > dd->vnic.num_ctxt) {
                        deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
+                       hfi1_rcd_put(dd->vnic.ctxt[i]);
                        dd->vnic.ctxt[i] = NULL;
                }
                goto alloc_fail;
@@@ -791,6 -793,7 +793,7 @@@ static void hfi1_vnic_deinit(struct hfi
        if (--dd->vnic.num_vports == 0) {
                for (i = 0; i < dd->vnic.num_ctxt; i++) {
                        deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
+                       hfi1_rcd_put(dd->vnic.ctxt[i]);
                        dd->vnic.ctxt[i] = NULL;
                }
                hfi1_deinit_vnic_rsm(dd);
@@@ -833,15 -836,6 +836,15 @@@ static const struct net_device_ops hfi1
        .ndo_get_stats64 = hfi1_vnic_get_stats64,
  };
  
 +static void hfi1_vnic_free_rn(struct net_device *netdev)
 +{
 +      struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
 +
 +      hfi1_vnic_deinit(vinfo);
 +      mutex_destroy(&vinfo->lock);
 +      free_netdev(netdev);
 +}
 +
  struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
                                      u8 port_num,
                                      enum rdma_netdev_t type,
        vinfo->num_tx_q = dd->chip_sdma_engines;
        vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT;
        vinfo->netdev = netdev;
 +      rn->free_rdma_netdev = hfi1_vnic_free_rn;
        rn->set_id = hfi1_vnic_set_vesw_id;
  
        netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
@@@ -902,3 -895,12 +905,3 @@@ init_fail
        free_netdev(netdev);
        return ERR_PTR(rc);
  }
 -
 -void hfi1_vnic_free_rn(struct net_device *netdev)
 -{
 -      struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
 -
 -      hfi1_vnic_deinit(vinfo);
 -      mutex_destroy(&vinfo->lock);
 -      free_netdev(netdev);
 -}
index a52fc67b40d73ab3e7e3c6659bf050cd16a0035d,33d5691a9b2dbf94d75e72a54805ac0ed1e7ebb9..95e370192948b462213e5a46ad15b46b714370f4
@@@ -241,8 -241,8 +241,8 @@@ static inline int qib_pkey_ok(u16 pkey1
        return p1 && p1 == p2 && ((__s16)pkey1 < 0 || (__s16)pkey2 < 0);
  }
  
- void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
-                  u32 qp1, u32 qp2, __be16 lid1, __be16 lid2);
+ void qib_bad_pkey(struct qib_ibport *ibp, u32 key, u32 sl,
+                 u32 qp1, u32 qp2, __be16 lid1, __be16 lid2);
  void qib_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num);
  void qib_sys_guid_chg(struct qib_ibport *ibp);
  void qib_node_desc_chg(struct qib_ibport *ibp);
@@@ -274,11 -274,11 +274,11 @@@ int qib_get_counters(struct qib_pportda
   * Functions provided by qib driver for rdmavt to use
   */
  unsigned qib_free_all_qps(struct rvt_dev_info *rdi);
 -void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp);
 +void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp);
  void qib_qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
  void qib_notify_qp_reset(struct rvt_qp *qp);
  int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
 -                enum ib_qp_type type, u8 port, gfp_t gfp);
 +                enum ib_qp_type type, u8 port);
  void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
  #ifdef CONFIG_DEBUG_FS
  
index 8876ee7bc326c9d5a05f438830ec1ba0740eebef,2ce0928dddd644e7b8a720ddd8d582e2f28c6d44..740611e4692ab100fdb70eecd054967385ee73bd
@@@ -118,9 -118,10 +118,9 @@@ const int ib_rvt_state_ops[IB_QPS_ERR 
  EXPORT_SYMBOL(ib_rvt_state_ops);
  
  static void get_map_page(struct rvt_qpn_table *qpt,
 -                       struct rvt_qpn_map *map,
 -                       gfp_t gfp)
 +                       struct rvt_qpn_map *map)
  {
 -      unsigned long page = get_zeroed_page(gfp);
 +      unsigned long page = get_zeroed_page(GFP_KERNEL);
  
        /*
         * Free the page if someone raced with us installing it.
@@@ -172,7 -173,7 +172,7 @@@ static int init_qpn_table(struct rvt_de
                    rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end);
        for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) {
                if (!map->page) {
 -                      get_map_page(qpt, map, GFP_KERNEL);
 +                      get_map_page(qpt, map);
                        if (!map->page) {
                                ret = -ENOMEM;
                                break;
@@@ -341,14 -342,14 +341,14 @@@ static inline unsigned mk_qpn(struct rv
   * Return: The queue pair number
   */
  static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
 -                   enum ib_qp_type type, u8 port_num, gfp_t gfp)
 +                   enum ib_qp_type type, u8 port_num)
  {
        u32 i, offset, max_scan, qpn;
        struct rvt_qpn_map *map;
        u32 ret;
  
        if (rdi->driver_f.alloc_qpn)
 -              return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num, gfp);
 +              return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num);
  
        if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
                unsigned n;
        max_scan = qpt->nmaps - !offset;
        for (i = 0;;) {
                if (unlikely(!map->page)) {
 -                      get_map_page(qpt, map, gfp);
 +                      get_map_page(qpt, map);
                        if (unlikely(!map->page))
                                break;
                }
@@@ -421,15 -422,6 +421,6 @@@ bail
        return ret;
  }
  
- static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
- {
-       struct rvt_qpn_map *map;
-       map = qpt->map + qpn / RVT_BITS_PER_PAGE;
-       if (map->page)
-               clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
- }
  /**
   * rvt_clear_mr_refs - Drop help mr refs
   * @qp: rvt qp data structure
@@@ -645,6 -637,19 +636,19 @@@ static void rvt_reset_qp(struct rvt_dev
        lockdep_assert_held(&qp->s_lock);
  }
  
+ /** rvt_free_qpn - Free a qpn from the bit map
+  * @qpt: QP table
+  * @qpn: queue pair number to free
+  */
+ static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
+ {
+       struct rvt_qpn_map *map;
+       map = qpt->map + (qpn & RVT_QPN_MASK) / RVT_BITS_PER_PAGE;
+       if (map->page)
+               clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
+ }
  /**
   * rvt_create_qp - create a queue pair for a device
   * @ibpd: the protection domain who's device we create the queue pair for
@@@ -671,6 -676,7 +675,6 @@@ struct ib_qp *rvt_create_qp(struct ib_p
        struct ib_qp *ret = ERR_PTR(-ENOMEM);
        struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
        void *priv = NULL;
 -      gfp_t gfp;
        size_t sqsize;
  
        if (!rdi)
  
        if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge ||
            init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
 -          init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
 +          init_attr->create_flags)
                return ERR_PTR(-EINVAL);
  
 -      /* GFP_NOIO is applicable to RC QP's only */
 -
 -      if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
 -          init_attr->qp_type != IB_QPT_RC)
 -              return ERR_PTR(-EINVAL);
 -
 -      gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
 -                                              GFP_NOIO : GFP_KERNEL;
 -
        /* Check receive queue parameters if no SRQ is specified. */
        if (!init_attr->srq) {
                if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge ||
                sz = sizeof(struct rvt_sge) *
                        init_attr->cap.max_send_sge +
                        sizeof(struct rvt_swqe);
 -              if (gfp == GFP_NOIO)
 -                      swq = __vmalloc(
 -                              sqsize * sz,
 -                              gfp | __GFP_ZERO, PAGE_KERNEL);
 -              else
 -                      swq = vzalloc_node(
 -                              sqsize * sz,
 -                              rdi->dparms.node);
 +              swq = vzalloc_node(sqsize * sz, rdi->dparms.node);
                if (!swq)
                        return ERR_PTR(-ENOMEM);
  
                } else if (init_attr->cap.max_recv_sge > 1)
                        sg_list_sz = sizeof(*qp->r_sg_list) *
                                (init_attr->cap.max_recv_sge - 1);
 -              qp = kzalloc_node(sz + sg_list_sz, gfp, rdi->dparms.node);
 +              qp = kzalloc_node(sz + sg_list_sz, GFP_KERNEL,
 +                                rdi->dparms.node);
                if (!qp)
                        goto bail_swq;
  
                                kzalloc_node(
                                        sizeof(*qp->s_ack_queue) *
                                         rvt_max_atomic(rdi),
 -                                      gfp,
 +                                      GFP_KERNEL,
                                        rdi->dparms.node);
                        if (!qp->s_ack_queue)
                                goto bail_qp;
                 * Driver needs to set up it's private QP structure and do any
                 * initialization that is needed.
                 */
 -              priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp);
 +              priv = rdi->driver_f.qp_priv_alloc(rdi, qp);
                if (IS_ERR(priv)) {
                        ret = priv;
                        goto bail_qp;
                                qp->r_rq.wq = vmalloc_user(
                                                sizeof(struct rvt_rwq) +
                                                qp->r_rq.size * sz);
 -                      else if (gfp == GFP_NOIO)
 -                              qp->r_rq.wq = __vmalloc(
 -                                              sizeof(struct rvt_rwq) +
 -                                              qp->r_rq.size * sz,
 -                                              gfp | __GFP_ZERO, PAGE_KERNEL);
                        else
                                qp->r_rq.wq = vzalloc_node(
                                                sizeof(struct rvt_rwq) +
  
                err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
                                init_attr->qp_type,
 -                              init_attr->port_num, gfp);
 +                              init_attr->port_num);
                if (err < 0) {
                        ret = ERR_PTR(err);
                        goto bail_rq_wq;
@@@ -914,7 -940,7 +918,7 @@@ bail_ip
                kref_put(&qp->ip->ref, rvt_release_mmap_info);
  
  bail_qpn:
-       free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
+       rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
  
  bail_rq_wq:
        if (!qp->ip)
@@@ -1258,7 -1284,9 +1262,7 @@@ int rvt_modify_qp(struct ib_qp *ibqp, s
  
        if (attr_mask & IB_QP_TIMEOUT) {
                qp->timeout = attr->timeout;
 -              qp->timeout_jiffies =
 -                      usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
 -                              1000UL);
 +              qp->timeout_jiffies = rvt_timeout_to_jiffies(qp->timeout);
        }
  
        if (attr_mask & IB_QP_QKEY)
@@@ -1301,19 -1329,6 +1305,6 @@@ inval
        return -EINVAL;
  }
  
- /** rvt_free_qpn - Free a qpn from the bit map
-  * @qpt: QP table
-  * @qpn: queue pair number to free
-  */
- static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
- {
-       struct rvt_qpn_map *map;
-       map = qpt->map + qpn / RVT_BITS_PER_PAGE;
-       if (map->page)
-               clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
- }
  /**
   * rvt_destroy_qp - destroy a queue pair
   * @ibqp: the queue pair to destroy
@@@ -1622,7 -1637,7 +1613,7 @@@ static int rvt_post_one_wr(struct rvt_q
        struct rvt_pd *pd;
        struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
        u8 log_pmtu;
-       int ret;
+       int ret, incr;
        size_t cplen;
        bool reserved_op;
        int local_ops_delayed = 0;
        wqe->length = 0;
        j = 0;
        if (wr->num_sge) {
+               struct rvt_sge *last_sge = NULL;
                acc = wr->opcode >= IB_WR_RDMA_READ ?
                        IB_ACCESS_LOCAL_WRITE : 0;
                for (i = 0; i < wr->num_sge; i++) {
                        u32 length = wr->sg_list[i].length;
-                       int ok;
  
                        if (length == 0)
                                continue;
-                       ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j],
-                                        &wr->sg_list[i], acc);
-                       if (!ok) {
-                               ret = -EINVAL;
-                               goto bail_inval_free;
-                       }
+                       incr = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], last_sge,
+                                          &wr->sg_list[i], acc);
+                       if (unlikely(incr < 0))
+                               goto bail_lkey_error;
                        wqe->length += length;
-                       j++;
+                       if (incr)
+                               last_sge = &wqe->sg_list[j];
+                       j += incr;
                }
                wqe->wr.num_sge = j;
        }
                wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED;
                qp->s_avail--;
        }
-       trace_rvt_post_one_wr(qp, wqe);
+       trace_rvt_post_one_wr(qp, wqe, wr->num_sge);
        smp_wmb(); /* see request builders */
        qp->s_head = next;
  
        return 0;
  
+ bail_lkey_error:
+       ret = incr;
  bail_inval_free:
        /* release mr holds */
        while (j) {
index cf768dd78d1b4b8f6be52e083c4e148be78be6cc,32cdd7a35415eb8e3517f8ce5b6ee72b94f70331..249d15f13ab8fbdc1ff9b503fc2069fdc4965c7c
@@@ -52,7 -52,9 +52,9 @@@
  
  #include <linux/module.h>
  #include <rdma/ib_addr.h>
- #include <rdma/ib_smi.h>
+ #include <rdma/ib_verbs.h>
+ #include <rdma/opa_smi.h>
+ #include <rdma/opa_port_info.h>
  
  #include "opa_vnic_internal.h"
  
@@@ -794,7 -796,7 +796,7 @@@ void opa_vnic_vema_send_trap(struct opa
  
        send_buf = ib_create_send_mad(port->mad_agent, 1, pkey_idx, 0,
                                      IB_MGMT_VENDOR_HDR, IB_MGMT_MAD_DATA,
 -                                    GFP_KERNEL, OPA_MGMT_BASE_VERSION);
 +                                    GFP_ATOMIC, OPA_MGMT_BASE_VERSION);
        if (IS_ERR(send_buf)) {
                c_err("%s:Couldn't allocate send buf\n", __func__);
                goto err_sndbuf;
@@@ -979,6 -981,27 +981,27 @@@ static int vema_register(struct opa_vni
        return 0;
  }
  
+ /**
+  * opa_vnic_ctrl_config_dev -- This function sends a trap to the EM
+  * by way of ib_modify_port to indicate support for ethernet on the
+  * fabric.
+  * @cport: pointer to control port
+  * @en: enable or disable ethernet on fabric support
+  */
+ static void opa_vnic_ctrl_config_dev(struct opa_vnic_ctrl_port *cport, bool en)
+ {
+       struct ib_port_modify pm = { 0 };
+       int i;
+       if (en)
+               pm.set_port_cap_mask = OPA_CAP_MASK3_IsEthOnFabricSupported;
+       else
+               pm.clr_port_cap_mask = OPA_CAP_MASK3_IsEthOnFabricSupported;
+       for (i = 1; i <= cport->num_ports; i++)
+               ib_modify_port(cport->ibdev, i, IB_PORT_OPA_MASK_CHG, &pm);
+ }
  /**
   * opa_vnic_vema_add_one -- Handle new ib device
   * @device: ib device pointer
@@@ -1007,6 -1030,7 +1030,7 @@@ static void opa_vnic_vema_add_one(struc
                c_info("VNIC client initialized\n");
  
        ib_set_client_data(device, &opa_vnic_client, cport);
+       opa_vnic_ctrl_config_dev(cport, true);
  }
  
  /**
@@@ -1025,6 -1049,7 +1049,7 @@@ static void opa_vnic_vema_rem_one(struc
                return;
  
        c_info("removing VNIC client\n");
+       opa_vnic_ctrl_config_dev(cport, false);
        vema_unregister(cport);
        kfree(cport);
  }
diff --combined include/rdma/ib_verbs.h
index b5732432bb297dbf6067ae34b16c00073cb2949d,9d4d2a74c95e1639ffb64fab86b2dee5021dc21c..593ad2640d2f9da5c54424e856fd8569b5a9471c
@@@ -577,7 -577,8 +577,8 @@@ struct ib_device_modify 
  enum ib_port_modify_flags {
        IB_PORT_SHUTDOWN                = 1,
        IB_PORT_INIT_TYPE               = (1<<2),
-       IB_PORT_RESET_QKEY_CNTR         = (1<<3)
+       IB_PORT_RESET_QKEY_CNTR         = (1<<3),
+       IB_PORT_OPA_MASK_CHG            = (1<<4)
  };
  
  struct ib_port_modify {
@@@ -664,6 -665,8 +665,8 @@@ union rdma_network_hdr 
        };
  };
  
+ #define IB_QPN_MASK           0xFFFFFF
  enum {
        IB_MULTICAST_QPN = 0xffffff
  };
@@@ -1056,7 -1059,7 +1059,7 @@@ enum ib_qp_create_flags 
        IB_QP_CREATE_MANAGED_RECV               = 1 << 4,
        IB_QP_CREATE_NETIF_QP                   = 1 << 5,
        IB_QP_CREATE_SIGNATURE_EN               = 1 << 6,
 -      IB_QP_CREATE_USE_GFP_NOIO               = 1 << 7,
 +      /* FREE                                 = 1 << 7, */
        IB_QP_CREATE_SCATTER_FCS                = 1 << 8,
        IB_QP_CREATE_CVLAN_STRIPPING            = 1 << 9,
        /* reserve bits 26-31 for low level drivers' internal use */
@@@ -1614,45 -1617,6 +1617,45 @@@ struct ib_rwq_ind_table_init_attr 
        struct ib_wq    **ind_tbl;
  };
  
 +enum port_pkey_state {
 +      IB_PORT_PKEY_NOT_VALID = 0,
 +      IB_PORT_PKEY_VALID = 1,
 +      IB_PORT_PKEY_LISTED = 2,
 +};
 +
 +struct ib_qp_security;
 +
 +struct ib_port_pkey {
 +      enum port_pkey_state    state;
 +      u16                     pkey_index;
 +      u8                      port_num;
 +      struct list_head        qp_list;
 +      struct list_head        to_error_list;
 +      struct ib_qp_security  *sec;
 +};
 +
 +struct ib_ports_pkeys {
 +      struct ib_port_pkey     main;
 +      struct ib_port_pkey     alt;
 +};
 +
 +struct ib_qp_security {
 +      struct ib_qp           *qp;
 +      struct ib_device       *dev;
 +      /* Hold this mutex when changing port and pkey settings. */
 +      struct mutex            mutex;
 +      struct ib_ports_pkeys  *ports_pkeys;
 +      /* A list of all open shared QP handles.  Required to enforce security
 +       * properly for all users of a shared QP.
 +       */
 +      struct list_head        shared_qp_list;
 +      void                   *security;
 +      bool                    destroying;
 +      atomic_t                error_list_count;
 +      struct completion       error_complete;
 +      int                     error_comps_pending;
 +};
 +
  /*
   * @max_write_sge: Maximum SGE elements per RDMA WRITE request.
   * @max_read_sge:  Maximum SGE elements per RDMA READ request.
@@@ -1682,7 -1646,6 +1685,7 @@@ struct ib_qp 
        u32                     max_read_sge;
        enum ib_qp_type         qp_type;
        struct ib_rwq_ind_table *rwq_ind_tbl;
 +      struct ib_qp_security  *qp_sec;
  };
  
  struct ib_mr {
@@@ -1931,7 -1894,6 +1934,7 @@@ enum ib_mad_result 
  };
  
  struct ib_port_cache {
 +      u64                   subnet_prefix;
        struct ib_pkey_cache  *pkey;
        struct ib_gid_table   *gid;
        u8                     lmc;
@@@ -1968,9 -1930,6 +1971,9 @@@ struct rdma_netdev 
        struct ib_device  *hca;
        u8                 port_num;
  
 +      /* cleanup function must be specified */
 +      void (*free_rdma_netdev)(struct net_device *netdev);
 +
        /* control functions */
        void (*set_id)(struct net_device *netdev, int id);
        /* send packet */
                            union ib_gid *gid, u16 mlid);
  };
  
 +struct ib_port_pkey_list {
 +      /* Lock to hold while modifying the list. */
 +      spinlock_t                    list_lock;
 +      struct list_head              pkey_list;
 +};
 +
  struct ib_device {
        /* Do not access @dma_device directly from ULP nor from HW drivers. */
        struct device                *dma_device;
  
        int                           num_comp_vectors;
  
 +      struct ib_port_pkey_list     *port_pkey_list;
 +
        struct iw_cm_verbs           *iwcm;
  
        /**
                                                           struct ib_udata *udata);
        int                        (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
        /**
 -       * rdma netdev operations
 +       * rdma netdev operation
         *
         * Driver implementing alloc_rdma_netdev must return -EOPNOTSUPP if it
         * doesn't support the specified rdma netdev type.
                                        const char *name,
                                        unsigned char name_assign_type,
                                        void (*setup)(struct net_device *));
 -      void (*free_rdma_netdev)(struct net_device *netdev);
  
        struct module               *owner;
        struct device                dev;
@@@ -2947,22 -2899,6 +2950,22 @@@ static inline int ib_post_srq_recv(stru
  struct ib_qp *ib_create_qp(struct ib_pd *pd,
                           struct ib_qp_init_attr *qp_init_attr);
  
 +/**
 + * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
 + * @qp: The QP to modify.
 + * @attr: On input, specifies the QP attributes to modify.  On output,
 + *   the current values of selected QP attributes are returned.
 + * @attr_mask: A bit-mask used to specify which attributes of the QP
 + *   are being modified.
 + * @udata: pointer to user's input output buffer information
 + *   for the attributes being modified.
 + * It returns 0 on success and returns appropriate error code on error.
 + */
 +int ib_modify_qp_with_udata(struct ib_qp *qp,
 +                          struct ib_qp_attr *attr,
 +                          int attr_mask,
 +                          struct ib_udata *udata);
 +
  /**
   * ib_modify_qp - Modifies the attributes for the specified QP and then
   *   transitions the QP to the given state.
diff --combined include/rdma/rdma_vt.h
index 55af692710539d6555c072a0c2a290aca7bfba42,0f18ffd98dd75130fc0468f3e4035430f87b0223..22fb15ff5e8b7e9f83672ac054ccd796cb9dc074
@@@ -75,6 -75,7 +75,7 @@@ struct rvt_ibport 
        __be64 mkey;
        u64 tid;
        u32 port_cap_flags;
+       u16 port_cap3_flags;
        u32 pma_sample_start;
        u32 pma_sample_interval;
        __be16 pma_counter_select[5];
@@@ -229,7 -230,8 +230,7 @@@ struct rvt_driver_provided 
         * ERR_PTR(err).  The driver is free to return NULL or a valid
         * pointer.
         */
 -      void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 -                              gfp_t gfp);
 +      void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
  
        /*
         * Free the driver's private qp structure.
  
        /* Let the driver pick the next queue pair number*/
        int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
 -                       enum ib_qp_type type, u8 port_num, gfp_t gfp);
 +                       enum ib_qp_type type, u8 port_num);
  
        /* Determine if its safe or allowed to modify the qp */
        int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
@@@ -514,7 -516,8 +515,8 @@@ int rvt_invalidate_rkey(struct rvt_qp *
  int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
                u32 len, u64 vaddr, u32 rkey, int acc);
  int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
-               struct rvt_sge *isge, struct ib_sge *sge, int acc);
+               struct rvt_sge *isge, struct rvt_sge *last_sge,
+               struct ib_sge *sge, int acc);
  struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid,
                                 u16 lid);
  
diff --combined include/rdma/rdmavt_qp.h
index d664d2e762808321d7b2aece6d3146b713b51631,13f43b3527a83b3d328ea0162e1aa39184549ba2..07e2fffa6de686fbdcb6569d28db2cd9d329f4cf
@@@ -396,7 -396,7 +396,7 @@@ struct rvt_srq 
  #define RVT_QPNMAP_ENTRIES          (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
  #define RVT_BITS_PER_PAGE           (PAGE_SIZE * BITS_PER_BYTE)
  #define RVT_BITS_PER_PAGE_MASK      (RVT_BITS_PER_PAGE - 1)
- #define RVT_QPN_MASK              0xFFFFFF
+ #define RVT_QPN_MASK              IB_QPN_MASK
  
  /*
   * QPN-map pages start out as NULL, they get allocated upon
@@@ -647,20 -647,6 +647,20 @@@ static inline u32 rvt_div_mtu(struct rv
        return len >> qp->log_pmtu;
  }
  
 +/**
 + * rvt_timeout_to_jiffies - Convert a ULP timeout input into jiffies
 + * @timeout - timeout input (0 - 31).
 + *
 + * Return a timeout value in jiffies.
 + */
 +static inline unsigned long rvt_timeout_to_jiffies(u8 timeout)
 +{
 +      if (timeout > 31)
 +              timeout = 31;
 +
 +      return usecs_to_jiffies(1U << timeout) * 4096UL / 1000UL;
 +}
 +
  extern const int  ib_rvt_state_ops[];
  
  struct rvt_dev_info;