IB/hfi1: Add transmit fault injection feature
author Don Hiatt <don.hiatt@intel.com>
Tue, 21 Mar 2017 00:26:20 +0000 (17:26 -0700)
committer Doug Ledford <dledford@redhat.com>
Wed, 5 Apr 2017 18:45:09 +0000 (14:45 -0400)
Add ability to fault packets on transmit by opcode.
Dropping by packet can be achieved by setting the mask to 0.

In order to drop non-verbs traffic we set PbcInsertHcrc
to NONE (0x2). The packet will still be delivered to
the receiving node but a KHdrHCRCErr (KDETH packet
with a bad HCRC) will be triggered and the packet will
not be delivered to the correct context.

In order to drop regular verbs traffic we set the
PbcTestEbp flag. The packet will still be delivered
to the receiving node but a 'late ebp error' will
be triggered and will be dropped.

A global toggle (/sys/kernel/debug/hfi1/hfi1_X/fault_suppress_err)
has been added to suppress the error messages on the receive
node when a packet was faulted on the sending node.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Don Hiatt <don.hiatt@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hfi1/chip.c
drivers/infiniband/hw/hfi1/debugfs.c
drivers/infiniband/hw/hfi1/debugfs.h
drivers/infiniband/hw/hfi1/driver.c
drivers/infiniband/hw/hfi1/verbs.c
drivers/infiniband/hw/hfi1/verbs.h
include/rdma/ib_pack.h

index 77f4b41de2b0e1fd9a2dfcbef8e8487dedbf6dd1..79a316acb8f48a3372d3da6634d334d0e986c277 100644 (file)
@@ -64,6 +64,7 @@
 #include "platform.h"
 #include "aspm.h"
 #include "affinity.h"
+#include "debugfs.h"
 
 #define NUM_IB_PORTS 1
 
@@ -7898,6 +7899,9 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
                reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
        }
 
+       if (unlikely(hfi1_dbg_fault_suppress_err(&dd->verbs_dev)))
+               reg &= ~DCC_ERR_FLG_LATE_EBP_ERR_SMASK;
+
        /* report any remaining errors */
        if (reg)
                dd_dev_info_ratelimited(dd, "DCC Error: %s\n",
index cac6d5256f4085bf9e10eed17b41ba530fb3b7f6..dc2c1c993f04bbc1efbf97ab3e4ff4cac22317f2 100644 (file)
@@ -1240,6 +1240,11 @@ static int fault_init_debugfs(struct hfi1_ibdev *ibd)
        return ret;
 }
 
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+       return ibd->fault_suppress_err;
+}
+
 bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx)
 {
        bool ret = false;
@@ -1329,6 +1334,9 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
                }
 
 #ifdef CONFIG_FAULT_INJECTION
+       debugfs_create_bool("fault_suppress_err", 0600,
+                           ibd->hfi1_ibdev_dbg,
+                           &ibd->fault_suppress_err);
        fault_init_debugfs(ibd);
 #endif
 }
index 70be5ca14736a0b793640b3645b3b7f726b563b5..38c38a98156d67e4b2da56ed2ffb2cb585b3fe6a 100644 (file)
@@ -75,6 +75,7 @@ struct fault_packet {
 
 bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx);
 bool hfi1_dbg_fault_packet(struct hfi1_packet *packet);
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd);
 #else
 static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
 {
@@ -86,6 +87,11 @@ static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
 {
        return false;
 }
+
+static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+       return false;
+}
 #endif
 
 #else
@@ -115,6 +121,11 @@ static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
 {
        return false;
 }
+
+static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+       return false;
+}
 #endif
 
 #endif                          /* _HFI1_DEBUGFS_H */
index c0b012f6e11cbcde6b7c950b801533ceda0d7bf9..64bdbcef5f05e43cfb42db5721378b68f7b67463 100644 (file)
@@ -1367,6 +1367,11 @@ int process_receive_ib(struct hfi1_packet *packet)
                          packet->updegr,
                          rhf_egr_index(packet->rhf));
 
+       if (unlikely(
+                (hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
+                (packet->rhf & RHF_DC_ERR))))
+               return RHF_RCV_CONTINUE;
+
        if (unlikely(rhf_err_flags(packet->rhf))) {
                handle_eflags(packet);
                return RHF_RCV_CONTINUE;
@@ -1402,6 +1407,12 @@ int process_receive_bypass(struct hfi1_packet *packet)
 
 int process_receive_error(struct hfi1_packet *packet)
 {
+       /* KHdrHCRCErr -- KDETH packet with a bad HCRC */
+       if (unlikely(
+                hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
+                rhf_rcv_type_err(packet->rhf) == 3))
+               return RHF_RCV_CONTINUE;
+
        handle_eflags(packet);
 
        if (unlikely(rhf_err_flags(packet->rhf)))
index 9f016daba2565c86aff6219d0aaf27cc9cf5b7e1..070a349afd7879b059311abb45daaba0a3c5e9cf 100644 (file)
@@ -518,6 +518,35 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
        return NULL;
 }
 
+static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
+{
+#ifdef CONFIG_FAULT_INJECTION
+       if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP)
+               /*
+                * In order to drop non-IB traffic we
+                * set PbcInsertHcrc to NONE (0x2).
+                * The packet will still be delivered
+                * to the receiving node but a
+                * KHdrHCRCErr (KDETH packet with a bad
+                * HCRC) will be triggered and the
+                * packet will not be delivered to the
+                * correct context.
+                */
+               pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT;
+       else
+               /*
+                * In order to drop regular verbs
+                * traffic we set the PbcTestEbp
+                * flag. The packet will still be
+                * delivered to the receiving node but
+                * a 'late ebp error' will be triggered
+                * and the packet will be dropped.
+                */
+               pbc |= PBC_TEST_EBP;
+#endif
+       return pbc;
+}
+
 /**
  * hfi1_ib_rcv - process an incoming packet
  * @packet: data packet information
@@ -803,7 +832,6 @@ static int build_verbs_tx_desc(
                if (ret)
                        goto bail_txadd;
        }
-
        /* add the ulp payload - if any. tx->ss can be NULL for acks */
        if (tx->ss)
                ret = build_verbs_ulp_payload(sde, length, tx);
@@ -822,7 +850,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
        struct hfi1_ibdev *dev = ps->dev;
        struct hfi1_pportdata *ppd = ps->ppd;
        struct verbs_txreq *tx;
-       u64 pbc_flags = 0;
        u8 sc5 = priv->s_sc;
 
        int ret;
@@ -831,12 +858,16 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
        if (!sdma_txreq_built(&tx->txreq)) {
                if (likely(pbc == 0)) {
                        u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+                       u8 opcode = get_opcode(&tx->phdr.hdr);
+
                        /* No vl15 here */
                        /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
-                       pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+                       pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
 
+                       if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
+                               pbc = hfi1_fault_tx(qp, opcode, pbc);
                        pbc = create_pbc(ppd,
-                                        pbc_flags,
+                                        pbc,
                                         qp->srate_mbps,
                                         vl,
                                         plen);
@@ -939,7 +970,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
        u32 plen = hdrwords + dwords + 2; /* includes pbc */
        struct hfi1_pportdata *ppd = ps->ppd;
        u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
-       u64 pbc_flags = 0;
        u8 sc5;
        unsigned long flags = 0;
        struct send_context *sc;
@@ -964,9 +994,14 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 
        if (likely(pbc == 0)) {
                u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+               struct verbs_txreq *tx = ps->s_txreq;
+               u8 opcode = get_opcode(&tx->phdr.hdr);
+
                /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
-               pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
-               pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
+               pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+               if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
+                       pbc = hfi1_fault_tx(qp, opcode, pbc);
+               pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
        }
        if (cb)
                iowait_pio_inc(&priv->s_iowait);
index 2756ec35b054b0011c14d55c299a0707c14f6769..6c549e7a25e7926d9835bd6f37ba25fd7f35b03b 100644 (file)
@@ -198,6 +198,7 @@ struct hfi1_ibdev {
 #ifdef CONFIG_FAULT_INJECTION
        struct fault_opcode *fault_opcode;
        struct fault_packet *fault_packet;
+       bool fault_suppress_err;
 #endif
 #endif
 };
index b13419ce99ff5b52d666c2af7a91f58457f58100..36655899ee028d568257f2b2cc8764b136ab0e20 100644 (file)
@@ -80,6 +80,8 @@ enum {
        IB_OPCODE_UD                                = 0x60,
        /* per IBTA 1.3 vol 1 Table 38, A10.3.2 */
        IB_OPCODE_CNP                               = 0x80,
+       /* Manufacturer specific */
+       IB_OPCODE_MSP                               = 0xe0,
 
        /* operations -- just used to define real constants */
        IB_OPCODE_SEND_FIRST                        = 0x00,