IB/hfi1: Add unique txwait_lock for txreq events
authorMike Marciniszyn <mike.marciniszyn@intel.com>
Mon, 10 Oct 2016 13:14:28 +0000 (06:14 -0700)
committerDoug Ledford <dledford@redhat.com>
Tue, 15 Nov 2016 21:25:59 +0000 (16:25 -0500)
Profiling suggests that the read_seqbegin() in
the txreq put logic is colliding with other uses
of the iowait lock.

The packet at a time use of this lock dictates a unique
lock to avoid reader/writer collisions when the number
of vTxWait events is low.

In order to support a unique lock the iowait struct embedded
in the QP is extended to remember the lock that protects the queue
head.

The QP destroy removes that QP from any wait list.  It doesn't
need to know the head because of the linked list API, but it does
need to know the lock required to protect the head.

This also opens up the wait logic to have unique per resources locks
which needs to be in future refinement.

Reviewed-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hfi1/iowait.h
drivers/infiniband/hw/hfi1/pio.c
drivers/infiniband/hw/hfi1/qp.c
drivers/infiniband/hw/hfi1/verbs.c
drivers/infiniband/hw/hfi1/verbs.h
drivers/infiniband/hw/hfi1/verbs_txreq.c

index 2ec6ef38d38953c517f71936cafa49222b63e85c..d9740ddea6f12fe856038842de3b3e7844f14315 100644 (file)
@@ -64,6 +64,7 @@ struct sdma_engine;
 /**
  * struct iowait - linkage for delayed progress/waiting
  * @list: used to add/insert into QP/PQ wait lists
+ * @lock: uses to record the list head lock
  * @tx_head: overflow list of sdma_txreq's
  * @sleep: no space callback
  * @wakeup: space callback wakeup
@@ -91,6 +92,11 @@ struct sdma_engine;
  * so sleeping is not allowed.
  *
  * The wait_dma member along with the iow
+ *
+ * The lock field is used by waiters to record
+ * the seqlock_t that guards the list head.
+ * Waiters explicity know that, but the destroy
+ * code that unwaits QPs does not.
  */
 
 struct iowait {
@@ -103,6 +109,7 @@ struct iowait {
                unsigned seq);
        void (*wakeup)(struct iowait *wait, int reason);
        void (*sdma_drained)(struct iowait *wait);
+       seqlock_t *lock;
        struct work_struct iowork;
        wait_queue_head_t wait_dma;
        wait_queue_head_t wait_pio;
@@ -141,6 +148,7 @@ static inline void iowait_init(
        void (*sdma_drained)(struct iowait *wait))
 {
        wait->count = 0;
+       wait->lock = NULL;
        INIT_LIST_HEAD(&wait->list);
        INIT_LIST_HEAD(&wait->tx_head);
        INIT_WORK(&wait->iowork, func);
index 50a3a36d93632d3e6fbb3a7afced466be2c4547b..385e4dcf2cd3c2e088db42c0f9e4db8917954639 100644 (file)
@@ -1580,6 +1580,7 @@ static void sc_piobufavail(struct send_context *sc)
                qp = iowait_to_qp(wait);
                priv = qp->priv;
                list_del_init(&priv->s_iowait.list);
+               priv->s_iowait.lock = NULL;
                /* refcount held until actual wake up */
                qps[n++] = qp;
        }
index 9fc75e7e8781be3c8cd110f6afce84e06f194cf7..d752d6768a49c4f9c151eac3b7db63e3c8ee3d9b 100644 (file)
@@ -196,15 +196,18 @@ static void flush_tx_list(struct rvt_qp *qp)
 static void flush_iowait(struct rvt_qp *qp)
 {
        struct hfi1_qp_priv *priv = qp->priv;
-       struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
        unsigned long flags;
+       seqlock_t *lock = priv->s_iowait.lock;
 
-       write_seqlock_irqsave(&dev->iowait_lock, flags);
+       if (!lock)
+               return;
+       write_seqlock_irqsave(lock, flags);
        if (!list_empty(&priv->s_iowait.list)) {
                list_del_init(&priv->s_iowait.list);
+               priv->s_iowait.lock = NULL;
                rvt_put_qp(qp);
        }
-       write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+       write_sequnlock_irqrestore(lock, flags);
 }
 
 static inline int opa_mtu_enum_to_int(int mtu)
@@ -543,6 +546,7 @@ static int iowait_sleep(
                        ibp->rvp.n_dmawait++;
                        qp->s_flags |= RVT_S_WAIT_DMA_DESC;
                        list_add_tail(&priv->s_iowait.list, &sde->dmawait);
+                       priv->s_iowait.lock = &dev->iowait_lock;
                        trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
                        rvt_get_qp(qp);
                }
@@ -964,6 +968,7 @@ void notify_error_qp(struct rvt_qp *qp)
        if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
                qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
                list_del_init(&priv->s_iowait.list);
+               priv->s_iowait.lock = NULL;
                rvt_put_qp(qp);
        }
        write_sequnlock(&dev->iowait_lock);
index 4b7a16ceb3629cad0cc8d8b5322e997ec4ed4486..feecacb62162f92ee6abf76a71d68e494c3bdf37 100644 (file)
@@ -694,6 +694,7 @@ static void mem_timer(unsigned long data)
                qp = iowait_to_qp(wait);
                priv = qp->priv;
                list_del_init(&priv->s_iowait.list);
+               priv->s_iowait.lock = NULL;
                /* refcount held until actual wake up */
                if (!list_empty(list))
                        mod_timer(&dev->mem_timer, jiffies + 1);
@@ -769,6 +770,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
                                mod_timer(&dev->mem_timer, jiffies + 1);
                        qp->s_flags |= RVT_S_WAIT_KMEM;
                        list_add_tail(&priv->s_iowait.list, &dev->memwait);
+                       priv->s_iowait.lock = &dev->iowait_lock;
                        trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
                        rvt_get_qp(qp);
                }
@@ -980,6 +982,7 @@ static int pio_wait(struct rvt_qp *qp,
                        qp->s_flags |= flag;
                        was_empty = list_empty(&sc->piowait);
                        list_add_tail(&priv->s_iowait.list, &sc->piowait);
+                       priv->s_iowait.lock = &dev->iowait_lock;
                        trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
                        rvt_get_qp(qp);
                        /* counting: only call wantpiobuf_intr if first user */
@@ -1632,6 +1635,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
        setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
 
        seqlock_init(&dev->iowait_lock);
+       seqlock_init(&dev->txwait_lock);
        INIT_LIST_HEAD(&dev->txwait);
        INIT_LIST_HEAD(&dev->memwait);
 
index 1c3815d89eb79c052d1147175538bd619cfef8a5..7a8af39e99d4b250113b3ef81fb1c26b9ad25c05 100644 (file)
@@ -180,18 +180,19 @@ struct hfi1_ibdev {
        struct rvt_dev_info rdi; /* Must be first */
 
        /* QP numbers are shared by all IB ports */
-       /* protect wait lists */
-       seqlock_t iowait_lock;
+       /* protect txwait list */
+       seqlock_t txwait_lock ____cacheline_aligned_in_smp;
        struct list_head txwait;        /* list for wait verbs_txreq */
        struct list_head memwait;       /* list for wait kernel memory */
-       struct list_head txreq_free;
        struct kmem_cache *verbs_txreq_cache;
-       struct timer_list mem_timer;
+       u64 n_txwait;
+       u64 n_kmem_wait;
 
+       /* protect iowait lists */
+       seqlock_t iowait_lock ____cacheline_aligned_in_smp;
        u64 n_piowait;
        u64 n_piodrain;
-       u64 n_txwait;
-       u64 n_kmem_wait;
+       struct timer_list mem_timer;
 
 #ifdef CONFIG_DEBUG_FS
        /* per HFI debugfs */
index 094ab829ec42fdf8ae75a720fbd2a97500cf9994..5d23172c470fd27b5f3ae66c8bb1d02cca4054b1 100644 (file)
@@ -72,22 +72,22 @@ void hfi1_put_txreq(struct verbs_txreq *tx)
        kmem_cache_free(dev->verbs_txreq_cache, tx);
 
        do {
-               seq = read_seqbegin(&dev->iowait_lock);
+               seq = read_seqbegin(&dev->txwait_lock);
                if (!list_empty(&dev->txwait)) {
                        struct iowait *wait;
 
-                       write_seqlock_irqsave(&dev->iowait_lock, flags);
+                       write_seqlock_irqsave(&dev->txwait_lock, flags);
                        wait = list_first_entry(&dev->txwait, struct iowait,
                                                list);
                        qp = iowait_to_qp(wait);
                        priv = qp->priv;
                        list_del_init(&priv->s_iowait.list);
                        /* refcount held until actual wake up */
-                       write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+                       write_sequnlock_irqrestore(&dev->txwait_lock, flags);
                        hfi1_qp_wakeup(qp, RVT_S_WAIT_TX);
                        break;
                }
-       } while (read_seqretry(&dev->iowait_lock, seq));
+       } while (read_seqretry(&dev->txwait_lock, seq));
 }
 
 struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
@@ -96,7 +96,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
 {
        struct verbs_txreq *tx = ERR_PTR(-EBUSY);
 
-       write_seqlock(&dev->iowait_lock);
+       write_seqlock(&dev->txwait_lock);
        if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
                struct hfi1_qp_priv *priv;
 
@@ -108,13 +108,14 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
                        dev->n_txwait++;
                        qp->s_flags |= RVT_S_WAIT_TX;
                        list_add_tail(&priv->s_iowait.list, &dev->txwait);
+                       priv->s_iowait.lock = &dev->txwait_lock;
                        trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX);
                        rvt_get_qp(qp);
                }
                qp->s_flags &= ~RVT_S_BUSY;
        }
 out:
-       write_sequnlock(&dev->iowait_lock);
+       write_sequnlock(&dev->txwait_lock);
        return tx;
 }