IB/qib: Change SDMA progression mode depending on single- or multi-rail
authorCQ Tang <cq.tang@intel.com>
Thu, 30 Jan 2014 22:36:00 +0000 (17:36 -0500)
committerRoland Dreier <roland@purestorage.com>
Thu, 20 Mar 2014 17:19:12 +0000 (10:19 -0700)
Improve performance by changing the behavour of the driver when all
SDMA descriptors are in use, and the processes adding new descriptors
are single- or multi-rail.

For single-rail processes, the driver will block the call and finish
posting all SDMA descriptors onto the hardware queue before returning
back to PSM.  Repeated kernel calls are slower than blocking.

For multi-rail processes, the driver will return to PSM as quick as
possible so PSM can feed packets to other rail.  If all hardware
queues are full, PSM will buffer the remaining SDMA descriptors until
notified by interrupt that space is available.

This patch builds a red-black tree to track the number rails opened by
a particular PID. If the number is more than one, it is a multi-rail
PSM process, otherwise, it is a single-rail process.

Reviewed-by: Dean Luick <dean.luick@intel.com>
Reviewed-by: John A Gregor <john.a.gregor@intel.com>
Reviewed-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: CQ Tang <cq.tang@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
drivers/infiniband/hw/qib/qib_user_sdma.c

index 165aee2ca8a0c38dfc1e00b1965172acf4f30193..d2806cae234c254ef7e71ee58e03df5d2c1efdc9 100644 (file)
 /* attempt to drain the queue for 5secs */
 #define QIB_USER_SDMA_DRAIN_TIMEOUT 500
 
+/*
+ * track how many times a process open this driver.
+ */
+static struct rb_root qib_user_sdma_rb_root = RB_ROOT;
+
+struct qib_user_sdma_rb_node {
+       struct rb_node node;
+       int refcount;
+       pid_t pid;
+};
+
 struct qib_user_sdma_pkt {
        struct list_head list;  /* list element */
 
@@ -120,15 +131,60 @@ struct qib_user_sdma_queue {
        /* dma page table */
        struct rb_root dma_pages_root;
 
+       struct qib_user_sdma_rb_node *sdma_rb_node;
+
        /* protect everything above... */
        struct mutex lock;
 };
 
+static struct qib_user_sdma_rb_node *
+qib_user_sdma_rb_search(struct rb_root *root, pid_t pid)
+{
+       struct qib_user_sdma_rb_node *sdma_rb_node;
+       struct rb_node *node = root->rb_node;
+
+       while (node) {
+               sdma_rb_node = container_of(node,
+                       struct qib_user_sdma_rb_node, node);
+               if (pid < sdma_rb_node->pid)
+                       node = node->rb_left;
+               else if (pid > sdma_rb_node->pid)
+                       node = node->rb_right;
+               else
+                       return sdma_rb_node;
+       }
+       return NULL;
+}
+
+static int
+qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new)
+{
+       struct rb_node **node = &(root->rb_node);
+       struct rb_node *parent = NULL;
+       struct qib_user_sdma_rb_node *got;
+
+       while (*node) {
+               got = container_of(*node, struct qib_user_sdma_rb_node, node);
+               parent = *node;
+               if (new->pid < got->pid)
+                       node = &((*node)->rb_left);
+               else if (new->pid > got->pid)
+                       node = &((*node)->rb_right);
+               else
+                       return 0;
+       }
+
+       rb_link_node(&new->node, parent, node);
+       rb_insert_color(&new->node, root);
+       return 1;
+}
+
 struct qib_user_sdma_queue *
 qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
 {
        struct qib_user_sdma_queue *pq =
                kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL);
+       struct qib_user_sdma_rb_node *sdma_rb_node;
 
        if (!pq)
                goto done;
@@ -138,6 +194,7 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
        pq->num_pending = 0;
        pq->num_sending = 0;
        pq->added = 0;
+       pq->sdma_rb_node = NULL;
 
        INIT_LIST_HEAD(&pq->sent);
        spin_lock_init(&pq->sent_lock);
@@ -163,8 +220,30 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
 
        pq->dma_pages_root = RB_ROOT;
 
+       sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root,
+                                       current->pid);
+       if (sdma_rb_node) {
+               sdma_rb_node->refcount++;
+       } else {
+               int ret;
+               sdma_rb_node = kmalloc(sizeof(
+                       struct qib_user_sdma_rb_node), GFP_KERNEL);
+               if (!sdma_rb_node)
+                       goto err_rb;
+
+               sdma_rb_node->refcount = 1;
+               sdma_rb_node->pid = current->pid;
+
+               ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root,
+                                       sdma_rb_node);
+               BUG_ON(ret == 0);
+       }
+       pq->sdma_rb_node = sdma_rb_node;
+
        goto done;
 
+err_rb:
+       dma_pool_destroy(pq->header_cache);
 err_slab:
        kmem_cache_destroy(pq->pkt_slab);
 err_kfree:
@@ -1020,8 +1099,13 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
        if (!pq)
                return;
 
-       kmem_cache_destroy(pq->pkt_slab);
+       pq->sdma_rb_node->refcount--;
+       if (pq->sdma_rb_node->refcount == 0) {
+               rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root);
+               kfree(pq->sdma_rb_node);
+       }
        dma_pool_destroy(pq->header_cache);
+       kmem_cache_destroy(pq->pkt_slab);
        kfree(pq);
 }
 
@@ -1241,26 +1325,52 @@ static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
                                 struct qib_user_sdma_queue *pq,
                                 struct list_head *pktlist, int count)
 {
-       int ret = 0;
        unsigned long flags;
 
        if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
                return -ECOMM;
 
-       spin_lock_irqsave(&ppd->sdma_lock, flags);
-
-       if (unlikely(!__qib_sdma_running(ppd))) {
-               ret = -ECOMM;
-               goto unlock;
+       /* non-blocking mode */
+       if (pq->sdma_rb_node->refcount > 1) {
+               spin_lock_irqsave(&ppd->sdma_lock, flags);
+               if (unlikely(!__qib_sdma_running(ppd))) {
+                       spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+                       return -ECOMM;
+               }
+               pq->num_pending += count;
+               list_splice_tail_init(pktlist, &ppd->sdma_userpending);
+               qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
+               spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+               return 0;
        }
 
+       /* In this case, descriptors from this process are not
+        * linked to ppd pending queue, interrupt handler
+        * won't update this process, it is OK to directly
+        * modify without sdma lock.
+        */
+
+
        pq->num_pending += count;
-       list_splice_tail_init(pktlist, &ppd->sdma_userpending);
-       qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
+       /*
+        * Blocking mode for single rail process, we must
+        * release/regain sdma_lock to give other process
+        * chance to make progress. This is important for
+        * performance.
+        */
+       do {
+               spin_lock_irqsave(&ppd->sdma_lock, flags);
+               if (unlikely(!__qib_sdma_running(ppd))) {
+                       spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+                       return -ECOMM;
+               }
+               qib_user_sdma_send_desc(ppd, pktlist);
+               if (!list_empty(pktlist))
+                       qib_sdma_make_progress(ppd);
+               spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+       } while (!list_empty(pktlist));
 
-unlock:
-       spin_unlock_irqrestore(&ppd->sdma_lock, flags);
-       return ret;
+       return 0;
 }
 
 int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
@@ -1290,7 +1400,7 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
                qib_user_sdma_queue_clean(ppd, pq);
 
        while (dim) {
-               int mxp = 8;
+               int mxp = 1;
                int ndesc = 0;
 
                ret = qib_user_sdma_queue_pkts(dd, ppd, pq,