IB/hfi1: Add global structure for affinity assignments

author Dennis Dalessandro <dennis.dalessandro@intel.com>

Mon, 25 Jul 2016 14:52:36 +0000 (07:52 -0700)

committer Doug Ledford <dledford@redhat.com>

Tue, 2 Aug 2016 19:45:14 +0000 (15:45 -0400)
author Dennis Dalessandro <dennis.dalessandro@intel.com>
Mon, 25 Jul 2016 14:52:36 +0000 (07:52 -0700)
committer Doug Ledford <dledford@redhat.com>
Tue, 2 Aug 2016 19:45:14 +0000 (15:45 -0400)
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c

index 14d7eeb09be6545f5f21144f0f11ea41c53203a4..164769952ff7d8065dfc93e1650fe09e2a48498d 100644 (file)
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -53,6 +53,11 @@
  #include "sdma.h"
  #include "trace.h"
  
+struct hfi1_affinity_node_list node_affinity = { /* single driver-wide affinity object, statically initialized */
+       .list = LIST_HEAD_INIT(node_affinity.list), /* list of hfi1_affinity_node entries; starts empty */
+       .lock = __SPIN_LOCK_UNLOCKED(&node_affinity.lock), /* taken around list lookups/inserts and mask-set updates */
+};
+
  /* Name of IRQ types, indexed by enum irq_type */
  static const char * const irq_type_names[] = {
         "SDMA",
@@ -69,45 +74,100 @@ static inline void init_cpu_mask_set(struct cpu_mask_set *set)
  }
  
  /* Initialize non-HT cpu cores mask */
-int init_real_cpu_mask(struct hfi1_devdata *dd)
+void init_real_cpu_mask(void)
  {
-       struct hfi1_affinity *info;
         int possible, curr_cpu, i, ht;
  
-       info = kzalloc(sizeof(*info), GFP_KERNEL);
-       if (!info)
-               return -ENOMEM;
-
-       cpumask_clear(&info->real_cpu_mask);
+       cpumask_clear(&node_affinity.real_cpu_mask);
  
         /* Start with cpu online mask as the real cpu mask */
-       cpumask_copy(&info->real_cpu_mask, cpu_online_mask);
+       cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask);
  
         /*
          * Remove HT cores from the real cpu mask.  Do this in two steps below.
          */
-       possible = cpumask_weight(&info->real_cpu_mask);
+       possible = cpumask_weight(&node_affinity.real_cpu_mask);
         ht = cpumask_weight(topology_sibling_cpumask(
-                                       cpumask_first(&info->real_cpu_mask)));
+                               cpumask_first(&node_affinity.real_cpu_mask)));
         /*
          * Step 1.  Skip over the first N HT siblings and use them as the
          * "real" cores.  Assumes that HT cores are not enumerated in
          * succession (except in the single core case).
          */
-       curr_cpu = cpumask_first(&info->real_cpu_mask);
+       curr_cpu = cpumask_first(&node_affinity.real_cpu_mask);
         for (i = 0; i < possible / ht; i++)
-               curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
+               curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
         /*
          * Step 2.  Remove the remaining HT siblings.  Use cpumask_next() to
          * skip any gaps.
          */
         for (; i < possible; i++) {
-               cpumask_clear_cpu(curr_cpu, &info->real_cpu_mask);
-               curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
+               cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask);
+               curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
         }
+}
  
-       dd->affinity = info;
-       return 0;
+void node_affinity_init(void) /* fill in the global CPU masks of node_affinity */
+{
+       cpumask_copy(&node_affinity.proc.mask, cpu_online_mask); /* process set starts as every online CPU */
+       /*
+        * The real cpu mask is part of the affinity struct but it has to be
+        * initialized early. It is needed to calculate the number of user
+        * contexts in set_up_context_variables().
+        */
+       init_real_cpu_mask(); /* writes node_affinity.real_cpu_mask */
+}
+
+void node_affinity_destroy(void) /* unlink and free every entry on node_affinity.list */
+{
+       struct list_head *pos, *q; /* q holds the next element so pos can be removed mid-walk */
+       struct hfi1_affinity_node *entry;
+
+       spin_lock(&node_affinity.lock); /* lock is held across the whole teardown loop */
+       list_for_each_safe(pos, q, &node_affinity.list) {
+               entry = list_entry(pos, struct hfi1_affinity_node,
+                                  list);
+               list_del(pos);
+               kfree(entry); /* entries are kzalloc()ed in node_affinity_allocate() */
+       }
+       spin_unlock(&node_affinity.lock);
+}
+
+static struct hfi1_affinity_node *node_affinity_allocate(int node) /* returns a new, unlinked entry for @node, or NULL */
+{
+       struct hfi1_affinity_node *entry;
+
+       entry = kzalloc(sizeof(*entry), GFP_KERNEL); /* zeroed; GFP_KERNEL may sleep, so do not call with node_affinity.lock held */
+       if (!entry)
+               return NULL; /* allocation failed; caller reports the error */
+       entry->node = node;
+       INIT_LIST_HEAD(&entry->list); /* self-linked; not yet on node_affinity.list */
+
+       return entry;
+}
+
+/*
+ * Append @entry to the tail of the global node_affinity.list.
+ * It *must* be called with node_affinity.lock held.
+ */
+static void node_affinity_add_tail(struct hfi1_affinity_node *entry)
+{
+       list_add_tail(&entry->list, &node_affinity.list);
+}
+
+/* Return the entry whose ->node equals @node, or NULL if none. It must be called with node_affinity.lock held */
+static struct hfi1_affinity_node *node_affinity_lookup(int node)
+{
+       struct list_head *pos;
+       struct hfi1_affinity_node *entry;
+
+       list_for_each(pos, &node_affinity.list) {
+               entry = list_entry(pos, struct hfi1_affinity_node, list);
+               if (entry->node == node)
+                       return entry; /* first match wins */
+       }
+
+       return NULL; /* nothing registered for this node; callers must check before dereferencing */
  }
  
  /*
@@ -121,10 +181,10 @@ int init_real_cpu_mask(struct hfi1_devdata *dd)
   * to the node relative 1 as necessary.
   *
   */
-void hfi1_dev_affinity_init(struct hfi1_devdata *dd)
+int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
  {
         int node = pcibus_to_node(dd->pcidev->bus);
-       struct hfi1_affinity *info = dd->affinity;
+       struct hfi1_affinity_node *entry;
         const struct cpumask *local_mask;
         int curr_cpu, possible, i;
  
@@ -132,55 +192,75 @@ void hfi1_dev_affinity_init(struct hfi1_devdata *dd)
                 node = numa_node_id();
         dd->node = node;
  
-       spin_lock_init(&info->lock);
-
-       init_cpu_mask_set(&info->def_intr);
-       init_cpu_mask_set(&info->rcv_intr);
-       init_cpu_mask_set(&info->proc);
-
         local_mask = cpumask_of_node(dd->node);
         if (cpumask_first(local_mask) >= nr_cpu_ids)
                 local_mask = topology_core_cpumask(0);
-       /* Use the "real" cpu mask of this node as the default */
-       cpumask_and(&info->def_intr.mask, &info->real_cpu_mask, local_mask);
-
-       /*  fill in the receive list */
-       possible = cpumask_weight(&info->def_intr.mask);
-       curr_cpu = cpumask_first(&info->def_intr.mask);
-       if (possible == 1) {
-               /*  only one CPU, everyone will use it */
-               cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
-       } else {
-               /*
-                * Retain the first CPU in the default list for the control
-                * context.
-                */
-               curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
-               /*
-                * Remove the remaining kernel receive queues from
-                * the default list and add them to the receive list.
-                */
-               for (i = 0; i < dd->n_krcv_queues - 1; i++) {
-                       cpumask_clear_cpu(curr_cpu, &info->def_intr.mask);
-                       cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
-                       curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
-                       if (curr_cpu >= nr_cpu_ids)
-                               break;
+
+       spin_lock(&node_affinity.lock);
+       entry = node_affinity_lookup(dd->node);
+       spin_unlock(&node_affinity.lock);
+
+       /*
+        * If this is the first time this NUMA node's affinity is used,
+        * create an entry in the global affinity structure and initialize it.
+        */
+       if (!entry) {
+               entry = node_affinity_allocate(node);
+               if (!entry) {
+                       dd_dev_err(dd,
+                                  "Unable to allocate global affinity node\n");
+                       return -ENOMEM;
                 }
-       }
+               init_cpu_mask_set(&entry->def_intr);
+               init_cpu_mask_set(&entry->rcv_intr);
+               /* Use the "real" cpu mask of this node as the default */
+               cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
+                           local_mask);
+
+               /* fill in the receive list */
+               possible = cpumask_weight(&entry->def_intr.mask);
+               curr_cpu = cpumask_first(&entry->def_intr.mask);
+
+               if (possible == 1) {
+                       /* only one CPU, everyone will use it */
+                       cpumask_set_cpu(curr_cpu, &entry->rcv_intr.mask);
+               } else {
+                       /*
+                        * Retain the first CPU in the default list for the
+                        * control context.
+                        */
+                       curr_cpu = cpumask_next(curr_cpu,
+                                               &entry->def_intr.mask);
  
-       cpumask_copy(&info->proc.mask, cpu_online_mask);
-}
+                       /*
+                        * Remove the remaining kernel receive queues from
+                        * the default list and add them to the receive list.
+                        */
+                       for (i = 0; i < dd->n_krcv_queues - 1; i++) {
+                               cpumask_clear_cpu(curr_cpu,
+                                                 &entry->def_intr.mask);
+                               cpumask_set_cpu(curr_cpu,
+                                               &entry->rcv_intr.mask);
+                               curr_cpu = cpumask_next(curr_cpu,
+                                                       &entry->def_intr.mask);
+                               if (curr_cpu >= nr_cpu_ids)
+                                       break;
+                       }
+               }
  
-void hfi1_dev_affinity_free(struct hfi1_devdata *dd)
-{
-       kfree(dd->affinity);
+               spin_lock(&node_affinity.lock);
+               node_affinity_add_tail(entry);
+               spin_unlock(&node_affinity.lock);
+       }
+
+       return 0;
  }
  
  int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
  {
         int ret;
         cpumask_var_t diff;
+       struct hfi1_affinity_node *entry;
         struct cpu_mask_set *set;
         struct sdma_engine *sde = NULL;
         struct hfi1_ctxtdata *rcd = NULL;
@@ -194,21 +274,25 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
         if (!ret)
                 return -ENOMEM;
  
+       spin_lock(&node_affinity.lock);
+       entry = node_affinity_lookup(dd->node);
+       spin_unlock(&node_affinity.lock);
+
         switch (msix->type) {
         case IRQ_SDMA:
                 sde = (struct sdma_engine *)msix->arg;
                 scnprintf(extra, 64, "engine %u", sde->this_idx);
                 /* fall through */
         case IRQ_GENERAL:
-               set = &dd->affinity->def_intr;
+               set = &entry->def_intr;
                 break;
         case IRQ_RCVCTXT:
                 rcd = (struct hfi1_ctxtdata *)msix->arg;
                 if (rcd->ctxt == HFI1_CTRL_CTXT) {
-                       set = &dd->affinity->def_intr;
+                       set = &entry->def_intr;
                         cpu = cpumask_first(&set->mask);
                 } else {
-                       set = &dd->affinity->rcv_intr;
+                       set = &entry->rcv_intr;
                 }
                 scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
                 break;
@@ -222,8 +306,8 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
          * is set above.  Skip accounting for it.  Everything else finds its
          * CPU here.
          */
-       if (cpu == -1) {
-               spin_lock(&dd->affinity->lock);
+       if (cpu == -1 && set) {
+               spin_lock(&node_affinity.lock);
                 if (cpumask_equal(&set->mask, &set->used)) {
                         /*
                          * We've used up all the CPUs, bump up the generation
@@ -235,7 +319,7 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
                 cpumask_andnot(diff, &set->mask, &set->used);
                 cpu = cpumask_first(diff);
                 cpumask_set_cpu(cpu, &set->used);
-               spin_unlock(&dd->affinity->lock);
+               spin_unlock(&node_affinity.lock);
         }
  
         switch (msix->type) {
@@ -263,30 +347,35 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
  {
         struct cpu_mask_set *set = NULL;
         struct hfi1_ctxtdata *rcd;
+       struct hfi1_affinity_node *entry;
+
+       spin_lock(&node_affinity.lock);
+       entry = node_affinity_lookup(dd->node);
+       spin_unlock(&node_affinity.lock);
  
         switch (msix->type) {
         case IRQ_SDMA:
         case IRQ_GENERAL:
-               set = &dd->affinity->def_intr;
+               set = &entry->def_intr;
                 break;
         case IRQ_RCVCTXT:
                 rcd = (struct hfi1_ctxtdata *)msix->arg;
                 /* only do accounting for non control contexts */
                 if (rcd->ctxt != HFI1_CTRL_CTXT)
-                       set = &dd->affinity->rcv_intr;
+                       set = &entry->rcv_intr;
                 break;
         default:
                 return;
         }
  
         if (set) {
-               spin_lock(&dd->affinity->lock);
+               spin_lock(&node_affinity.lock);
                 cpumask_andnot(&set->used, &set->used, &msix->mask);
                 if (cpumask_empty(&set->used) && set->gen) {
                         set->gen--;
                         cpumask_copy(&set->used, &set->mask);
                 }
-               spin_unlock(&dd->affinity->lock);
+               spin_unlock(&node_affinity.lock);
         }
  
         irq_set_affinity_hint(msix->msix.vector, NULL);
@@ -297,9 +386,10 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
  {
         int cpu = -1, ret;
         cpumask_var_t diff, mask, intrs;
+       struct hfi1_affinity_node *entry;
         const struct cpumask *node_mask,
                 *proc_mask = tsk_cpus_allowed(current);
-       struct cpu_mask_set *set = &dd->affinity->proc;
+       struct cpu_mask_set *set = &node_affinity.proc;
  
         /*
          * check whether process/context affinity has already
@@ -338,7 +428,7 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
         if (!ret)
                 goto free_mask;
  
-       spin_lock(&dd->affinity->lock);
+       spin_lock(&node_affinity.lock);
         /*
          * If we've used all available CPUs, clear the mask and start
          * overloading.
@@ -348,13 +438,14 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
                 cpumask_clear(&set->used);
         }
  
+       entry = node_affinity_lookup(dd->node);
         /* CPUs used by interrupt handlers */
-       cpumask_copy(intrs, (dd->affinity->def_intr.gen ?
-                            &dd->affinity->def_intr.mask :
-                            &dd->affinity->def_intr.used));
-       cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ?
-                                 &dd->affinity->rcv_intr.mask :
-                                 &dd->affinity->rcv_intr.used));
+       cpumask_copy(intrs, (entry->def_intr.gen ?
+                            &entry->def_intr.mask :
+                            &entry->def_intr.used));
+       cpumask_or(intrs, intrs, (entry->rcv_intr.gen ?
+                                 &entry->rcv_intr.mask :
+                                 &entry->rcv_intr.used));
         hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl",
                   cpumask_pr_args(intrs));
  
@@ -400,7 +491,7 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
                 cpu = -1;
         else
                 cpumask_set_cpu(cpu, &set->used);
-       spin_unlock(&dd->affinity->lock);
+       spin_unlock(&node_affinity.lock);
  
         free_cpumask_var(intrs);
  free_mask:
@@ -413,16 +504,16 @@ done:
  
  void hfi1_put_proc_affinity(struct hfi1_devdata *dd, int cpu)
  {
-       struct cpu_mask_set *set = &dd->affinity->proc;
+       struct cpu_mask_set *set = &node_affinity.proc;
  
         if (cpu < 0)
                 return;
-       spin_lock(&dd->affinity->lock);
+       spin_lock(&node_affinity.lock);
         cpumask_clear_cpu(cpu, &set->used);
         if (cpumask_empty(&set->used) && set->gen) {
                 set->gen--;
                 cpumask_copy(&set->used, &set->mask);
         }
-       spin_unlock(&dd->affinity->lock);
+       spin_unlock(&node_affinity.lock);
  }
  
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h

index 20f52fe7409161cb772b90aae96f8e4c7a012e05..ad3e730a8d8f7e4d0836c025160888253b22b21e 100644 (file)
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -82,11 +82,9 @@ struct hfi1_affinity {
  struct hfi1_msix_entry;
  
  /* Initialize non-HT cpu cores mask */
-int init_real_cpu_mask(struct hfi1_devdata *);
+void init_real_cpu_mask(void);
  /* Initialize driver affinity data */
-void hfi1_dev_affinity_init(struct hfi1_devdata *);
-/* Free driver affinity data */
-void hfi1_dev_affinity_free(struct hfi1_devdata *);
+int hfi1_dev_affinity_init(struct hfi1_devdata *);
  /*
   * Set IRQ affinity to a CPU. The function will determine the
   * CPU and set the affinity to it.
@@ -105,4 +103,23 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *, int);
  /* Release a CPU used by a user process. */
  void hfi1_put_proc_affinity(struct hfi1_devdata *, int);
  
+struct hfi1_affinity_node { /* interrupt affinity bookkeeping for one NUMA node */
+       int node; /* node id; the key matched by node_affinity_lookup() */
+       struct cpu_mask_set def_intr; /* CPU set used for SDMA, general and control-context interrupts */
+       struct cpu_mask_set rcv_intr; /* CPU set used for non-control receive context interrupts */
+       struct list_head list; /* linkage on node_affinity.list */
+};
+
+struct hfi1_affinity_node_list { /* type of the global node_affinity object */
+       struct list_head list; /* head of the hfi1_affinity_node entries */
+       struct cpumask real_cpu_mask; /* online CPUs with HT siblings removed by init_real_cpu_mask() */
+       struct cpu_mask_set proc; /* CPU set used for user process affinity */
+       /* protects the node list and the used/gen state of the cpu mask sets */
+       spinlock_t lock;
+};
+
+void node_affinity_init(void); /* initialize node_affinity.proc.mask and node_affinity.real_cpu_mask */
+void node_affinity_destroy(void); /* free every entry on node_affinity.list */
+extern struct hfi1_affinity_node_list node_affinity; /* defined in affinity.c */
+
  #endif /* _HFI1_AFFINITY_H */
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c

index 97ce886bb17154357b1b54ab5f1e6b19251640c5..0de6c0ca70782d89729e109af879bc34f8b03b8b 100644 (file)
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -63,6 +63,7 @@
  #include "efivar.h"
  #include "platform.h"
  #include "aspm.h"
+#include "affinity.h"
  
  #define NUM_IB_PORTS 1
  
@@ -12838,7 +12839,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
          */
         if (num_user_contexts < 0)
                 num_user_contexts =
-                       cpumask_weight(&dd->affinity->real_cpu_mask);
+                       cpumask_weight(&node_affinity.real_cpu_mask);
  
         total_contexts = num_kernel_contexts + num_user_contexts;
  
@@ -14468,19 +14469,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
                  (dd->revision >> CCE_REVISION_SW_SHIFT)
                     & CCE_REVISION_SW_MASK);
  
-       /*
-        * The real cpu mask is part of the affinity struct but has to be
-        * initialized earlier than the rest of the affinity struct because it
-        * is needed to calculate the number of user contexts in
-        * set_up_context_variables(). However, hfi1_dev_affinity_init(),
-        * which initializes the rest of the affinity struct members,
-        * depends on set_up_context_variables() for the number of kernel
-        * contexts, so it cannot be called before set_up_context_variables().
-        */
-       ret = init_real_cpu_mask(dd);
-       if (ret)
-               goto bail_cleanup;
-
         ret = set_up_context_variables(dd);
         if (ret)
                 goto bail_cleanup;
@@ -14494,7 +14482,9 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
         /* set up KDETH QP prefix in both RX and TX CSRs */
         init_kdeth_qp(dd);
  
-       hfi1_dev_affinity_init(dd);
+       ret = hfi1_dev_affinity_init(dd);
+       if (ret)
+               goto bail_cleanup;
  
         /* send contexts must be set up before receive contexts */
         ret = init_send_contexts(dd);
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c

index eed971ccd2a1e88e7180f2eae5aba92da5448c96..b0c3e8a977252c540b7e3c35cc6bfab2bef3560b 100644 (file)
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -64,6 +64,7 @@
  #include "debugfs.h"
  #include "verbs.h"
  #include "aspm.h"
+#include "affinity.h"
  
  #undef pr_fmt
  #define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -1004,7 +1005,6 @@ static void __hfi1_free_devdata(struct kobject *kobj)
         rcu_barrier(); /* wait for rcu callbacks to complete */
         free_percpu(dd->int_counter);
         free_percpu(dd->rcv_limit);
-       hfi1_dev_affinity_free(dd);
         free_percpu(dd->send_schedule);
         rvt_dealloc_device(&dd->verbs_dev.rdi);
  }
@@ -1198,6 +1198,8 @@ static int __init hfi1_mod_init(void)
         if (ret)
                 goto bail;
  
+       node_affinity_init();
+
         /* validate max MTU before any devices start */
         if (!valid_opa_max_mtu(hfi1_max_mtu)) {
                 pr_err("Invalid max_mtu 0x%x, using 0x%x instead\n",
@@ -1278,6 +1280,7 @@ module_init(hfi1_mod_init);
  static void __exit hfi1_mod_cleanup(void)
  {
         pci_unregister_driver(&hfi1_pci_driver);
+       node_affinity_destroy();
         hfi1_wss_exit();
         hfi1_dbg_exit();
         hfi1_cpulist_count = 0;
author	Dennis Dalessandro <dennis.dalessandro@intel.com>
	Mon, 25 Jul 2016 14:52:36 +0000 (07:52 -0700)
committer	Doug Ledford <dledford@redhat.com>
	Tue, 2 Aug 2016 19:45:14 +0000 (15:45 -0400)
drivers/infiniband/hw/hfi1/affinity.c		patch \| blob \| blame \| history
drivers/infiniband/hw/hfi1/affinity.h		patch \| blob \| blame \| history
drivers/infiniband/hw/hfi1/chip.c		patch \| blob \| blame \| history
drivers/infiniband/hw/hfi1/init.c		patch \| blob \| blame \| history