x86/intel_rdt/cqm: Improve limbo list processing

author Vikas Shivappa <vikas.shivappa@linux.intel.com>

Wed, 16 Aug 2017 01:00:43 +0000 (18:00 -0700)

committer Thomas Gleixner <tglx@linutronix.de>

Wed, 16 Aug 2017 10:05:41 +0000 (12:05 +0200)
author Vikas Shivappa <vikas.shivappa@linux.intel.com>
Wed, 16 Aug 2017 01:00:43 +0000 (18:00 -0700)
committer Thomas Gleixner <tglx@linutronix.de>
Wed, 16 Aug 2017 10:05:41 +0000 (12:05 +0200)
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c

index b8dc141896b6ee036da70329295bcb9e2afc75cb..6935c8ecad7fc1f7ce723f660ce671c38c611dfe 100644 (file)
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -426,6 +426,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
                                            GFP_KERNEL);
                 if (!d->rmid_busy_llc)
                         return -ENOMEM;
+               INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
         }
         if (is_mbm_total_enabled()) {
                 tsize = sizeof(*d->mbm_total);
@@ -536,11 +537,33 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
                 list_del(&d->list);
                 if (is_mbm_enabled())
                         cancel_delayed_work(&d->mbm_over);
+               if (is_llc_occupancy_enabled() &&  has_busy_rmid(r, d)) {
+                       /*
+                        * When a package is going down, forcefully
+                        * decrement rmid->busy. There is no way to know
+                        * that the L3 was flushed and hence may lead to
+                        * incorrect counts in rare scenarios, but leaving
+                        * the RMID as busy creates RMID leaks if the
+                        * package never comes back.
+                        */
+                       __check_limbo(d, true);
+                       cancel_delayed_work(&d->cqm_limbo);
+               }
+
                 kfree(d);
-       } else if (r == &rdt_resources_all[RDT_RESOURCE_L3] &&
-                  cpu == d->mbm_work_cpu && is_mbm_enabled()) {
-               cancel_delayed_work(&d->mbm_over);
-               mbm_setup_overflow_handler(d, 0);
+               return;
+       }
+
+       if (r == &rdt_resources_all[RDT_RESOURCE_L3]) {
+               if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
+                       cancel_delayed_work(&d->mbm_over);
+                       mbm_setup_overflow_handler(d, 0);
+               }
+               if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
+                   has_busy_rmid(r, d)) {
+                       cancel_delayed_work(&d->cqm_limbo);
+                       cqm_setup_limbo_handler(d, 0);
+               }
         }
  }
  
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h

index 3e48693906031418223516aad9ca363f5f2585ef..ebaddaeef023f8625a36f1064425826dedb1a39d 100644 (file)
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -20,6 +20,8 @@
  #define QOS_L3_MBM_TOTAL_EVENT_ID      0x02
  #define QOS_L3_MBM_LOCAL_EVENT_ID      0x03
  
+#define CQM_LIMBOCHECK_INTERVAL        1000
+
  #define MBM_CNTR_WIDTH                 24
  #define MBM_OVERFLOW_INTERVAL          1000
  
@@ -187,8 +189,11 @@ struct mbm_state {
   * @mbm_total: saved state for MBM total bandwidth
   * @mbm_local: saved state for MBM local bandwidth
   * @mbm_over:  worker to periodically read MBM h/w counters
+ * @cqm_limbo: worker to periodically read CQM h/w counters
   * @mbm_work_cpu:
   *             worker cpu for MBM h/w counters
+ * @cqm_work_cpu:
+ *             worker cpu for CQM h/w counters
   * @ctrl_val:  array of cache or mem ctrl values (indexed by CLOSID)
   * @new_ctrl:  new ctrl value to be loaded
   * @have_new_ctrl: did user provide new_ctrl for this domain
@@ -201,7 +206,9 @@ struct rdt_domain {
         struct mbm_state        *mbm_total;
         struct mbm_state        *mbm_local;
         struct delayed_work     mbm_over;
+       struct delayed_work     cqm_limbo;
         int                     mbm_work_cpu;
+       int                     cqm_work_cpu;
         u32                     *ctrl_val;
         u32                     new_ctrl;
         bool                    have_new_ctrl;
@@ -422,7 +429,12 @@ void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
                                     struct rdt_domain *d);
  void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
                     struct rdtgroup *rdtgrp, int evtid, int first);
-void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms);
+void mbm_setup_overflow_handler(struct rdt_domain *dom,
+                               unsigned long delay_ms);
  void mbm_handle_overflow(struct work_struct *work);
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
+void cqm_handle_limbo(struct work_struct *work);
+bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
+void __check_limbo(struct rdt_domain *d, bool force_free);
  
  #endif /* _ASM_X86_INTEL_RDT_H */
diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/intel_rdt_monitor.c

index 8378785883dc2e03a638e4ed973cb7772aafe739..30827510094befb37aec9be1b356201a3644dd5e 100644 (file)
--- a/arch/x86/kernel/cpu/intel_rdt_monitor.c
+++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c
@@ -33,7 +33,7 @@
  
  struct rmid_entry {
         u32                             rmid;
-       atomic_t                        busy;
+       int                             busy;
         struct list_head                list;
  };
  
@@ -45,13 +45,13 @@ struct rmid_entry {
  static LIST_HEAD(rmid_free_lru);
  
  /**
- * @rmid_limbo_lru       list of currently unused but (potentially)
+ * @rmid_limbo_count     count of currently unused but (potentially)
   *     dirty RMIDs.
- *     This list contains RMIDs that no one is currently using but that
+ *     This counts RMIDs that no one is currently using but that
   *     may have a occupancy value > intel_cqm_threshold. User can change
   *     the threshold occupancy value.
   */
-static LIST_HEAD(rmid_limbo_lru);
+unsigned int rmid_limbo_count;
  
  /**
   * @rmid_entry - The entry in the limbo and free lists.
@@ -103,124 +103,53 @@ static u64 __rmid_read(u32 rmid, u32 eventid)
         return val;
  }
  
-/*
- * Walk the limbo list looking at any RMIDs that are flagged in the
- * domain rmid_busy_llc bitmap as busy. If the reported LLC occupancy
- * is below the threshold clear the busy bit and decrement the count.
- * If the busy count gets to zero on an RMID we stop looking.
- * This can be called from an IPI.
- * We need an atomic for the busy count because multiple CPUs may check
- * the same RMID at the same time.
- */
-static bool __check_limbo(struct rdt_domain *d)
-{
-       struct rmid_entry *entry;
-       u64 val;
-
-       list_for_each_entry(entry, &rmid_limbo_lru, list) {
-               if (!test_bit(entry->rmid, d->rmid_busy_llc))
-                       continue;
-               val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
-               if (val <= intel_cqm_threshold) {
-                       clear_bit(entry->rmid, d->rmid_busy_llc);
-                       if (atomic_dec_and_test(&entry->busy))
-                               return true;
-               }
-       }
-       return false;
-}
-
-static void check_limbo(void *arg)
+static bool rmid_dirty(struct rmid_entry *entry)
  {
-       struct rdt_domain *d;
-
-       d = get_domain_from_cpu(smp_processor_id(),
-                               &rdt_resources_all[RDT_RESOURCE_L3]);
-
-       if (d)
-               __check_limbo(d);
-}
+       u64 val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
  
-static bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
-{
-       return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
+       return val >= intel_cqm_threshold;
  }
  
  /*
- * Scan the limbo list and move all entries that are below the
- * intel_cqm_threshold to the free list.
- * Return "true" if the limbo list is empty, "false" if there are
- * still some RMIDs there.
+ * Check the RMIDs that are marked as busy for this domain. If the
+ * reported LLC occupancy is below the threshold, or if @force_free is
+ * set, clear the busy bit and decrement the count. If the busy count
+ * gets to zero on an RMID, we free the RMID.
   */
-static bool try_freeing_limbo_rmid(void)
+void __check_limbo(struct rdt_domain *d, bool force_free)
  {
-       struct rmid_entry *entry, *tmp;
+       struct rmid_entry *entry;
         struct rdt_resource *r;
-       cpumask_var_t cpu_mask;
-       struct rdt_domain *d;
-       bool ret = true;
-       int cpu;
-
-       if (list_empty(&rmid_limbo_lru))
-               return ret;
+       u32 crmid = 1, nrmid;
  
         r = &rdt_resources_all[RDT_RESOURCE_L3];
  
-       cpu = get_cpu();
-
         /*
-        * First see if we can free up an RMID by checking busy values
-        * on the local package.
+        * Skip RMID 0 and start from RMID 1 and check all the RMIDs that
+        * are marked as busy for occupancy < threshold. If the occupancy
+        * is less than the threshold decrement the busy counter of the
+        * RMID and move it to the free list when the counter reaches 0.
          */
-       d = get_domain_from_cpu(cpu, r);
-       if (d && has_busy_rmid(r, d) && __check_limbo(d)) {
-               list_for_each_entry_safe(entry, tmp, &rmid_limbo_lru, list) {
-                       if (atomic_read(&entry->busy) == 0) {
-                               list_del(&entry->list);
+       for (;;) {
+               nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid, crmid);
+               if (nrmid >= r->num_rmid)
+                       break;
+
+               entry = __rmid_entry(nrmid);
+               if (force_free || !rmid_dirty(entry)) {
+                       clear_bit(entry->rmid, d->rmid_busy_llc);
+                       if (!--entry->busy) {
+                               rmid_limbo_count--;
                                 list_add_tail(&entry->list, &rmid_free_lru);
-                               goto done;
                         }
                 }
+               crmid = nrmid + 1;
         }
+}
  
-       if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) {
-               ret = false;
-               goto done;
-       }
-
-       /*
-        * Build a mask of other domains that have busy RMIDs
-        */
-       list_for_each_entry(d, &r->domains, list) {
-               if (!cpumask_test_cpu(cpu, &d->cpu_mask) &&
-                   has_busy_rmid(r, d))
-                       cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
-       }
-       if (cpumask_empty(cpu_mask)) {
-               ret = false;
-               goto free_mask;
-       }
-
-       /*
-        * Scan domains with busy RMIDs to check if they still are busy
-        */
-       on_each_cpu_mask(cpu_mask, check_limbo, NULL, true);
-
-       /* Walk limbo list moving all free RMIDs to the &rmid_free_lru list */
-       list_for_each_entry_safe(entry, tmp, &rmid_limbo_lru, list) {
-               if (atomic_read(&entry->busy) != 0) {
-                       ret = false;
-                       continue;
-               }
-               list_del(&entry->list);
-               list_add_tail(&entry->list, &rmid_free_lru);
-       }
-
-free_mask:
-       free_cpumask_var(cpu_mask);
-done:
-       put_cpu();
-       return ret;
+bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
+{
+       return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
  }
  
  /*
@@ -231,15 +160,11 @@ done:
  int alloc_rmid(void)
  {
         struct rmid_entry *entry;
-       bool ret;
  
         lockdep_assert_held(&rdtgroup_mutex);
  
-       if (list_empty(&rmid_free_lru)) {
-               ret = try_freeing_limbo_rmid();
-               if (list_empty(&rmid_free_lru))
-                       return ret ? -ENOSPC : -EBUSY;
-       }
+       if (list_empty(&rmid_free_lru))
+               return rmid_limbo_count ? -EBUSY : -ENOSPC;
  
         entry = list_first_entry(&rmid_free_lru,
                                  struct rmid_entry, list);
@@ -252,11 +177,12 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
  {
         struct rdt_resource *r;
         struct rdt_domain *d;
-       int cpu, nbusy = 0;
+       int cpu;
         u64 val;
  
         r = &rdt_resources_all[RDT_RESOURCE_L3];
  
+       entry->busy = 0;
         cpu = get_cpu();
         list_for_each_entry(d, &r->domains, list) {
                 if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
@@ -264,17 +190,22 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
                         if (val <= intel_cqm_threshold)
                                 continue;
                 }
+
+               /*
+                * For the first limbo RMID in the domain,
+                * set up the limbo worker.
+                */
+               if (!has_busy_rmid(r, d))
+                       cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
                 set_bit(entry->rmid, d->rmid_busy_llc);
-               nbusy++;
+               entry->busy++;
         }
         put_cpu();
  
-       if (nbusy) {
-               atomic_set(&entry->busy, nbusy);
-               list_add_tail(&entry->list, &rmid_limbo_lru);
-       } else {
+       if (entry->busy)
+               rmid_limbo_count++;
+       else
                 list_add_tail(&entry->list, &rmid_free_lru);
-       }
  }
  
  void free_rmid(u32 rmid)
@@ -387,6 +318,50 @@ static void mbm_update(struct rdt_domain *d, int rmid)
         }
  }
  
+/*
+ * Handler to scan the domain's busy RMIDs and move those whose
+ * occupancy is below the threshold to the free list.
+ */
+void cqm_handle_limbo(struct work_struct *work)
+{
+       unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
+       int cpu = smp_processor_id();
+       struct rdt_resource *r;
+       struct rdt_domain *d;
+
+       mutex_lock(&rdtgroup_mutex);
+
+       r = &rdt_resources_all[RDT_RESOURCE_L3];
+       d = get_domain_from_cpu(cpu, r);
+
+       if (!d) {
+               pr_warn_once("Failure to get domain for limbo worker\n");
+               goto out_unlock;
+       }
+
+       __check_limbo(d, false);
+
+       if (has_busy_rmid(r, d))
+               schedule_delayed_work_on(cpu, &d->cqm_limbo, delay);
+
+out_unlock:
+       mutex_unlock(&rdtgroup_mutex);
+}
+
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
+{
+       unsigned long delay = msecs_to_jiffies(delay_ms);
+       struct rdt_resource *r;
+       int cpu;
+
+       r = &rdt_resources_all[RDT_RESOURCE_L3];
+
+       cpu = cpumask_any(&dom->cpu_mask);
+       dom->cqm_work_cpu = cpu;
+
+       schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
+}
+
  void mbm_handle_overflow(struct work_struct *work)
  {
         unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
@@ -413,6 +388,7 @@ void mbm_handle_overflow(struct work_struct *work)
         }
  
         schedule_delayed_work_on(cpu, &d->mbm_over, delay);
+
  out_unlock:
         mutex_unlock(&rdtgroup_mutex);
  }
author	Vikas Shivappa <vikas.shivappa@linux.intel.com>
	Wed, 16 Aug 2017 01:00:43 +0000 (18:00 -0700)
committer	Thomas Gleixner <tglx@linutronix.de>
	Wed, 16 Aug 2017 10:05:41 +0000 (12:05 +0200)
arch/x86/kernel/cpu/intel_rdt.c		patch \| blob \| blame \| history
arch/x86/kernel/cpu/intel_rdt.h		patch \| blob \| blame \| history
arch/x86/kernel/cpu/intel_rdt_monitor.c		patch \| blob \| blame \| history