perf/x86/mbm: Add Intel Memory B/W Monitoring enumeration and init
author Vikas Shivappa <vikas.shivappa@linux.intel.com>
Thu, 10 Mar 2016 23:32:09 +0000 (15:32 -0800)
committer Ingo Molnar <mingo@kernel.org>
Mon, 21 Mar 2016 08:08:19 +0000 (09:08 +0100)
The MBM init patch enumerates the Intel MBM (Memory b/w monitoring)
feature and initializes the perf events and data structures for
monitoring the memory b/w.

It's based on the original patch series by Tony Luck and Kanaka Juvva.

Memory bandwidth monitoring (MBM) provides OS/VMM a way to monitor
bandwidth from one level of cache to another. The current patches
support L3 external bandwidth monitoring. It supports both 'local
bandwidth' and 'total bandwidth' monitoring for the socket. Local
bandwidth measures the amount of data sent through the memory controller
on the socket and total b/w measures the total system bandwidth.

Extending the cache quality of service monitoring (CQM) we add two
more events to the perf infrastructure:

  intel_cqm_llc/local_bytes - bytes sent through local socket memory controller
  intel_cqm_llc/total_bytes - total L3 external bytes sent

The tasks are associated with a Resource Monitoring ID (RMID) just like
in CQM, and the OS uses an MSR write to indicate the RMID of the task
during scheduling.

Signed-off-by: Vikas Shivappa <vikas.shivappa@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: fenghua.yu@intel.com
Cc: h.peter.anvin@intel.com
Cc: ravi.v.shankar@intel.com
Cc: vikas.shivappa@intel.com
Link: http://lkml.kernel.org/r/1457652732-4499-4-git-send-email-vikas.shivappa@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/events/intel/cqm.c
arch/x86/include/asm/cpufeatures.h
arch/x86/kernel/cpu/common.c

index dbb058d2910a0dd9d7ac51ad03d81783116d6752..515df11e65bbc6bd60d1fc2d5c64121ee7993dbf 100644 (file)
@@ -15,6 +15,7 @@
 
 static u32 cqm_max_rmid = -1;
 static unsigned int cqm_l3_scale; /* supposedly cacheline size */
+static bool cqm_enabled, mbm_enabled;
 
 /**
  * struct intel_pqr_state - State cache for the PQR MSR
@@ -42,6 +43,24 @@ struct intel_pqr_state {
  * interrupts disabled, which is sufficient for the protection.
  */
 static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
+/**
+ * struct sample - accumulated data for one MBM event (local or total)
+ * @total_bytes: number of bytes counted since monitoring began
+ * @prev_msr:    previous value of the MSR
+ */
+struct sample {
+       u64     total_bytes;
+       u64     prev_msr;
+};
+
+/*
+ * Sample array for total memory bandwidth events; allocated in intel_mbm_init()
+ */
+static struct sample *mbm_total;
+/*
+ * Sample array for local memory bandwidth events; allocated in intel_mbm_init()
+ */
+static struct sample *mbm_local;
 
 /*
  * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
@@ -223,6 +242,7 @@ static void cqm_cleanup(void)
 
        kfree(cqm_rmid_ptrs);
        cqm_rmid_ptrs = NULL;
+       cqm_enabled = false;
 }
 
 static int intel_cqm_setup_rmid_cache(void)
@@ -1164,6 +1184,16 @@ EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes");
 EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL);
 EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1");
 
+EVENT_ATTR_STR(total_bytes, intel_cqm_total_bytes, "event=0x02"); /* total bandwidth event: event code 0x02 */
+EVENT_ATTR_STR(total_bytes.per-pkg, intel_cqm_total_bytes_pkg, "1");
+EVENT_ATTR_STR(total_bytes.unit, intel_cqm_total_bytes_unit, "MB");
+EVENT_ATTR_STR(total_bytes.scale, intel_cqm_total_bytes_scale, "1e-6"); /* 1e-6 with unit "MB": presumably the raw count is in bytes */
+
+EVENT_ATTR_STR(local_bytes, intel_cqm_local_bytes, "event=0x03"); /* local bandwidth event: event code 0x03 */
+EVENT_ATTR_STR(local_bytes.per-pkg, intel_cqm_local_bytes_pkg, "1");
+EVENT_ATTR_STR(local_bytes.unit, intel_cqm_local_bytes_unit, "MB");
+EVENT_ATTR_STR(local_bytes.scale, intel_cqm_local_bytes_scale, "1e-6"); /* same unit/scale pairing as total_bytes */
+
 static struct attribute *intel_cqm_events_attr[] = {
        EVENT_PTR(intel_cqm_llc),
        EVENT_PTR(intel_cqm_llc_pkg),
@@ -1173,9 +1203,38 @@ static struct attribute *intel_cqm_events_attr[] = {
        NULL,
 };
 
+static struct attribute *intel_mbm_events_attr[] = { /* "events" attrs installed when only MBM is enabled */
+       EVENT_PTR(intel_cqm_total_bytes),
+       EVENT_PTR(intel_cqm_local_bytes),
+       EVENT_PTR(intel_cqm_total_bytes_pkg),
+       EVENT_PTR(intel_cqm_local_bytes_pkg),
+       EVENT_PTR(intel_cqm_total_bytes_unit),
+       EVENT_PTR(intel_cqm_local_bytes_unit),
+       EVENT_PTR(intel_cqm_total_bytes_scale),
+       EVENT_PTR(intel_cqm_local_bytes_scale),
+       NULL, /* terminator */
+};
+
+static struct attribute *intel_cmt_mbm_events_attr[] = { /* "events" attrs installed when both CQM and MBM are enabled */
+       EVENT_PTR(intel_cqm_llc),
+       EVENT_PTR(intel_cqm_total_bytes),
+       EVENT_PTR(intel_cqm_local_bytes),
+       EVENT_PTR(intel_cqm_llc_pkg),
+       EVENT_PTR(intel_cqm_total_bytes_pkg),
+       EVENT_PTR(intel_cqm_local_bytes_pkg),
+       EVENT_PTR(intel_cqm_llc_unit),
+       EVENT_PTR(intel_cqm_total_bytes_unit),
+       EVENT_PTR(intel_cqm_local_bytes_unit),
+       EVENT_PTR(intel_cqm_llc_scale),
+       EVENT_PTR(intel_cqm_total_bytes_scale),
+       EVENT_PTR(intel_cqm_local_bytes_scale),
+       EVENT_PTR(intel_cqm_llc_snapshot), /* snapshot attr exists only for llc_occupancy */
+       NULL, /* terminator */
+};
+
 static struct attribute_group intel_cqm_events_group = {
        .name = "events",
-       .attrs = intel_cqm_events_attr,
+       .attrs = NULL,
 };
 
 PMU_FORMAT_ATTR(event, "config:0-7");
@@ -1322,12 +1381,57 @@ static const struct x86_cpu_id intel_cqm_match[] = {
        {}
 };
 
+static void mbm_cleanup(void) /* free both MBM sample arrays and clear mbm_enabled */
+{
+       if (!mbm_enabled) /* MBM never enabled, or already cleaned up: nothing to do */
+               return;
+
+       kfree(mbm_local);
+       kfree(mbm_total); /* NOTE(review): pointers are not reset to NULL; only the mbm_enabled guard prevents a second kfree() */
+       mbm_enabled = false;
+}
+
+static const struct x86_cpu_id intel_mbm_local_match[] = { /* Intel CPUs with X86_FEATURE_CQM_MBM_LOCAL */
+       { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_LOCAL },
+       {} /* empty terminating entry */
+};
+
+static const struct x86_cpu_id intel_mbm_total_match[] = { /* Intel CPUs with X86_FEATURE_CQM_MBM_TOTAL */
+       { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_TOTAL },
+       {} /* empty terminating entry */
+};
+
+static int intel_mbm_init(void) /* allocate the mbm_local and mbm_total sample arrays; returns 0 or -ENOMEM */
+{
+       int array_size, maxid = cqm_max_rmid + 1;
+
+       array_size = sizeof(struct sample) * maxid * topology_max_packages(); /* (cqm_max_rmid + 1) samples for each of topology_max_packages() */
+       mbm_local = kmalloc(array_size, GFP_KERNEL); /* kmalloc(): the array contents are not zeroed here */
+       if (!mbm_local)
+               return -ENOMEM; /* NOTE(review): mbm_enabled is not cleared on this path; intel_cqm_init() continues if CQM is enabled -- confirm */
+
+       mbm_total = kmalloc(array_size, GFP_KERNEL);
+       if (!mbm_total) {
+               mbm_cleanup(); /* frees mbm_local and clears mbm_enabled (when mbm_enabled is set) */
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
 static int __init intel_cqm_init(void)
 {
        char *str = NULL, scale[20];
        int i, cpu, ret;
 
-       if (!x86_match_cpu(intel_cqm_match))
+       if (x86_match_cpu(intel_cqm_match))
+               cqm_enabled = true;
+
+       if (x86_match_cpu(intel_mbm_local_match) &&
+            x86_match_cpu(intel_mbm_total_match))
+               mbm_enabled = true;
+
+       if (!cqm_enabled && !mbm_enabled)
                return -ENODEV;
 
        cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale;
@@ -1384,13 +1488,28 @@ static int __init intel_cqm_init(void)
                cqm_pick_event_reader(i);
        }
 
+       if (mbm_enabled)
+               ret = intel_mbm_init();
+       if (ret && !cqm_enabled)
+               goto out;
+
+       if (cqm_enabled && mbm_enabled)
+               intel_cqm_events_group.attrs = intel_cmt_mbm_events_attr;
+       else if (!cqm_enabled && mbm_enabled)
+               intel_cqm_events_group.attrs = intel_mbm_events_attr;
+       else if (cqm_enabled && !mbm_enabled)
+               intel_cqm_events_group.attrs = intel_cqm_events_attr;
+
        ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
        if (ret) {
                pr_err("Intel CQM perf registration failed: %d\n", ret);
                goto out;
        }
 
-       pr_info("Intel CQM monitoring enabled\n");
+       if (cqm_enabled)
+               pr_info("Intel CQM monitoring enabled\n");
+       if (mbm_enabled)
+               pr_info("Intel MBM enabled\n");
 
        /*
         * Register the hot cpu notifier once we are sure cqm
@@ -1402,6 +1521,7 @@ out:
        if (ret) {
                kfree(str);
                cqm_cleanup();
+               mbm_cleanup();
        }
 
        return ret;
index 074b7604bd5122d2b5d7da01f14a610d4df31239..746dd6ae493250e874d0cfc6fd3bc0aaadf21382 100644 (file)
 
 /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
 #define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
+#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
+#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
 #define X86_FEATURE_CLZERO     (13*32+0) /* CLZERO instruction */
index 62590aa064c83dd3282d7084c0124de681aeee7b..e601c1286e29ab23e09239272584c31957219806 100644 (file)
@@ -649,7 +649,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
                        cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx);
                        c->x86_capability[CPUID_F_1_EDX] = edx;
 
-                       if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) {
+                       if ((cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) ||
+                             ((cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL)) ||
+                              (cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)))) {
                                c->x86_cache_max_rmid = ecx;
                                c->x86_cache_occ_scale = ebx;
                        }