perf/x86/intel/pt: Add support for address range filtering in PT
author    Alexander Shishkin <alexander.shishkin@linux.intel.com>
          Wed, 27 Apr 2016 15:44:47 +0000 (18:44 +0300)
committer Ingo Molnar <mingo@kernel.org>
          Thu, 5 May 2016 08:13:58 +0000 (10:13 +0200)
Newer versions of Intel PT support address ranges, which can be used to
define IP address range-based filters or TraceSTOP regions. The number of
ranges is enumerated via CPUID.
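
For illustration, the enumeration can be read directly from user space;
a minimal sketch, assuming GCC's <cpuid.h> helpers (the EAX[2:0] field
of leaf 0x14, sub-leaf 1 is per the SDM and is not part of this patch):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* the Intel PT leaf is 0x14; check the max basic CPUID leaf first */
	__cpuid(0, eax, ebx, ecx, edx);
	if (eax < 0x14)
		return 1;

	/* sub-leaf 1, EAX[2:0]: number of configurable address ranges */
	__cpuid_count(0x14, 1, eax, ebx, ecx, edx);
	printf("PT address ranges: %u\n", eax & 0x7);

	return 0;
}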

This patch implements PMU callbacks and related low-level code to allow
filter validation, configuration and programming into the hardware.
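
For context, a user-space sketch of how such a filter could be attached
to a PT event. This is not part of the patch: the "filter
<start>/<size>@<object>" string is parsed by the core address-filter
code added earlier in this series, and the path and range below are
invented; error handling is minimal.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	FILE *f;
	int type, fd;

	/* the dynamic PMU type of intel_pt is exported via sysfs */
	f = fopen("/sys/bus/event_source/devices/intel_pt/type", "r");
	if (!f || fscanf(f, "%d", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;

	/* trace the current task on any cpu */
	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	/*
	 * One IP range filter on a (made-up) object; the kernel side
	 * validates it against PT_CAP_num_address_ranges and programs
	 * RTIT_ADDRn_A/B once the object is mapped.
	 */
	if (ioctl(fd, PERF_EVENT_IOC_SET_FILTER,
		  "filter 0x1000/0x2000@/usr/bin/example") < 0)
		perror("PERF_EVENT_IOC_SET_FILTER");

	close(fd);
	return 0;
}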

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: vince@deater.net
Link: http://lkml.kernel.org/r/1461771888-10409-7-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/events/intel/pt.c
arch/x86/events/intel/pt.h

arch/x86/events/intel/pt.c
index e5bfafef3d771f067ae0b4351c0a57e74090b08f..2d1ce2c6ac7bd397f0589f0969c0736049a14ade 100644
@@ -265,6 +265,75 @@ static bool pt_event_valid(struct perf_event *event)
  * These all are cpu affine and operate on a local PT
  */
 
+/* Address ranges and their corresponding msr configuration registers */
+static const struct pt_address_range {
+       unsigned long   msr_a;
+       unsigned long   msr_b;
+       unsigned int    reg_off;
+} pt_address_ranges[] = {
+       {
+               .msr_a   = MSR_IA32_RTIT_ADDR0_A,
+               .msr_b   = MSR_IA32_RTIT_ADDR0_B,
+               .reg_off = RTIT_CTL_ADDR0_OFFSET,
+       },
+       {
+               .msr_a   = MSR_IA32_RTIT_ADDR1_A,
+               .msr_b   = MSR_IA32_RTIT_ADDR1_B,
+               .reg_off = RTIT_CTL_ADDR1_OFFSET,
+       },
+       {
+               .msr_a   = MSR_IA32_RTIT_ADDR2_A,
+               .msr_b   = MSR_IA32_RTIT_ADDR2_B,
+               .reg_off = RTIT_CTL_ADDR2_OFFSET,
+       },
+       {
+               .msr_a   = MSR_IA32_RTIT_ADDR3_A,
+               .msr_b   = MSR_IA32_RTIT_ADDR3_B,
+               .reg_off = RTIT_CTL_ADDR3_OFFSET,
+       }
+};
+
+static u64 pt_config_filters(struct perf_event *event)
+{
+       struct pt_filters *filters = event->hw.addr_filters;
+       struct pt *pt = this_cpu_ptr(&pt_ctx);
+       unsigned int range = 0;
+       u64 rtit_ctl = 0;
+
+       if (!filters)
+               return 0;
+
+       perf_event_addr_filters_sync(event);
+
+       for (range = 0; range < filters->nr_filters; range++) {
+               struct pt_filter *filter = &filters->filter[range];
+
+               /*
+                * Note, if the range has zero start/end addresses due
+                * to its dynamic object not being loaded yet, we just
+                * go ahead and program a zeroed range, which will
+                * simply produce no data. Note^2: if executable code
+                * at 0x0 is a concern, we can set up an "invalid"
+                * configuration such as msr_b < msr_a.
+                */
+
+               /* avoid redundant msr writes */
+               if (pt->filters.filter[range].msr_a != filter->msr_a) {
+                       wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a);
+                       pt->filters.filter[range].msr_a = filter->msr_a;
+               }
+
+               if (pt->filters.filter[range].msr_b != filter->msr_b) {
+                       wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b);
+                       pt->filters.filter[range].msr_b = filter->msr_b;
+               }
+
+               rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
+       }
+
+       return rtit_ctl;
+}
+
 static void pt_config(struct perf_event *event)
 {
        u64 reg;
@@ -274,7 +343,8 @@ static void pt_config(struct perf_event *event)
                wrmsrl(MSR_IA32_RTIT_STATUS, 0);
        }
 
-       reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
+       reg = pt_config_filters(event);
+       reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
 
        if (!event->attr.exclude_kernel)
                reg |= RTIT_CTL_OS;
@@ -921,6 +991,82 @@ static void pt_buffer_free_aux(void *data)
        kfree(buf);
 }
 
+static int pt_addr_filters_init(struct perf_event *event)
+{
+       struct pt_filters *filters;
+       int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);
+
+       if (!pt_cap_get(PT_CAP_num_address_ranges))
+               return 0;
+
+       filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node);
+       if (!filters)
+               return -ENOMEM;
+
+       if (event->parent)
+               memcpy(filters, event->parent->hw.addr_filters,
+                      sizeof(*filters));
+
+       event->hw.addr_filters = filters;
+
+       return 0;
+}
+
+static void pt_addr_filters_fini(struct perf_event *event)
+{
+       kfree(event->hw.addr_filters);
+       event->hw.addr_filters = NULL;
+}
+
+static int pt_event_addr_filters_validate(struct list_head *filters)
+{
+       struct perf_addr_filter *filter;
+       int range = 0;
+
+       list_for_each_entry(filter, filters, entry) {
+               /* PT doesn't support single address triggers */
+               if (!filter->range)
+                       return -EOPNOTSUPP;
+
+               if (!filter->inode && !kernel_ip(filter->offset))
+                       return -EINVAL;
+
+               if (++range > pt_cap_get(PT_CAP_num_address_ranges))
+                       return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+static void pt_event_addr_filters_sync(struct perf_event *event)
+{
+       struct perf_addr_filters_head *head = perf_event_addr_filters(event);
+       unsigned long msr_a, msr_b, *offs = event->addr_filters_offs;
+       struct pt_filters *filters = event->hw.addr_filters;
+       struct perf_addr_filter *filter;
+       int range = 0;
+
+       if (!filters)
+               return;
+
+       list_for_each_entry(filter, &head->list, entry) {
+               if (filter->inode && !offs[range]) {
+                       msr_a = msr_b = 0;
+               } else {
+                       /* apply the offset */
+                       msr_a = filter->offset + offs[range];
+                       msr_b = filter->size + msr_a;
+               }
+
+               filters->filter[range].msr_a  = msr_a;
+               filters->filter[range].msr_b  = msr_b;
+               filters->filter[range].config = filter->filter ? 1 : 2;
+               range++;
+       }
+
+       filters->nr_filters = range;
+}
+
 /**
  * intel_pt_interrupt() - PT PMI handler
  */
@@ -1128,6 +1274,7 @@ static void pt_event_read(struct perf_event *event)
 
 static void pt_event_destroy(struct perf_event *event)
 {
+       pt_addr_filters_fini(event);
        x86_del_exclusive(x86_lbr_exclusive_pt);
 }
 
@@ -1142,6 +1289,11 @@ static int pt_event_init(struct perf_event *event)
        if (x86_add_exclusive(x86_lbr_exclusive_pt))
                return -EBUSY;
 
+       if (pt_addr_filters_init(event)) {
+               x86_del_exclusive(x86_lbr_exclusive_pt);
+               return -ENOMEM;
+       }
+
        event->destroy = pt_event_destroy;
 
        return 0;
@@ -1195,16 +1347,21 @@ static __init int pt_init(void)
                        PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
 
        pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
-       pt_pmu.pmu.attr_groups  = pt_attr_groups;
-       pt_pmu.pmu.task_ctx_nr  = perf_sw_context;
-       pt_pmu.pmu.event_init   = pt_event_init;
-       pt_pmu.pmu.add          = pt_event_add;
-       pt_pmu.pmu.del          = pt_event_del;
-       pt_pmu.pmu.start        = pt_event_start;
-       pt_pmu.pmu.stop         = pt_event_stop;
-       pt_pmu.pmu.read         = pt_event_read;
-       pt_pmu.pmu.setup_aux    = pt_buffer_setup_aux;
-       pt_pmu.pmu.free_aux     = pt_buffer_free_aux;
+       pt_pmu.pmu.attr_groups           = pt_attr_groups;
+       pt_pmu.pmu.task_ctx_nr           = perf_sw_context;
+       pt_pmu.pmu.event_init            = pt_event_init;
+       pt_pmu.pmu.add                   = pt_event_add;
+       pt_pmu.pmu.del                   = pt_event_del;
+       pt_pmu.pmu.start                 = pt_event_start;
+       pt_pmu.pmu.stop                  = pt_event_stop;
+       pt_pmu.pmu.read                  = pt_event_read;
+       pt_pmu.pmu.setup_aux             = pt_buffer_setup_aux;
+       pt_pmu.pmu.free_aux              = pt_buffer_free_aux;
+       pt_pmu.pmu.addr_filters_sync     = pt_event_addr_filters_sync;
+       pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate;
+       pt_pmu.pmu.nr_addr_filters       =
+               pt_cap_get(PT_CAP_num_address_ranges);
+
        ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
 
        return ret;
arch/x86/events/intel/pt.h
index 0ed9000b3c46e9004ca22f0a88deed7e190e5e25..ca6459996d2df8d344752f04072c1bc31f36b25f 100644
@@ -140,14 +140,40 @@ struct pt_buffer {
        struct topa_entry       *topa_index[0];
 };
 
+#define PT_FILTERS_NUM 4
+
+/**
+ * struct pt_filter - IP range filter configuration
+ * @msr_a:     range start, goes to RTIT_ADDRn_A
+ * @msr_b:     range end, goes to RTIT_ADDRn_B
+ * @config:    4-bit field in RTIT_CTL
+ */
+struct pt_filter {
+       unsigned long   msr_a;
+       unsigned long   msr_b;
+       unsigned long   config;
+};
+
+/**
+ * struct pt_filters - IP range filtering context
+ * @filter:    filters defined for this context
+ * @nr_filters:        number of defined filters in the @filter array
+ */
+struct pt_filters {
+       struct pt_filter        filter[PT_FILTERS_NUM];
+       unsigned int            nr_filters;
+};
+
 /**
  * struct pt - per-cpu pt context
  * @handle:    perf output handle
+ * @filters:           last configured filters
  * @handle_nmi:        do handle PT PMI on this cpu, there's an active event
  * @vmx_on:    1 if VMX is ON on this cpu
  */
 struct pt {
        struct perf_output_handle handle;
+       struct pt_filters       filters;
        int                     handle_nmi;
        int                     vmx_on;
 };