perf, x86: use LBR for PEBS IP+1 fixup
author Peter Zijlstra <a.p.zijlstra@chello.nl>
Wed, 3 Mar 2010 12:12:23 +0000 (13:12 +0100)
committer Ingo Molnar <mingo@elte.hu>
Wed, 10 Mar 2010 12:23:32 +0000 (13:23 +0100)
Use the LBR to fix up the PEBS IP+1 issue.

PEBS reports the address of the instruction following the one that caused
the event; here we use the LBR to find the last branch and from that
reconstruct the actual IP. If the reported IP matches the LBR-TO, we use
LBR-FROM; otherwise we treat the LBR-TO address as the beginning of the
last basic block and decode forward from it.

Once decoding reaches the reported IP, we use the address of the
instruction that was decoded just before it.

This patch introduces a new ABI element: PERF_RECORD_MISC_EXACT, which
conveys that the reported IP (PERF_SAMPLE_IP) is the exact instruction
that caused the event (barring CPU errata).

The fixup can fail due to various reasons:

 1) LBR contains invalid data (quite possible)
 2) part of the basic block got paged out
 3) the reported IP isn't part of the basic block (see 1)

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.619375431@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/include/asm/perf_event.h
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_ds.c
include/linux/perf_event.h

index db6109a885a76a6ecd116c59baae0088007ffc45..a9038c95161995b559bc8afb507089445dbf678f 100644 (file)
@@ -136,6 +136,25 @@ extern void perf_events_lapic_init(void);
 
 #define PERF_EVENT_INDEX_OFFSET                        0
 
+/*
+ * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
+ * This flag is otherwise unused and ABI specified to be 0, so nobody should
+ * care what we do with it.
+ */
+#define PERF_EFLAGS_EXACT      (1UL << 3)
+
+/*
+ * Derive the PERF_RECORD_MISC_* bits for a sample from its register
+ * snapshot: user vs. kernel comes from user_mode(), and
+ * PERF_RECORD_MISC_EXACT is added when regs->flags carries
+ * PERF_EFLAGS_EXACT.
+ *
+ * @regs is evaluated more than once, so pass an expression without side
+ * effects.
+ */
+#define perf_misc_flags(regs)                          \
+({     int misc = 0;                                   \
+       if (user_mode(regs))                            \
+               misc |= PERF_RECORD_MISC_USER;          \
+       else                                            \
+               misc |= PERF_RECORD_MISC_KERNEL;        \
+       if ((regs)->flags & PERF_EFLAGS_EXACT)          \
+               misc |= PERF_RECORD_MISC_EXACT;         \
+       misc; })
+
+/* The sample IP is read straight from the register snapshot. */
+#define perf_instruction_pointer(regs) ((regs)->ip)
+
 #else
 static inline void init_hw_perf_events(void)           { }
 static inline void perf_events_lapic_init(void)        { }
index 1badff6b6b28f2dbc6fee736d0ba42221fd2b187..5cb4e8dcee4bc939a4774534f1ce9f98b9068d9e 100644 (file)
 #include <asm/stacktrace.h>
 #include <asm/nmi.h>
 
+/*
+ * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ *
+ * Copies up to @n bytes from the user address @from into @to, one page at
+ * a time, and returns the number of bytes actually copied.  The copy stops
+ * early as soon as __get_user_pages_fast() returns 0 for a page, so the
+ * return value can be smaller than @n; callers must compare the two.
+ */
+static unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+{
+       unsigned long offset, addr = (unsigned long)from;
+       int type = in_nmi() ? KM_NMI : KM_IRQ0; /* kmap slot matching the current context */
+       unsigned long size, len = 0;
+       struct page *page;
+       void *map;
+       int ret;
+
+       do {
+               ret = __get_user_pages_fast(addr, 1, 0, &page);
+               if (!ret)
+                       break;
+               /* this chunk ends at the page boundary or at the end of the request */
+               offset = addr & (PAGE_SIZE - 1);
+               size = min(PAGE_SIZE - offset, n - len);
+               /* keep the page mapped only for the duration of the memcpy */
+               map = kmap_atomic(page, type);
+               memcpy(to, map+offset, size);
+               kunmap_atomic(map, type);
+               put_page(page); /* release the page obtained above */
+               /* advance the byte count, destination and source together */
+               len  += size;
+               to   += size;
+               addr += size;
+
+       } while (len < n);
+
+       return len;
+}
+
 static u64 perf_event_mask __read_mostly;
 
 struct event_constraint {
@@ -1550,41 +1585,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
        dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
 }
 
-/*
- * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
- */
-static unsigned long
-copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
-{
-       unsigned long offset, addr = (unsigned long)from;
-       int type = in_nmi() ? KM_NMI : KM_IRQ0;
-       unsigned long size, len = 0;
-       struct page *page;
-       void *map;
-       int ret;
-
-       do {
-               ret = __get_user_pages_fast(addr, 1, 0, &page);
-               if (!ret)
-                       break;
-
-               offset = addr & (PAGE_SIZE - 1);
-               size = min(PAGE_SIZE - offset, n - len);
-
-               map = kmap_atomic(page, type);
-               memcpy(to, map+offset, size);
-               kunmap_atomic(map, type);
-               put_page(page);
-
-               len  += size;
-               to   += size;
-               addr += size;
-
-       } while (len < n);
-
-       return len;
-}
-
 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
 {
        unsigned long bytes;
index 44f6ed42a934a1c61a96a569379e515bfe9e67e1..7eb78be3b2297f7ddb5b1d4ab943d1ed27125229 100644 (file)
@@ -547,7 +547,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
        x86_pmu_disable_event(event);
 
        if (unlikely(event->attr.precise))
-               intel_pmu_pebs_disable(hwc);
+               intel_pmu_pebs_disable(event);
 }
 
 static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
@@ -600,7 +600,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
        }
 
        if (unlikely(event->attr.precise))
-               intel_pmu_pebs_enable(hwc);
+               intel_pmu_pebs_enable(event);
 
        __x86_pmu_enable_event(hwc);
 }
index 0d994ef213b95514a80598ecb115268462d2e864..50e6ff3281fc885a13cb5cb0dee9476c90c8df58 100644 (file)
@@ -331,26 +331,32 @@ intel_pebs_constraints(struct perf_event *event)
        return &emptyconstraint;
 }
 
-static void intel_pmu_pebs_enable(struct hw_perf_event *hwc)
+static void intel_pmu_pebs_enable(struct perf_event *event)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
        u64 val = cpuc->pebs_enabled;
 
        hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
        val |= 1ULL << hwc->idx;
        wrmsrl(MSR_IA32_PEBS_ENABLE, val);
+       /*
+        * The LBR is enabled together with PEBS because
+        * intel_pmu_pebs_fixup_ip() reads cpuc->lbr_entries[] to rewind
+        * the IP that PEBS reports.
+        *
+        * NOTE(review): 'val' is a local copy; nothing in this function
+        * stores the updated mask back into cpuc->pebs_enabled.  Confirm
+        * the cached mask is kept in sync elsewhere, otherwise a second
+        * enable would write a mask that lacks the first event's bit.
+        */
+       intel_pmu_lbr_enable(event);
 }
 
-static void intel_pmu_pebs_disable(struct hw_perf_event *hwc)
+static void intel_pmu_pebs_disable(struct perf_event *event)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
        u64 val = cpuc->pebs_enabled;
 
        val &= ~(1ULL << hwc->idx);
        wrmsrl(MSR_IA32_PEBS_ENABLE, val);
 
        hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+       /*
+        * Pairs with the intel_pmu_lbr_enable() call in
+        * intel_pmu_pebs_enable().
+        *
+        * NOTE(review): as in the enable path, the updated 'val' is
+        * written to the MSR but is not stored back into
+        * cpuc->pebs_enabled here -- confirm this is intended.
+        */
+       intel_pmu_lbr_disable(event);
 }
 
 static void intel_pmu_pebs_enable_all(void)
@@ -369,6 +375,70 @@ static void intel_pmu_pebs_disable_all(void)
                wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
 }
 
+#include <asm/insn.h>
+
+#define MAX_INSN_SIZE  16
+/*
+ * Classify @ip as a kernel (true) or user (false) address.
+ *
+ * 32-bit: anything above PAGE_OFFSET counts as kernel.
+ * 64-bit: anything with the top bit set (negative as a signed long)
+ * counts as kernel.
+ *
+ * NOTE(review): on 32-bit the address PAGE_OFFSET itself is classified
+ * as user because the comparison is '>' rather than '>=' -- confirm
+ * this is intentional.
+ */
+static inline bool kernel_ip(unsigned long ip)
+{
+#ifdef CONFIG_X86_32
+       return ip > PAGE_OFFSET;
+#else
+       return (long)ip < 0;
+#endif
+}
+
+/*
+ * Rewind a PEBS sample IP to the instruction that caused the event.
+ *
+ * PEBS reports the address of the instruction following the one that
+ * triggered the sample.  The most recent LBR entry gives the start of the
+ * current basic block (LBR-TO); decode forward from there until the
+ * reported IP is reached and report the instruction decoded just before it.
+ * If the reported IP is the branch target itself, the sampled instruction
+ * was the branch, i.e. LBR-FROM.
+ *
+ * Returns 1 and updates regs->ip on success.  Returns 0 and leaves
+ * regs->ip untouched when the LBR data is missing or implausible, when the
+ * instruction bytes cannot be read, or when decoding does not land exactly
+ * on the reported IP.
+ */
+static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       unsigned long from = cpuc->lbr_entries[0].from;
+       unsigned long old_to, to = cpuc->lbr_entries[0].to;
+       unsigned long ip = regs->ip;
+
+       /*
+        * No LBR entry, no basic block, no rewinding
+        */
+       if (!cpuc->lbr_stack.nr || !from || !to)
+               return 0;
+
+       /*
+        * A basic block never crosses the user/kernel boundary.  Without
+        * this check a kernel IP paired with a stale user-space LBR-TO
+        * would make the loop below dereference a user address directly.
+        */
+       if (kernel_ip(ip) != kernel_ip(to))
+               return 0;
+
+       /*
+        * Unsigned math: this rejects ip < to as well as basic blocks
+        * larger than a page.  The size limit bounds the decode loop when
+        * the LBR holds garbage, and keeps ip - to small enough that the
+        * read size computed below cannot be truncated.
+        */
+       if ((ip - to) > PAGE_SIZE)
+               return 0;
+
+       /*
+        * We sampled a branch insn, rewind using the LBR stack
+        */
+       if (ip == to) {
+               regs->ip = from;
+               return 1;
+       }
+
+       do {
+               struct insn insn;
+               u8 buf[MAX_INSN_SIZE];
+               void *kaddr;
+
+               old_to = to;
+               if (!kernel_ip(ip)) {
+                       unsigned long bytes, size;
+
+                       /*
+                        * User text has to be copied out first; never read
+                        * past the reported IP.  to < ip holds here, so the
+                        * subtraction cannot wrap.
+                        */
+                       size = min_t(unsigned long, MAX_INSN_SIZE, ip - to);
+                       bytes = copy_from_user_nmi(buf, (void __user *)to, size);
+                       if (bytes != size)
+                               return 0;
+
+                       kaddr = buf;
+               } else {
+                       kaddr = (void *)to;
+               }
+
+               /*
+                * NOTE(review): kernel_insn_init() is used for user text as
+                * well; presumably this assumes user code has the same
+                * bitness as the kernel -- confirm for compat tasks.
+                */
+               kernel_insn_init(&insn, kaddr);
+               insn_get_length(&insn);
+               to += insn.length;
+       } while (to < ip);
+
+       if (to == ip) {
+               regs->ip = old_to;
+               return 1;
+       }
+
+       /*
+        * The block was decoded but the instruction stream never lined up
+        * with the reported IP; either LBR-TO or the IP is bogus.
+        */
+       return 0;
+}
+
 static int intel_pmu_save_and_restart(struct perf_event *event);
 static void intel_pmu_disable_event(struct perf_event *event);
 
@@ -424,6 +494,11 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
        regs.bp = at->bp;
        regs.sp = at->sp;
 
+       if (intel_pmu_pebs_fixup_ip(&regs))
+               regs.flags |= PERF_EFLAGS_EXACT;
+       else
+               regs.flags &= ~PERF_EFLAGS_EXACT;
+
        if (perf_event_overflow(event, 1, &data, &regs))
                intel_pmu_disable_event(event);
 
@@ -487,6 +562,11 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
                regs.bp = at->bp;
                regs.sp = at->sp;
 
+               if (intel_pmu_pebs_fixup_ip(&regs))
+                       regs.flags |= PERF_EFLAGS_EXACT;
+               else
+                       regs.flags &= ~PERF_EFLAGS_EXACT;
+
                if (perf_event_overflow(event, 1, &data, &regs))
                        intel_pmu_disable_event(event);
        }
index ab4fd9ede264539dc6ce50d0116e61f5c19e74b0..be85f7c4a94ffd2548b753acb3f7b4cea780c402 100644 (file)
@@ -294,6 +294,12 @@ struct perf_event_mmap_page {
 #define PERF_RECORD_MISC_USER                  (2 << 0)
 #define PERF_RECORD_MISC_HYPERVISOR            (3 << 0)
 
+#define PERF_RECORD_MISC_EXACT                 (1 << 14)
+/*
+ * PERF_RECORD_MISC_EXACT (above) is set by perf_misc_flags() when the
+ * sample's register flags carry PERF_EFLAGS_EXACT, i.e. when the reported
+ * IP is the instruction that caused the event rather than the one after it.
+ *
+ * Reserve the last bit to indicate some extended misc field
+ */
+#define PERF_RECORD_MISC_EXT_RESERVED          (1 << 15)
+
 struct perf_event_header {
        __u32   type;
        __u16   misc;