perf_counter: allow for data addresses to be recorded
author Peter Zijlstra <a.p.zijlstra@chello.nl>
Wed, 8 Apr 2009 13:01:33 +0000 (15:01 +0200)
committer Ingo Molnar <mingo@elte.hu>
Wed, 8 Apr 2009 17:05:56 +0000 (19:05 +0200)
Paul suggested we allow data addresses to be recorded along with
the traditional IPs, since powerpc hardware can provide these.

For now, only the software page-fault events provide data addresses,
but in the future powerpc might provide them as well for some events.

x86 doesn't seem capable of providing this at the moment.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090408130409.394816925@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/powerpc/kernel/perf_counter.c
arch/powerpc/mm/fault.c
arch/x86/kernel/cpu/perf_counter.c
arch/x86/mm/fault.c
include/linux/perf_counter.h
kernel/perf_counter.c

index 0697ade84dd3a565d0c370b19d72e24cfc9d9844..c9d019f1907425a0349321567b6279285208c305 100644 (file)
@@ -749,7 +749,7 @@ static void record_and_restart(struct perf_counter *counter, long val,
         * Finally record data if requested.
         */
        if (record)
-               perf_counter_overflow(counter, 1, regs);
+               perf_counter_overflow(counter, 1, regs, 0);
 }
 
 /*
index 17bbf6f91fbe6e560a8c637a2b15de2aa41fec86..ac0e112031b29f4a6b96d6b6a9b8b61cdfdc5d34 100644 (file)
@@ -171,7 +171,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
                die("Weird page fault", regs, SIGSEGV);
        }
 
-       perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs);
+       perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address);
 
        /* When running in the kernel we expect faults to occur only to
         * addresses in user space.  All other faults represent errors in the
@@ -312,7 +312,8 @@ good_area:
        }
        if (ret & VM_FAULT_MAJOR) {
                current->maj_flt++;
-               perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs);
+               perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0,
+                                    regs, address);
 #ifdef CONFIG_PPC_SMLPAR
                if (firmware_has_feature(FW_FEATURE_CMO)) {
                        preempt_disable();
@@ -322,7 +323,8 @@ good_area:
 #endif
        } else {
                current->min_flt++;
-               perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs);
+               perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0,
+                                    regs, address);
        }
        up_read(&mm->mmap_sem);
        return 0;
index 1116a41bc7b5d351321e7b490c3ee9e7b01b49d2..0fcbaab83f9bf1276653b4fceb7100177a27601d 100644 (file)
@@ -800,7 +800,7 @@ again:
                        continue;
 
                perf_save_and_restart(counter);
-               if (perf_counter_overflow(counter, nmi, regs))
+               if (perf_counter_overflow(counter, nmi, regs, 0))
                        __pmc_generic_disable(counter, &counter->hw, bit);
        }
 
index f2d3324d9215208b7788a23e071fedde2122ab4f..6f9df2babe487c4bfb9df2de78f346762c877f6a 100644 (file)
@@ -1045,7 +1045,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
        if (unlikely(error_code & PF_RSVD))
                pgtable_bad(regs, error_code, address);
 
-       perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs);
+       perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address);
 
        /*
         * If we're in an interrupt, have no user context or are running
@@ -1142,10 +1142,12 @@ good_area:
 
        if (fault & VM_FAULT_MAJOR) {
                tsk->maj_flt++;
-               perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs);
+               perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0,
+                                    regs, address);
        } else {
                tsk->min_flt++;
-               perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs);
+               perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0,
+                                    regs, address);
        }
 
        check_v8086_mode(regs, address, tsk);
index 8bd1be58c938ce60627b29ad7aa3fee9a7329be4..c22363a4f7466e4ad54e1f1689eca32958da5fb4 100644 (file)
@@ -101,8 +101,9 @@ enum perf_counter_record_format {
        PERF_RECORD_IP          = 1U << 0,
        PERF_RECORD_TID         = 1U << 1,
        PERF_RECORD_TIME        = 1U << 2,
-       PERF_RECORD_GROUP       = 1U << 3,
-       PERF_RECORD_CALLCHAIN   = 1U << 4,
+       PERF_RECORD_ADDR        = 1U << 3,
+       PERF_RECORD_GROUP       = 1U << 4,
+       PERF_RECORD_CALLCHAIN   = 1U << 5,
 };
 
 /*
@@ -251,6 +252,7 @@ enum perf_event_type {
         *      { u64                   ip;       } && PERF_RECORD_IP
         *      { u32                   pid, tid; } && PERF_RECORD_TID
         *      { u64                   time;     } && PERF_RECORD_TIME
+        *      { u64                   addr;     } && PERF_RECORD_ADDR
         *
         *      { u64                   nr;
         *        { u64 event, val; }   cnt[nr];  } && PERF_RECORD_GROUP
@@ -537,7 +539,7 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 extern void perf_counter_update_userpage(struct perf_counter *counter);
 
 extern int perf_counter_overflow(struct perf_counter *counter,
-                                int nmi, struct pt_regs *regs);
+                                int nmi, struct pt_regs *regs, u64 addr);
 /*
  * Return 1 for a software counter, 0 for a hardware counter
  */
@@ -547,7 +549,7 @@ static inline int is_software_counter(struct perf_counter *counter)
                perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE;
 }
 
-extern void perf_swcounter_event(u32, u64, int, struct pt_regs *);
+extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
 
 extern void perf_counter_mmap(unsigned long addr, unsigned long len,
                              unsigned long pgoff, struct file *file);
@@ -584,8 +586,8 @@ static inline int perf_counter_task_disable(void)   { return -EINVAL; }
 static inline int perf_counter_task_enable(void)       { return -EINVAL; }
 
 static inline void
-perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) { }
-
+perf_swcounter_event(u32 event, u64 nr, int nmi,
+                    struct pt_regs *regs, u64 addr)                    { }
 
 static inline void
 perf_counter_mmap(unsigned long addr, unsigned long len,
index 4dc8600d2825a4b6b265624ba7763aa42360760a..321c57e3556f568bca44c3aa57767a42cf633267 100644 (file)
@@ -800,7 +800,7 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
        update_context_time(ctx);
 
        regs = task_pt_regs(task);
-       perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs);
+       perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0);
        __perf_counter_sched_out(ctx, cpuctx);
 
        cpuctx->task_ctx = NULL;
@@ -1810,7 +1810,7 @@ static void perf_output_end(struct perf_output_handle *handle)
 }
 
 static void perf_counter_output(struct perf_counter *counter,
-                               int nmi, struct pt_regs *regs)
+                               int nmi, struct pt_regs *regs, u64 addr)
 {
        int ret;
        u64 record_type = counter->hw_event.record_type;
@@ -1860,6 +1860,11 @@ static void perf_counter_output(struct perf_counter *counter,
                header.size += sizeof(u64);
        }
 
+       if (record_type & PERF_RECORD_ADDR) {
+               header.type |= PERF_RECORD_ADDR;
+               header.size += sizeof(u64);
+       }
+
        if (record_type & PERF_RECORD_GROUP) {
                header.type |= PERF_RECORD_GROUP;
                header.size += sizeof(u64) +
@@ -1892,6 +1897,9 @@ static void perf_counter_output(struct perf_counter *counter,
        if (record_type & PERF_RECORD_TIME)
                perf_output_put(&handle, time);
 
+       if (record_type & PERF_RECORD_ADDR)
+               perf_output_put(&handle, addr);
+
        if (record_type & PERF_RECORD_GROUP) {
                struct perf_counter *leader, *sub;
                u64 nr = counter->nr_siblings;
@@ -2158,7 +2166,7 @@ void perf_counter_munmap(unsigned long addr, unsigned long len,
  */
 
 int perf_counter_overflow(struct perf_counter *counter,
-                         int nmi, struct pt_regs *regs)
+                         int nmi, struct pt_regs *regs, u64 addr)
 {
        int events = atomic_read(&counter->event_limit);
        int ret = 0;
@@ -2175,7 +2183,7 @@ int perf_counter_overflow(struct perf_counter *counter,
                        perf_counter_disable(counter);
        }
 
-       perf_counter_output(counter, nmi, regs);
+       perf_counter_output(counter, nmi, regs, addr);
        return ret;
 }
 
@@ -2240,7 +2248,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
                regs = task_pt_regs(current);
 
        if (regs) {
-               if (perf_counter_overflow(counter, 0, regs))
+               if (perf_counter_overflow(counter, 0, regs, 0))
                        ret = HRTIMER_NORESTART;
        }
 
@@ -2250,11 +2258,11 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 }
 
 static void perf_swcounter_overflow(struct perf_counter *counter,
-                                   int nmi, struct pt_regs *regs)
+                                   int nmi, struct pt_regs *regs, u64 addr)
 {
        perf_swcounter_update(counter);
        perf_swcounter_set_period(counter);
-       if (perf_counter_overflow(counter, nmi, regs))
+       if (perf_counter_overflow(counter, nmi, regs, addr))
                /* soft-disable the counter */
                ;
 
@@ -2286,16 +2294,17 @@ static int perf_swcounter_match(struct perf_counter *counter,
 }
 
 static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
-                              int nmi, struct pt_regs *regs)
+                              int nmi, struct pt_regs *regs, u64 addr)
 {
        int neg = atomic64_add_negative(nr, &counter->hw.count);
        if (counter->hw.irq_period && !neg)
-               perf_swcounter_overflow(counter, nmi, regs);
+               perf_swcounter_overflow(counter, nmi, regs, addr);
 }
 
 static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
                                     enum perf_event_types type, u32 event,
-                                    u64 nr, int nmi, struct pt_regs *regs)
+                                    u64 nr, int nmi, struct pt_regs *regs,
+                                    u64 addr)
 {
        struct perf_counter *counter;
 
@@ -2305,7 +2314,7 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
        rcu_read_lock();
        list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
                if (perf_swcounter_match(counter, type, event, regs))
-                       perf_swcounter_add(counter, nr, nmi, regs);
+                       perf_swcounter_add(counter, nr, nmi, regs, addr);
        }
        rcu_read_unlock();
 }
@@ -2325,7 +2334,8 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx)
 }
 
 static void __perf_swcounter_event(enum perf_event_types type, u32 event,
-                                  u64 nr, int nmi, struct pt_regs *regs)
+                                  u64 nr, int nmi, struct pt_regs *regs,
+                                  u64 addr)
 {
        struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
        int *recursion = perf_swcounter_recursion_context(cpuctx);
@@ -2336,10 +2346,11 @@ static void __perf_swcounter_event(enum perf_event_types type, u32 event,
        (*recursion)++;
        barrier();
 
-       perf_swcounter_ctx_event(&cpuctx->ctx, type, event, nr, nmi, regs);
+       perf_swcounter_ctx_event(&cpuctx->ctx, type, event,
+                                nr, nmi, regs, addr);
        if (cpuctx->task_ctx) {
                perf_swcounter_ctx_event(cpuctx->task_ctx, type, event,
-                               nr, nmi, regs);
+                                        nr, nmi, regs, addr);
        }
 
        barrier();
@@ -2349,9 +2360,10 @@ out:
        put_cpu_var(perf_cpu_context);
 }
 
-void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs)
+void
+perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
 {
-       __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs);
+       __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs, addr);
 }
 
 static void perf_swcounter_read(struct perf_counter *counter)
@@ -2548,7 +2560,7 @@ void perf_tpcounter_event(int event_id)
        if (!regs)
                regs = task_pt_regs(current);
 
-       __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs);
+       __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0);
 }
 
 extern int ftrace_profile_enable(int);