perfcounters: throttle on too high IRQ rates
author Mike Galbraith <efault@gmx.de>
Fri, 23 Jan 2009 09:13:01 +0000 (10:13 +0100)
committer Ingo Molnar <mingo@elte.hu>
Fri, 23 Jan 2009 10:33:18 +0000 (11:33 +0100)
Starting kerneltop with only -c 100 seems to be a bad idea: it can
easily lock up the system due to perfcounter IRQ overload.

So add throttling: if a new IRQ arrives less than
PERFMON_MIN_PERIOD_NS after the previous one, turn off the perfcounters
and unthrottle them from the next timer tick.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/kernel/apic.c
arch/x86/kernel/cpu/perf_counter.c
include/linux/perf_counter.h

index 7b434e5b14c933585f6ec2b8c2f04e96f0e120ba..849c23009bf55e325b223e00700b95a13d04ec2d 100644 (file)
@@ -781,6 +781,8 @@ static void local_apic_timer_interrupt(void)
        inc_irq_stat(apic_timer_irqs);
 
        evt->event_handler(evt);
+
+       perf_counter_unthrottle();
 }
 
 /*
index 9376771f757b26167c8965cf54955802b84d2dc8..1a040b179b53780efcccf3ac4fc1ae3581240271 100644 (file)
@@ -33,6 +33,9 @@ static int nr_counters_fixed __read_mostly;
 struct cpu_hw_counters {
        struct perf_counter     *counters[X86_PMC_IDX_MAX];
        unsigned long           used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+       u64                     last_interrupt;
+       u64                     global_enable;
+       int                     throttled;
 };
 
 /*
@@ -474,16 +477,19 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
 static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
 {
        int bit, cpu = smp_processor_id();
-       u64 ack, status, saved_global;
-       struct cpu_hw_counters *cpuc;
+       u64 ack, status, now;
+       struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
 
-       rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global);
+       rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
 
        /* Disable counters globally */
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
        ack_APIC_irq();
 
-       cpuc = &per_cpu(cpu_hw_counters, cpu);
+       now = sched_clock();
+       if (now - cpuc->last_interrupt < PERFMON_MIN_PERIOD_NS)
+               cpuc->throttled = 1;
+       cpuc->last_interrupt = now;
 
        rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
        if (!status)
@@ -533,9 +539,29 @@ again:
                goto again;
 out:
        /*
-        * Restore - do not reenable when global enable is off:
+        * Restore - do not reenable when global enable is off or throttled:
         */
-       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global);
+       if (!cpuc->throttled)
+               wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
+}
+
+void perf_counter_unthrottle(void)
+{
+       struct cpu_hw_counters *cpuc;
+
+       if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+               return;
+
+       if (unlikely(!perf_counters_initialized))
+               return;
+
+       cpuc = &per_cpu(cpu_hw_counters, smp_processor_id());
+       if (cpuc->throttled) {
+               if (printk_ratelimit())
+                       printk(KERN_WARNING "PERFMON: max event frequency exceeded!\n");
+               wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
+               cpuc->throttled = 0;
+       }
 }
 
 void smp_perf_counter_interrupt(struct pt_regs *regs)
index 33ba9fe0a78104fff1a15d7717ed8db5efecb723..91f1ca4c01c08e85f4494de9f0a300f11cbb14e1 100644 (file)
@@ -254,6 +254,7 @@ extern void perf_counter_init_task(struct task_struct *child);
 extern void perf_counter_exit_task(struct task_struct *child);
 extern void perf_counter_notify(struct pt_regs *regs);
 extern void perf_counter_print_debug(void);
+extern void perf_counter_unthrottle(void);
 extern u64 hw_perf_save_disable(void);
 extern void hw_perf_restore(u64 ctrl);
 extern int perf_counter_task_disable(void);
@@ -270,6 +271,8 @@ static inline int is_software_counter(struct perf_counter *counter)
        return !counter->hw_event.raw && counter->hw_event.type < 0;
 }
 
+#define PERFMON_MIN_PERIOD_NS 10000
+
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu)          { }
@@ -281,6 +284,7 @@ static inline void perf_counter_init_task(struct task_struct *child)        { }
 static inline void perf_counter_exit_task(struct task_struct *child)   { }
 static inline void perf_counter_notify(struct pt_regs *regs)           { }
 static inline void perf_counter_print_debug(void)                      { }
+static inline void perf_counter_unthrottle(void)                       { }
 static inline void hw_perf_restore(u64 ctrl)                   { }
 static inline u64 hw_perf_save_disable(void)                 { return 0; }
 static inline int perf_counter_task_disable(void)      { return -EINVAL; }