perf_counter: frequency based adaptive irq_period
author     Peter Zijlstra <a.p.zijlstra@chello.nl>
           Fri, 15 May 2009 13:19:28 +0000 (15:19 +0200)
committer  Ingo Molnar <mingo@elte.hu>
           Fri, 15 May 2009 13:26:56 +0000 (15:26 +0200)
Instead of specifying the irq_period for a counter, provide a target interrupt
frequency and dynamically adapt the irq_period to match this frequency.
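
The period is re-evaluated once per timer tick from the number of
interrupts the counter generated during that tick.  As a rough
illustration only (a stand-alone user-space sketch with a made-up HZ
value, a hypothetical adjust_period() helper and sample numbers, none
of which are part of this patch), the arithmetic done by the new
perf_adjust_freq() amounts to:

	#include <stdint.h>
	#include <stdio.h>

	#define HZ 1000	/* assumed tick rate, for the example only */

	/*
	 * One adjustment step: nudge the period halfway towards the
	 * value that would have produced target_freq interrupts/sec.
	 */
	static uint64_t adjust_period(uint64_t period, uint64_t interrupts,
				      uint64_t target_freq)
	{
		/* events per second observed during the last tick */
		uint64_t events = (uint64_t)HZ * interrupts * period;
		/* period that would have hit the target frequency */
		uint64_t ideal  = events / target_freq;
		/* damped feedback: move only half the distance per tick */
		int64_t  delta  = (int64_t)(1 + ideal - period) >> 1;

		period += delta;
		return period ? period : 1;
	}

	int main(void)
	{
		/*
		 * Target 1000 irqs/sec, current period 100000 events,
		 * and the counter fired 3 times during the last tick.
		 */
		uint64_t p = adjust_period(100000, 3, 1000);

		/* ideal period is 300000; we move halfway: prints 200000 */
		printf("new irq_period: %llu\n", (unsigned long long)p);
		return 0;
	}

Moving only half the distance per tick damps the feedback loop, so one
noisy tick does not make the period swing wildly.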

[ Impact: new perf-counter attribute/feature ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090515132018.646195868@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/powerpc/kernel/perf_counter.c
arch/x86/kernel/cpu/perf_counter.c
include/linux/perf_counter.h
kernel/perf_counter.c

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index bb1b463c1361bc8f638e01a90620f2e0bc08749e..db8d5cafc1595a328d38df62699cf78ce120626a 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -534,7 +534,7 @@ void hw_perf_enable(void)
                        continue;
                }
                val = 0;
-               if (counter->hw_event.irq_period) {
+               if (counter->hw.irq_period) {
                        left = atomic64_read(&counter->hw.period_left);
                        if (left < 0x80000000L)
                                val = 0x80000000L - left;
@@ -829,8 +829,6 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 
        if (!ppmu)
                return ERR_PTR(-ENXIO);
-       if ((s64)counter->hw_event.irq_period < 0)
-               return ERR_PTR(-EINVAL);
        if (!perf_event_raw(&counter->hw_event)) {
                ev = perf_event_id(&counter->hw_event);
                if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
@@ -901,7 +899,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 
        counter->hw.config = events[n];
        counter->hw.counter_base = cflags[n];
-       atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
+       atomic64_set(&counter->hw.period_left, counter->hw.irq_period);
 
        /*
         * See if we need to reserve the PMU.
@@ -934,6 +932,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 static void record_and_restart(struct perf_counter *counter, long val,
                               struct pt_regs *regs, int nmi)
 {
+       u64 period = counter->hw.irq_period;
        s64 prev, delta, left;
        int record = 0;
 
@@ -948,11 +947,11 @@ static void record_and_restart(struct perf_counter *counter, long val,
         */
        val = 0;
        left = atomic64_read(&counter->hw.period_left) - delta;
-       if (counter->hw_event.irq_period) {
+       if (period) {
                if (left <= 0) {
-                       left += counter->hw_event.irq_period;
+                       left += period;
                        if (left <= 0)
-                               left = counter->hw_event.irq_period;
+                               left = period;
                        record = 1;
                }
                if (left < 0x80000000L)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 5a7f718eb1e15c90a6ac0e7b5462cbe120980556..886dcf334bc350de15ccbc5844fb11d8e089f50c 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -286,11 +286,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
                hwc->nmi = 1;
        }
 
-       hwc->irq_period = hw_event->irq_period;
-       if ((s64)hwc->irq_period <= 0 || hwc->irq_period > x86_pmu.max_period)
-               hwc->irq_period = x86_pmu.max_period;
-
-       atomic64_set(&hwc->period_left, hwc->irq_period);
+       atomic64_set(&hwc->period_left,
+                       min(x86_pmu.max_period, hwc->irq_period));
 
        /*
         * Raw event type provide the config in the event structure
@@ -458,7 +455,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
                             struct hw_perf_counter *hwc, int idx)
 {
        s64 left = atomic64_read(&hwc->period_left);
-       s64 period = hwc->irq_period;
+       s64 period = min(x86_pmu.max_period, hwc->irq_period);
        int err;
 
        /*
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index e543ecc129f12651a0b8baddf9c6184670adfd9f..004b6e162b96c7c39f1b81df18b88bc28e739be6 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -130,7 +130,11 @@ struct perf_counter_hw_event {
         */
        __u64                   config;
 
-       __u64                   irq_period;
+       union {
+               __u64           irq_period;
+               __u64           irq_freq;
+       };
+
        __u32                   record_type;
        __u32                   read_format;
 
@@ -146,8 +150,9 @@ struct perf_counter_hw_event {
                                mmap           :  1, /* include mmap data     */
                                munmap         :  1, /* include munmap data   */
                                comm           :  1, /* include comm data     */
+                               freq           :  1, /* use freq, not period  */
 
-                               __reserved_1   : 52;
+                               __reserved_1   : 51;
 
        __u32                   extra_config_len;
        __u32                   wakeup_events;  /* wakeup every n events */
@@ -337,6 +342,7 @@ struct hw_perf_counter {
        atomic64_t                      prev_count;
        u64                             irq_period;
        atomic64_t                      period_left;
+       u64                             interrupts;
 #endif
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 93f4a0e4b8739a0047bb3d0bee4d7804158cd8b5..0ad1db4f3d65184fef3a557347276770952b1145 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1046,6 +1046,38 @@ int perf_counter_task_enable(void)
        return 0;
 }
 
+void perf_adjust_freq(struct perf_counter_context *ctx)
+{
+       struct perf_counter *counter;
+       u64 irq_period;
+       u64 events, period;
+       s64 delta;
+
+       spin_lock(&ctx->lock);
+       list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+               if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+                       continue;
+
+               if (!counter->hw_event.freq || !counter->hw_event.irq_freq)
+                       continue;
+
+               events = HZ * counter->hw.interrupts * counter->hw.irq_period;
+               period = div64_u64(events, counter->hw_event.irq_freq);
+
+               delta = (s64)(1 + period - counter->hw.irq_period);
+               delta >>= 1;
+
+               irq_period = counter->hw.irq_period + delta;
+
+               if (!irq_period)
+                       irq_period = 1;
+
+               counter->hw.irq_period = irq_period;
+               counter->hw.interrupts = 0;
+       }
+       spin_unlock(&ctx->lock);
+}
+
 /*
  * Round-robin a context's counters:
  */
@@ -1081,6 +1113,9 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
        cpuctx = &per_cpu(perf_cpu_context, cpu);
        ctx = &curr->perf_counter_ctx;
 
+       perf_adjust_freq(&cpuctx->ctx);
+       perf_adjust_freq(ctx);
+
        perf_counter_cpu_sched_out(cpuctx);
        __perf_counter_task_sched_out(ctx);
 
@@ -2382,6 +2417,8 @@ int perf_counter_overflow(struct perf_counter *counter,
        int events = atomic_read(&counter->event_limit);
        int ret = 0;
 
+       counter->hw.interrupts++;
+
        /*
         * XXX event_limit might not quite work as expected on inherited
         * counters
@@ -2450,6 +2487,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
        enum hrtimer_restart ret = HRTIMER_RESTART;
        struct perf_counter *counter;
        struct pt_regs *regs;
+       u64 period;
 
        counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
        counter->pmu->read(counter);
@@ -2468,7 +2506,8 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
                        ret = HRTIMER_NORESTART;
        }
 
-       hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period));
+       period = max_t(u64, 10000, counter->hw.irq_period);
+       hrtimer_forward_now(hrtimer, ns_to_ktime(period));
 
        return ret;
 }
@@ -2629,8 +2668,9 @@ static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
        hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        hwc->hrtimer.function = perf_swcounter_hrtimer;
        if (hwc->irq_period) {
+               u64 period = max_t(u64, 10000, hwc->irq_period);
                __hrtimer_start_range_ns(&hwc->hrtimer,
-                               ns_to_ktime(hwc->irq_period), 0,
+                               ns_to_ktime(period), 0,
                                HRTIMER_MODE_REL, 0);
        }
 
@@ -2679,8 +2719,9 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter)
        hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        hwc->hrtimer.function = perf_swcounter_hrtimer;
        if (hwc->irq_period) {
+               u64 period = max_t(u64, 10000, hwc->irq_period);
                __hrtimer_start_range_ns(&hwc->hrtimer,
-                               ns_to_ktime(hwc->irq_period), 0,
+                               ns_to_ktime(period), 0,
                                HRTIMER_MODE_REL, 0);
        }
 
@@ -2811,9 +2852,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 
 static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 {
-       struct perf_counter_hw_event *hw_event = &counter->hw_event;
        const struct pmu *pmu = NULL;
-       struct hw_perf_counter *hwc = &counter->hw;
 
        /*
         * Software counters (currently) can't in general distinguish
@@ -2826,8 +2865,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
        case PERF_COUNT_CPU_CLOCK:
                pmu = &perf_ops_cpu_clock;
 
-               if (hw_event->irq_period && hw_event->irq_period < 10000)
-                       hw_event->irq_period = 10000;
                break;
        case PERF_COUNT_TASK_CLOCK:
                /*
@@ -2839,8 +2876,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
                else
                        pmu = &perf_ops_cpu_clock;
 
-               if (hw_event->irq_period && hw_event->irq_period < 10000)
-                       hw_event->irq_period = 10000;
                break;
        case PERF_COUNT_PAGE_FAULTS:
        case PERF_COUNT_PAGE_FAULTS_MIN:
@@ -2854,9 +2889,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
                break;
        }
 
-       if (pmu)
-               hwc->irq_period = hw_event->irq_period;
-
        return pmu;
 }
 
@@ -2872,6 +2904,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 {
        const struct pmu *pmu;
        struct perf_counter *counter;
+       struct hw_perf_counter *hwc;
        long err;
 
        counter = kzalloc(sizeof(*counter), gfpflags);
@@ -2907,6 +2940,12 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 
        pmu = NULL;
 
+       hwc = &counter->hw;
+       if (hw_event->freq && hw_event->irq_freq)
+               hwc->irq_period = TICK_NSEC / hw_event->irq_freq;
+       else
+               hwc->irq_period = hw_event->irq_period;
+
        /*
         * we currently do not support PERF_RECORD_GROUP on inherited counters
         */