arm: perf: fold percpu_pmu into pmu_hw_events
author Mark Rutland <mark.rutland@arm.com>
Tue, 13 May 2014 18:46:10 +0000 (19:46 +0100)
committer Will Deacon <will.deacon@arm.com>
Thu, 30 Oct 2014 12:17:00 +0000 (12:17 +0000)
Currently the percpu_pmu pointers used as percpu_irq dev_id values are
defined separately from the other per-cpu accounting data, which makes
dynamically allocating the data (as will be required for systems with
heterogeneous CPUs) difficult.

This patch moves the percpu_pmu pointers into pmu_hw_events (which is
itself allocated per cpu), which will allow for easier dynamic
allocation. Both percpu and regular irqs are requested using percpu_pmu
pointers as tokens, freeing us from having to know whether an irq is
percpu within the handler, and thus avoiding a radix tree lookup on the
handler path.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Tested-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
arch/arm/include/asm/pmu.h
arch/arm/kernel/perf_event.c
arch/arm/kernel/perf_event_cpu.c

index f273dd2285a1fee88915cc3d89cc2eaf1c203bfa..cc0149835507b29ff6cec8606c1cf9cd8f5a4906 100644 (file)
@@ -81,6 +81,12 @@ struct pmu_hw_events {
         * read/modify/write sequences.
         */
        raw_spinlock_t          pmu_lock;
+
+       /*
+        * When using percpu IRQs, we need a percpu dev_id. Place it here as we
+        * already have to allocate this struct per cpu.
+        */
+       struct arm_pmu          *percpu_pmu;
 };
 
 struct arm_pmu {
index 05ac5ee6e2bb87c40bb5029f4425732889cea1d9..e34934f63a492d23249d91e88831566cf9c8f76c 100644 (file)
@@ -304,17 +304,21 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
        int ret;
        u64 start_clock, finish_clock;
 
-       if (irq_is_percpu(irq))
-               dev = *(void **)dev;
-       armpmu = dev;
+       /*
+        * we request the IRQ with a (possibly percpu) struct arm_pmu**, but
+        * the handlers expect a struct arm_pmu*. The percpu_irq framework will
+        * do any necessary shifting, we just need to perform the first
+        * dereference.
+        */
+       armpmu = *(void **)dev;
        plat_device = armpmu->plat_device;
        plat = dev_get_platdata(&plat_device->dev);
 
        start_clock = sched_clock();
        if (plat && plat->handle_irq)
-               ret = plat->handle_irq(irq, dev, armpmu->handle_irq);
+               ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq);
        else
-               ret = armpmu->handle_irq(irq, dev);
+               ret = armpmu->handle_irq(irq, armpmu);
        finish_clock = sched_clock();
 
        perf_sample_event_took(finish_clock - start_clock);
index fd24ad84dba622e56e1a3e6f9b27b0b5632f77b0..b9391fa2368d1af1bd9c59b7d2f70b943abc29e3 100644 (file)
@@ -35,7 +35,6 @@
 /* Set at runtime when we know what CPU type we are. */
 static struct arm_pmu *cpu_pmu;
 
-static DEFINE_PER_CPU(struct arm_pmu *, percpu_pmu);
 static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
 
 /*
@@ -85,20 +84,21 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
 {
        int i, irq, irqs;
        struct platform_device *pmu_device = cpu_pmu->plat_device;
+       struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
 
        irqs = min(pmu_device->num_resources, num_possible_cpus());
 
        irq = platform_get_irq(pmu_device, 0);
        if (irq >= 0 && irq_is_percpu(irq)) {
                on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1);
-               free_percpu_irq(irq, &percpu_pmu);
+               free_percpu_irq(irq, &hw_events->percpu_pmu);
        } else {
                for (i = 0; i < irqs; ++i) {
                        if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs))
                                continue;
                        irq = platform_get_irq(pmu_device, i);
                        if (irq >= 0)
-                               free_irq(irq, cpu_pmu);
+                               free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, i));
                }
        }
 }
@@ -107,6 +107,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 {
        int i, err, irq, irqs;
        struct platform_device *pmu_device = cpu_pmu->plat_device;
+       struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
 
        if (!pmu_device)
                return -ENODEV;
@@ -119,7 +120,8 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 
        irq = platform_get_irq(pmu_device, 0);
        if (irq >= 0 && irq_is_percpu(irq)) {
-               err = request_percpu_irq(irq, handler, "arm-pmu", &percpu_pmu);
+               err = request_percpu_irq(irq, handler, "arm-pmu",
+                                        &hw_events->percpu_pmu);
                if (err) {
                        pr_err("unable to request IRQ%d for ARM PMU counters\n",
                                irq);
@@ -146,7 +148,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
 
                        err = request_irq(irq, handler,
                                          IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
-                                         cpu_pmu);
+                                         per_cpu_ptr(&hw_events->percpu_pmu, i));
                        if (err) {
                                pr_err("unable to request IRQ%d for ARM PMU counters\n",
                                        irq);
@@ -166,7 +168,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
        for_each_possible_cpu(cpu) {
                struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
                raw_spin_lock_init(&events->pmu_lock);
-               per_cpu(percpu_pmu, cpu) = cpu_pmu;
+               events->percpu_pmu = cpu_pmu;
        }
 
        cpu_pmu->hw_events      = &cpu_hw_events;