perf_counter, x86: Fix/improve apic fallback
author Ingo Molnar <mingo@elte.hu>
Tue, 11 Aug 2009 08:40:08 +0000 (10:40 +0200)
committer Ingo Molnar <mingo@elte.hu>
Wed, 12 Aug 2009 12:12:49 +0000 (14:12 +0200)
Johannes Stezenbach reported that his Pentium-M based
laptop does not have the local APIC enabled by default,
and hence perfcounters do not get initialized.

Add a fallback for this case: allow non-sampled counters
and return with an error on sampled counters. This allows
'perf stat' to work out of the box - and allows 'perf top'
and 'perf record' to fall back on a hrtimer based sampling
method.

( Passing 'lapic' on the boot line will allow hardware
  sampling to occur - but if the APIC is disabled
  permanently by the hardware then this fallback still
  allows more systems to use perfcounters. )

Also decouple perfcounter support from X86_LOCAL_APIC.

-v2: fix typo breaking counters on all other systems ...

Reported-by: Johannes Stezenbach <js@sig21.net>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/Kconfig
arch/x86/kernel/cpu/perf_counter.c

index 738bdc6b0f8b8dcd938eddefa79beb4ed77e16f2..13ffa5df37d75120e6a3965102b24a05d3ee0fb7 100644 (file)
@@ -24,6 +24,7 @@ config X86
        select HAVE_UNSTABLE_SCHED_CLOCK
        select HAVE_IDE
        select HAVE_OPROFILE
+       select HAVE_PERF_COUNTERS if (!M386 && !M486)
        select HAVE_IOREMAP_PROT
        select HAVE_KPROBES
        select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -742,7 +743,6 @@ config X86_UP_IOAPIC
 config X86_LOCAL_APIC
        def_bool y
        depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
-       select HAVE_PERF_COUNTERS if (!M386 && !M486)
 
 config X86_IO_APIC
        def_bool y
index fffc126dbdf09ac66469372200b220c25ff9d2c4..900332b800f870d9f3f9e03bf2c5fa12505bc914 100644 (file)
@@ -55,6 +55,7 @@ struct x86_pmu {
        int             num_counters_fixed;
        int             counter_bits;
        u64             counter_mask;
+       int             apic;
        u64             max_period;
        u64             intel_ctrl;
 };
@@ -613,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
 
 static bool reserve_pmc_hardware(void)
 {
+#ifdef CONFIG_X86_LOCAL_APIC
        int i;
 
        if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -627,9 +629,11 @@ static bool reserve_pmc_hardware(void)
                if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
                        goto eventsel_fail;
        }
+#endif
 
        return true;
 
+#ifdef CONFIG_X86_LOCAL_APIC
 eventsel_fail:
        for (i--; i >= 0; i--)
                release_evntsel_nmi(x86_pmu.eventsel + i);
@@ -644,10 +648,12 @@ perfctr_fail:
                enable_lapic_nmi_watchdog();
 
        return false;
+#endif
 }
 
 static void release_pmc_hardware(void)
 {
+#ifdef CONFIG_X86_LOCAL_APIC
        int i;
 
        for (i = 0; i < x86_pmu.num_counters; i++) {
@@ -657,6 +663,7 @@ static void release_pmc_hardware(void)
 
        if (nmi_watchdog == NMI_LOCAL_APIC)
                enable_lapic_nmi_watchdog();
+#endif
 }
 
 static void hw_perf_counter_destroy(struct perf_counter *counter)
@@ -748,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
                hwc->sample_period = x86_pmu.max_period;
                hwc->last_period = hwc->sample_period;
                atomic64_set(&hwc->period_left, hwc->sample_period);
+       } else {
+               /*
+                * If we have a PMU initialized but no APIC
+                * interrupts, we cannot sample hardware
+                * counters (user-space has to fall back and
+                * sample via a hrtimer based software counter):
+                */
+               if (!x86_pmu.apic)
+                       return -EOPNOTSUPP;
        }
 
        counter->destroy = hw_perf_counter_destroy;
@@ -1449,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
 
 void set_perf_counter_pending(void)
 {
+#ifdef CONFIG_X86_LOCAL_APIC
        apic->send_IPI_self(LOCAL_PENDING_VECTOR);
+#endif
 }
 
 void perf_counters_lapic_init(void)
 {
-       if (!x86_pmu_initialized())
+#ifdef CONFIG_X86_LOCAL_APIC
+       if (!x86_pmu.apic || !x86_pmu_initialized())
                return;
 
        /*
         * Always use NMI for PMU
         */
        apic_write(APIC_LVTPC, APIC_DM_NMI);
+#endif
 }
 
 static int __kprobes
@@ -1484,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self,
 
        regs = args->regs;
 
+#ifdef CONFIG_X86_LOCAL_APIC
        apic_write(APIC_LVTPC, APIC_DM_NMI);
+#endif
        /*
         * Can't rely on the handled return value to say it was our NMI, two
         * counters could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1515,6 +1537,7 @@ static struct x86_pmu p6_pmu = {
        .event_map              = p6_pmu_event_map,
        .raw_event              = p6_pmu_raw_event,
        .max_events             = ARRAY_SIZE(p6_perfmon_event_map),
+       .apic                   = 1,
        .max_period             = (1ULL << 31) - 1,
        .version                = 0,
        .num_counters           = 2,
@@ -1541,6 +1564,7 @@ static struct x86_pmu intel_pmu = {
        .event_map              = intel_pmu_event_map,
        .raw_event              = intel_pmu_raw_event,
        .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
+       .apic                   = 1,
        /*
         * Intel PMCs cannot be accessed sanely above 32 bit width,
         * so we install an artificial 1<<31 period regardless of
@@ -1564,6 +1588,7 @@ static struct x86_pmu amd_pmu = {
        .num_counters           = 4,
        .counter_bits           = 48,
        .counter_mask           = (1ULL << 48) - 1,
+       .apic                   = 1,
        /* use highest bit to detect overflow */
        .max_period             = (1ULL << 47) - 1,
 };
@@ -1589,13 +1614,14 @@ static int p6_pmu_init(void)
                return -ENODEV;
        }
 
+       x86_pmu = p6_pmu;
+
        if (!cpu_has_apic) {
                pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
-               return -ENODEV;
+               pr_info("no hardware sampling interrupt available.\n");
+               x86_pmu.apic = 0;
        }
 
-       x86_pmu                         = p6_pmu;
-
        return 0;
 }