ftrace: trace preempt off critical timings
authorSteven Rostedt <srostedt@redhat.com>
Mon, 12 May 2008 19:20:42 +0000 (21:20 +0200)
committerThomas Gleixner <tglx@linutronix.de>
Fri, 23 May 2008 18:32:54 +0000 (20:32 +0200)
Add preempt off timings. A lot of kernel core code is taken from the RT patch
latency trace that was written by Ingo Molnar.

This adds "preemptoff" and "preemptirqsoff" to /debugfs/tracing/available_tracers

Now instead of just tracing irqs off, preemption off can be selected
to be recorded.

When this is selected, it shares the same files as irqs off timings.
One can either trace preemption off, irqs off, or one or the other off.

By echoing "preemptoff" into /debugfs/tracing/current_tracer, recording
of preempt off only is performed. "irqsoff" will only record the time
irqs are disabled, but "preemptirqsoff" will take the total time irqs
or preemption are disabled. Runtime switching of these options is now
supported by simpling echoing in the appropriate trace name into
/debugfs/tracing/current_tracer.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
arch/x86/kernel/process_32.c
include/linux/ftrace.h
include/linux/irqflags.h
include/linux/preempt.h
kernel/sched.c
kernel/trace/Kconfig
kernel/trace/Makefile
kernel/trace/trace_irqsoff.c

index f8476dfbb60d9818f5da08d3a5b95a223930964f..a30aa1f2607a7d330c0a8b93d06940222aac8902 100644 (file)
@@ -185,7 +185,10 @@ void cpu_idle(void)
 
                        local_irq_disable();
                        __get_cpu_var(irq_stat).idle_timestamp = jiffies;
+                       /* Don't trace irqs off for idle */
+                       stop_critical_timings();
                        idle();
+                       start_critical_timings();
                }
                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
index 0a20445dcbcc35bfa3ccdb54641be938a1a1d185..740c97dcf9cb831fc2092458b913c3a1de571a72 100644 (file)
@@ -58,4 +58,12 @@ extern void mcount(void);
 # define time_hardirqs_off(a0, a1)             do { } while (0)
 #endif
 
+#ifdef CONFIG_PREEMPT_TRACER
+  extern void notrace trace_preempt_on(unsigned long a0, unsigned long a1);
+  extern void notrace trace_preempt_off(unsigned long a0, unsigned long a1);
+#else
+# define trace_preempt_on(a0, a1)              do { } while (0)
+# define trace_preempt_off(a0, a1)             do { } while (0)
+#endif
+
 #endif /* _LINUX_FTRACE_H */
index 5b711d4e9fd9ee2300ae600984736547c4ad2303..2b1c2e58566ea04460370fef4a4b686794ab512d 100644 (file)
@@ -41,7 +41,8 @@
 # define INIT_TRACE_IRQFLAGS
 #endif
 
-#ifdef CONFIG_IRQSOFF_TRACER
+#if defined(CONFIG_IRQSOFF_TRACER) || \
+       defined(CONFIG_PREEMPT_TRACER)
  extern void stop_critical_timings(void);
  extern void start_critical_timings(void);
 #else
index 36b03d50bf4097cb5b46c592933ce5aa19f7a55a..72b1a10a59b6c178bf192f9d06c3c6c38a33e0c4 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/linkage.h>
 #include <linux/list.h>
 
-#ifdef CONFIG_DEBUG_PREEMPT
+#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
   extern void add_preempt_count(int val);
   extern void sub_preempt_count(int val);
 #else
index 73e60085236503703717ceb99bb8eb7c9ba63ab7..328494e28df25cf5a633eba587b59f17e2daf2e3 100644 (file)
@@ -70,6 +70,7 @@
 #include <linux/bootmem.h>
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
+#include <linux/ftrace.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
@@ -4365,26 +4366,44 @@ void scheduler_tick(void)
 #endif
 }
 
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
+#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
+                               defined(CONFIG_PREEMPT_TRACER))
+
+static inline unsigned long get_parent_ip(unsigned long addr)
+{
+       if (in_lock_functions(addr)) {
+               addr = CALLER_ADDR2;
+               if (in_lock_functions(addr))
+                       addr = CALLER_ADDR3;
+       }
+       return addr;
+}
 
 void __kprobes add_preempt_count(int val)
 {
+#ifdef CONFIG_DEBUG_PREEMPT
        /*
         * Underflow?
         */
        if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
                return;
+#endif
        preempt_count() += val;
+#ifdef CONFIG_DEBUG_PREEMPT
        /*
         * Spinlock count overflowing soon?
         */
        DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
                                PREEMPT_MASK - 10);
+#endif
+       if (preempt_count() == val)
+               trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
 }
 EXPORT_SYMBOL(add_preempt_count);
 
 void __kprobes sub_preempt_count(int val)
 {
+#ifdef CONFIG_DEBUG_PREEMPT
        /*
         * Underflow?
         */
@@ -4396,7 +4415,10 @@ void __kprobes sub_preempt_count(int val)
        if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
                        !(preempt_count() & PREEMPT_MASK)))
                return;
+#endif
 
+       if (preempt_count() == val)
+               trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
        preempt_count() -= val;
 }
 EXPORT_SYMBOL(sub_preempt_count);
index 896df1cf6adc36457390f7bb16ad7c703659a293..6430016b98e8fcb16dea44d53c77c53ef4753e88 100644 (file)
@@ -44,6 +44,31 @@ config IRQSOFF_TRACER
 
              echo 0 > /debugfs/tracing/tracing_max_latency
 
+         (Note that kernel size and overhead increases with this option
+         enabled. This option and the preempt-off timing option can be
+         used together or separately.)
+
+config PREEMPT_TRACER
+       bool "Preemption-off Latency Tracer"
+       default n
+       depends on GENERIC_TIME
+       depends on PREEMPT
+       select TRACING
+       select TRACER_MAX_TRACE
+       help
+         This option measures the time spent in preemption off critical
+         sections, with microsecond accuracy.
+
+         The default measurement method is a maximum search, which is
+         disabled by default and can be runtime (re-)started
+         via:
+
+             echo 0 > /debugfs/tracing/tracing_max_latency
+
+         (Note that kernel size and overhead increases with this option
+         enabled. This option and the irqs-off timing option can be
+         used together or separately.)
+
 config SCHED_TRACER
        bool "Scheduling Latency Tracer"
        depends on DEBUG_KERNEL
index 46be8647fb6523edb67b5fe9b255cddf0dcd1be0..3fec653d65334e9551c3e6c6e1f3437dc875cb51 100644 (file)
@@ -4,6 +4,7 @@ obj-$(CONFIG_TRACING) += trace.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
 obj-$(CONFIG_FTRACE) += trace_functions.o
 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
+obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
 
 libftrace-y := ftrace.o
index a9131b0cf1a57d133acb1d7ee3c677823f4366c5..8b1231633dc5562b2a11c984db41ef84fcfaef78 100644 (file)
 static struct trace_array              *irqsoff_trace __read_mostly;
 static int                             tracer_enabled __read_mostly;
 
+static DEFINE_PER_CPU(int, tracing_cpu);
+
+enum {
+       TRACER_IRQS_OFF         = (1 << 1),
+       TRACER_PREEMPT_OFF      = (1 << 2),
+};
+
+static int trace_type __read_mostly;
+
+#ifdef CONFIG_PREEMPT_TRACER
+static inline int notrace
+preempt_trace(void)
+{
+       return ((trace_type & TRACER_PREEMPT_OFF) && preempt_count());
+}
+#else
+# define preempt_trace() (0)
+#endif
+
+#ifdef CONFIG_IRQSOFF_TRACER
+static inline int notrace
+irq_trace(void)
+{
+       return ((trace_type & TRACER_IRQS_OFF) &&
+               irqs_disabled());
+}
+#else
+# define irq_trace() (0)
+#endif
+
 /*
  * Sequence count - we record it when starting a measurement and
  * skip the latency if the sequence has changed - some other section
@@ -44,14 +74,11 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
        long disabled;
        int cpu;
 
-       if (likely(!tracer_enabled))
+       if (likely(!__get_cpu_var(tracing_cpu)))
                return;
 
        local_save_flags(flags);
 
-       if (!irqs_disabled_flags(flags))
-               return;
-
        cpu = raw_smp_processor_id();
        data = tr->data[cpu];
        disabled = atomic_inc_return(&data->disabled);
@@ -171,23 +198,29 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
        if (likely(!tracer_enabled))
                return;
 
+       if (__get_cpu_var(tracing_cpu))
+               return;
+
        cpu = raw_smp_processor_id();
        data = tr->data[cpu];
 
        if (unlikely(!data) || unlikely(!data->trace) ||
-           data->critical_start || atomic_read(&data->disabled))
+           atomic_read(&data->disabled))
                return;
 
        atomic_inc(&data->disabled);
 
        data->critical_sequence = max_sequence;
        data->preempt_timestamp = now(cpu);
-       data->critical_start = parent_ip;
+       data->critical_start = parent_ip ? : ip;
        tracing_reset(data);
 
        local_save_flags(flags);
+
        ftrace(tr, data, ip, parent_ip, flags);
 
+       __get_cpu_var(tracing_cpu) = 1;
+
        atomic_dec(&data->disabled);
 }
 
@@ -199,7 +232,13 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
        struct trace_array_cpu *data;
        unsigned long flags;
 
-       if (likely(!tracer_enabled))
+       /* Always clear the tracing cpu on stopping the trace */
+       if (unlikely(__get_cpu_var(tracing_cpu)))
+               __get_cpu_var(tracing_cpu) = 0;
+       else
+               return;
+
+       if (!tracer_enabled)
                return;
 
        cpu = raw_smp_processor_id();
@@ -212,49 +251,35 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
        atomic_inc(&data->disabled);
        local_save_flags(flags);
        ftrace(tr, data, ip, parent_ip, flags);
-       check_critical_timing(tr, data, parent_ip, cpu);
+       check_critical_timing(tr, data, parent_ip ? : ip, cpu);
        data->critical_start = 0;
        atomic_dec(&data->disabled);
 }
 
+/* start and stop critical timings used to for stoppage (in idle) */
 void notrace start_critical_timings(void)
 {
-       unsigned long flags;
-
-       local_save_flags(flags);
-
-       if (irqs_disabled_flags(flags))
+       if (preempt_trace() || irq_trace())
                start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
 }
 
 void notrace stop_critical_timings(void)
 {
-       unsigned long flags;
-
-       local_save_flags(flags);
-
-       if (irqs_disabled_flags(flags))
+       if (preempt_trace() || irq_trace())
                stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
 }
 
+#ifdef CONFIG_IRQSOFF_TRACER
 #ifdef CONFIG_PROVE_LOCKING
 void notrace time_hardirqs_on(unsigned long a0, unsigned long a1)
 {
-       unsigned long flags;
-
-       local_save_flags(flags);
-
-       if (irqs_disabled_flags(flags))
+       if (!preempt_trace() && irq_trace())
                stop_critical_timing(a0, a1);
 }
 
 void notrace time_hardirqs_off(unsigned long a0, unsigned long a1)
 {
-       unsigned long flags;
-
-       local_save_flags(flags);
-
-       if (irqs_disabled_flags(flags))
+       if (!preempt_trace() && irq_trace())
                start_critical_timing(a0, a1);
 }
 
@@ -289,49 +314,46 @@ inline void print_irqtrace_events(struct task_struct *curr)
  */
 void notrace trace_hardirqs_on(void)
 {
-       unsigned long flags;
-
-       local_save_flags(flags);
-
-       if (irqs_disabled_flags(flags))
+       if (!preempt_trace() && irq_trace())
                stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
 }
 EXPORT_SYMBOL(trace_hardirqs_on);
 
 void notrace trace_hardirqs_off(void)
 {
-       unsigned long flags;
-
-       local_save_flags(flags);
-
-       if (irqs_disabled_flags(flags))
+       if (!preempt_trace() && irq_trace())
                start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
 }
 EXPORT_SYMBOL(trace_hardirqs_off);
 
 void notrace trace_hardirqs_on_caller(unsigned long caller_addr)
 {
-       unsigned long flags;
-
-       local_save_flags(flags);
-
-       if (irqs_disabled_flags(flags))
+       if (!preempt_trace() && irq_trace())
                stop_critical_timing(CALLER_ADDR0, caller_addr);
 }
 EXPORT_SYMBOL(trace_hardirqs_on_caller);
 
 void notrace trace_hardirqs_off_caller(unsigned long caller_addr)
 {
-       unsigned long flags;
-
-       local_save_flags(flags);
-
-       if (irqs_disabled_flags(flags))
+       if (!preempt_trace() && irq_trace())
                start_critical_timing(CALLER_ADDR0, caller_addr);
 }
 EXPORT_SYMBOL(trace_hardirqs_off_caller);
 
 #endif /* CONFIG_PROVE_LOCKING */
+#endif /*  CONFIG_IRQSOFF_TRACER */
+
+#ifdef CONFIG_PREEMPT_TRACER
+void notrace trace_preempt_on(unsigned long a0, unsigned long a1)
+{
+       stop_critical_timing(a0, a1);
+}
+
+void notrace trace_preempt_off(unsigned long a0, unsigned long a1)
+{
+       start_critical_timing(a0, a1);
+}
+#endif /* CONFIG_PREEMPT_TRACER */
 
 static void start_irqsoff_tracer(struct trace_array *tr)
 {
@@ -345,7 +367,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr)
        tracer_enabled = 0;
 }
 
-static void irqsoff_tracer_init(struct trace_array *tr)
+static void __irqsoff_tracer_init(struct trace_array *tr)
 {
        irqsoff_trace = tr;
        /* make sure that the tracer is visibel */
@@ -382,6 +404,13 @@ static void notrace irqsoff_tracer_close(struct trace_iterator *iter)
                start_irqsoff_tracer(iter->tr);
 }
 
+#ifdef CONFIG_IRQSOFF_TRACER
+static void irqsoff_tracer_init(struct trace_array *tr)
+{
+       trace_type = TRACER_IRQS_OFF;
+
+       __irqsoff_tracer_init(tr);
+}
 static struct tracer irqsoff_tracer __read_mostly =
 {
        .name           = "irqsoff",
@@ -392,10 +421,65 @@ static struct tracer irqsoff_tracer __read_mostly =
        .ctrl_update    = irqsoff_tracer_ctrl_update,
        .print_max      = 1,
 };
+# define register_irqsoff(trace) register_tracer(&trace)
+#else
+# define register_irqsoff(trace) do { } while (0)
+#endif
+
+#ifdef CONFIG_PREEMPT_TRACER
+static void preemptoff_tracer_init(struct trace_array *tr)
+{
+       trace_type = TRACER_PREEMPT_OFF;
+
+       __irqsoff_tracer_init(tr);
+}
+
+static struct tracer preemptoff_tracer __read_mostly =
+{
+       .name           = "preemptoff",
+       .init           = preemptoff_tracer_init,
+       .reset          = irqsoff_tracer_reset,
+       .open           = irqsoff_tracer_open,
+       .close          = irqsoff_tracer_close,
+       .ctrl_update    = irqsoff_tracer_ctrl_update,
+       .print_max      = 1,
+};
+# define register_preemptoff(trace) register_tracer(&trace)
+#else
+# define register_preemptoff(trace) do { } while (0)
+#endif
+
+#if defined(CONFIG_IRQSOFF_TRACER) && \
+       defined(CONFIG_PREEMPT_TRACER)
+
+static void preemptirqsoff_tracer_init(struct trace_array *tr)
+{
+       trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
+
+       __irqsoff_tracer_init(tr);
+}
+
+static struct tracer preemptirqsoff_tracer __read_mostly =
+{
+       .name           = "preemptirqsoff",
+       .init           = preemptirqsoff_tracer_init,
+       .reset          = irqsoff_tracer_reset,
+       .open           = irqsoff_tracer_open,
+       .close          = irqsoff_tracer_close,
+       .ctrl_update    = irqsoff_tracer_ctrl_update,
+       .print_max      = 1,
+};
+
+# define register_preemptirqsoff(trace) register_tracer(&trace)
+#else
+# define register_preemptirqsoff(trace) do { } while (0)
+#endif
 
 __init static int init_irqsoff_tracer(void)
 {
-       register_tracer(&irqsoff_tracer);
+       register_irqsoff(irqsoff_tracer);
+       register_preemptoff(preemptoff_tracer);
+       register_preemptirqsoff(preemptirqsoff_tracer);
 
        return 0;
 }