tracing/rb: Convert to hotplug state machine
author Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Sat, 26 Nov 2016 23:13:34 +0000 (00:13 +0100)
committer Thomas Gleixner <tglx@linutronix.de>
Thu, 1 Dec 2016 23:52:34 +0000 (00:52 +0100)
Install the callbacks via the state machine. The notifier in struct
ring_buffer is replaced by the multi instance interface.  Upon
__ring_buffer_alloc() invocation, cpuhp_state_add_instance() will invoke
the trace_rb_cpu_prepare() on each CPU.

This callback may now fail. In that case __ring_buffer_alloc() fails and
cleans up (as before), and during a CPU up event the failure prevents the
CPU from coming up.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: rt@linutronix.de
Link: http://lkml.kernel.org/r/20161126231350.10321-7-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
include/linux/cpuhotplug.h
include/linux/ring_buffer.h
kernel/trace/ring_buffer.c
kernel/trace/trace.c

index e3771fb959c06b816acb6181f4b3433fea0d132b..18bcfeb2463e1bdf1b693d362e425a38e65fefd8 100644 (file)
@@ -62,6 +62,7 @@ enum cpuhp_state {
        CPUHP_TOPOLOGY_PREPARE,
        CPUHP_NET_IUCV_PREPARE,
        CPUHP_ARM_BL_PREPARE,
+       CPUHP_TRACE_RB_PREPARE,
        CPUHP_TIMERS_DEAD,
        CPUHP_NOTF_ERR_INJ_PREPARE,
        CPUHP_MIPS_SOC_PREPARE,
index 4acc552e92790c98f4e89721cc488d8441a289da..b6d4568795a78839f075c43681de5da1af0d0674 100644 (file)
@@ -198,4 +198,10 @@ enum ring_buffer_flags {
        RB_FL_OVERWRITE         = 1 << 0,
 };
 
+#ifdef CONFIG_RING_BUFFER
+int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node);
+#else
+#define trace_rb_cpu_prepare   NULL
+#endif
+
 #endif /* _LINUX_RING_BUFFER_H */
index 9c143739b8d73f0c7982be06eb0e5cdb902bfc81..a7a055f167c7c79a8845ae9d0e0489a2c0c73b6e 100644 (file)
@@ -479,9 +479,7 @@ struct ring_buffer {
 
        struct ring_buffer_per_cpu      **buffers;
 
-#ifdef CONFIG_HOTPLUG_CPU
-       struct notifier_block           cpu_notify;
-#endif
+       struct hlist_node               node;
        u64                             (*clock)(void);
 
        struct rb_irq_work              irq_work;
@@ -1274,11 +1272,6 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
        kfree(cpu_buffer);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-static int rb_cpu_notify(struct notifier_block *self,
-                        unsigned long action, void *hcpu);
-#endif
-
 /**
  * __ring_buffer_alloc - allocate a new ring_buffer
  * @size: the size in bytes per cpu that is needed.
@@ -1296,6 +1289,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
        long nr_pages;
        int bsize;
        int cpu;
+       int ret;
 
        /* keep it in its own cache line */
        buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
@@ -1318,17 +1312,6 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
        if (nr_pages < 2)
                nr_pages = 2;
 
-       /*
-        * In case of non-hotplug cpu, if the ring-buffer is allocated
-        * in early initcall, it will not be notified of secondary cpus.
-        * In that off case, we need to allocate for all possible cpus.
-        */
-#ifdef CONFIG_HOTPLUG_CPU
-       cpu_notifier_register_begin();
-       cpumask_copy(buffer->cpumask, cpu_online_mask);
-#else
-       cpumask_copy(buffer->cpumask, cpu_possible_mask);
-#endif
        buffer->cpus = nr_cpu_ids;
 
        bsize = sizeof(void *) * nr_cpu_ids;
@@ -1337,19 +1320,15 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
        if (!buffer->buffers)
                goto fail_free_cpumask;
 
-       for_each_buffer_cpu(buffer, cpu) {
-               buffer->buffers[cpu] =
-                       rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
-               if (!buffer->buffers[cpu])
-                       goto fail_free_buffers;
-       }
+       cpu = raw_smp_processor_id();
+       cpumask_set_cpu(cpu, buffer->cpumask);
+       buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
+       if (!buffer->buffers[cpu])
+               goto fail_free_buffers;
 
-#ifdef CONFIG_HOTPLUG_CPU
-       buffer->cpu_notify.notifier_call = rb_cpu_notify;
-       buffer->cpu_notify.priority = 0;
-       __register_cpu_notifier(&buffer->cpu_notify);
-       cpu_notifier_register_done();
-#endif
+       ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
+       if (ret < 0)
+               goto fail_free_buffers;
 
        mutex_init(&buffer->mutex);
 
@@ -1364,9 +1343,6 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 
  fail_free_cpumask:
        free_cpumask_var(buffer->cpumask);
-#ifdef CONFIG_HOTPLUG_CPU
-       cpu_notifier_register_done();
-#endif
 
  fail_free_buffer:
        kfree(buffer);
@@ -1383,18 +1359,11 @@ ring_buffer_free(struct ring_buffer *buffer)
 {
        int cpu;
 
-#ifdef CONFIG_HOTPLUG_CPU
-       cpu_notifier_register_begin();
-       __unregister_cpu_notifier(&buffer->cpu_notify);
-#endif
+       cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
 
        for_each_buffer_cpu(buffer, cpu)
                rb_free_cpu_buffer(buffer->buffers[cpu]);
 
-#ifdef CONFIG_HOTPLUG_CPU
-       cpu_notifier_register_done();
-#endif
-
        kfree(buffer->buffers);
        free_cpumask_var(buffer->cpumask);
 
@@ -4633,62 +4602,48 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 }
 EXPORT_SYMBOL_GPL(ring_buffer_read_page);
 
-#ifdef CONFIG_HOTPLUG_CPU
-static int rb_cpu_notify(struct notifier_block *self,
-                        unsigned long action, void *hcpu)
+/*
+ * We only allocate new buffers, never free them if the CPU goes down.
+ * If we were to free the buffer, then the user would lose any trace that was in
+ * the buffer.
+ */
+int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node)
 {
-       struct ring_buffer *buffer =
-               container_of(self, struct ring_buffer, cpu_notify);
-       long cpu = (long)hcpu;
+       struct ring_buffer *buffer;
        long nr_pages_same;
        int cpu_i;
        unsigned long nr_pages;
 
-       switch (action) {
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
-               if (cpumask_test_cpu(cpu, buffer->cpumask))
-                       return NOTIFY_OK;
-
-               nr_pages = 0;
-               nr_pages_same = 1;
-               /* check if all cpu sizes are same */
-               for_each_buffer_cpu(buffer, cpu_i) {
-                       /* fill in the size from first enabled cpu */
-                       if (nr_pages == 0)
-                               nr_pages = buffer->buffers[cpu_i]->nr_pages;
-                       if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
-                               nr_pages_same = 0;
-                               break;
-                       }
-               }
-               /* allocate minimum pages, user can later expand it */
-               if (!nr_pages_same)
-                       nr_pages = 2;
-               buffer->buffers[cpu] =
-                       rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
-               if (!buffer->buffers[cpu]) {
-                       WARN(1, "failed to allocate ring buffer on CPU %ld\n",
-                            cpu);
-                       return NOTIFY_OK;
+       buffer = container_of(node, struct ring_buffer, node);
+       if (cpumask_test_cpu(cpu, buffer->cpumask))
+               return 0;
+
+       nr_pages = 0;
+       nr_pages_same = 1;
+       /* check if all cpu sizes are same */
+       for_each_buffer_cpu(buffer, cpu_i) {
+               /* fill in the size from first enabled cpu */
+               if (nr_pages == 0)
+                       nr_pages = buffer->buffers[cpu_i]->nr_pages;
+               if (nr_pages != buffer->buffers[cpu_i]->nr_pages) {
+                       nr_pages_same = 0;
+                       break;
                }
-               smp_wmb();
-               cpumask_set_cpu(cpu, buffer->cpumask);
-               break;
-       case CPU_DOWN_PREPARE:
-       case CPU_DOWN_PREPARE_FROZEN:
-               /*
-                * Do nothing.
-                *  If we were to free the buffer, then the user would
-                *  lose any trace that was in the buffer.
-                */
-               break;
-       default:
-               break;
        }
-       return NOTIFY_OK;
+       /* allocate minimum pages, user can later expand it */
+       if (!nr_pages_same)
+               nr_pages = 2;
+       buffer->buffers[cpu] =
+               rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
+       if (!buffer->buffers[cpu]) {
+               WARN(1, "failed to allocate ring buffer on CPU %u\n",
+                    cpu);
+               return -ENOMEM;
+       }
+       smp_wmb();
+       cpumask_set_cpu(cpu, buffer->cpumask);
+       return 0;
 }
-#endif
 
 #ifdef CONFIG_RING_BUFFER_STARTUP_TEST
 /*
index 8696ce6bf2f68838caefb8025ae3a6b133a2de64..465d56febc5b805fd882ee831bb3d1e75308b6d1 100644 (file)
@@ -7659,10 +7659,21 @@ __init static int tracer_alloc_buffers(void)
 
        raw_spin_lock_init(&global_trace.start_lock);
 
+       /*
+        * The prepare callback allocates some memory for the ring buffer. We
+        * don't free the buffer if the CPU goes down. If we were to free
+        * the buffer, then the user would lose any trace that was in the
+        * buffer. The memory will be removed once the "instance" is removed.
+        */
+       ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
+                                     "trace/RB:preapre", trace_rb_cpu_prepare,
+                                     NULL);
+       if (ret < 0)
+               goto out_free_cpumask;
        /* Used for event triggers */
        temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
        if (!temp_buffer)
-               goto out_free_cpumask;
+               goto out_rm_hp_state;
 
        if (trace_create_savedcmd() < 0)
                goto out_free_temp_buffer;
@@ -7723,6 +7734,8 @@ out_free_savedcmd:
        free_saved_cmdlines_buffer(savedcmd);
 out_free_temp_buffer:
        ring_buffer_free(temp_buffer);
+out_rm_hp_state:
+       cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
 out_free_cpumask:
        free_cpumask_var(global_trace.tracing_cpumask);
 out_free_buffer_mask: