Simplify stop_machine
authorRusty Russell <rusty@rustcorp.com.au>
Mon, 28 Jul 2008 17:16:28 +0000 (12:16 -0500)
committerRusty Russell <rusty@rustcorp.com.au>
Mon, 28 Jul 2008 02:16:29 +0000 (12:16 +1000)
stop_machine creates a kthread which creates kernel threads.  We can
create those threads directly and simplify things a little.  Some care
must be taken with CPU hotunplug, which has special needs, but that code
seems more robust than it was in the past.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
include/linux/stop_machine.h
kernel/cpu.c
kernel/stop_machine.c

index 18af011c13af390d2746db739c98347caacb4ddf..36c2c7284eb3a0354a69b3112401bec4a0c24c92 100644 (file)
  * @data: the data ptr for the @fn()
  * @cpu: if @cpu == n, run @fn() on cpu n
  *       if @cpu == NR_CPUS, run @fn() on any cpu
- *       if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and then
- *       concurrently on all the other cpus
+ *       if @cpu == ALL_CPUS, run @fn() on every online CPU.
  *
- * Description: This causes a thread to be scheduled on every other cpu,
- * each of which disables interrupts, and finally interrupts are disabled
- * on the current CPU.  The result is that noone is holding a spinlock
- * or inside any other preempt-disabled region when @fn() runs.
+ * Description: This causes a thread to be scheduled on every cpu,
+ * each of which disables interrupts.  The result is that noone is
+ * holding a spinlock or inside any other preempt-disabled region when
+ * @fn() runs.
  *
  * This can be thought of as a very heavy write lock, equivalent to
  * grabbing every spinlock in the kernel. */
@@ -35,13 +34,10 @@ int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
  * @data: the data ptr for the @fn
  * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS.
  *
- * Description: This is a special version of the above, which returns the
- * thread which has run @fn(): kthread_stop will return the return value
- * of @fn().  Used by hotplug cpu.
+ * Description: This is a special version of the above, which assumes cpus
+ * won't come or go while it's being called.  Used by hotplug cpu.
  */
-struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
-                                      unsigned int cpu);
-
+int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
 #else
 
 static inline int stop_machine_run(int (*fn)(void *), void *data,
index 10ba5f1004a5646bd9960bbe2156291edcb09fb0..cf79bb9113717da8b151f18c091f8dd6acfbf968 100644 (file)
@@ -216,7 +216,6 @@ static int __ref take_cpu_down(void *_param)
 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 {
        int err, nr_calls = 0;
-       struct task_struct *p;
        cpumask_t old_allowed, tmp;
        void *hcpu = (void *)(long)cpu;
        unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
@@ -250,19 +249,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
        cpu_clear(cpu, tmp);
        set_cpus_allowed_ptr(current, &tmp);
 
-       p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
+       err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
 
-       if (IS_ERR(p) || cpu_online(cpu)) {
+       if (err || cpu_online(cpu)) {
                /* CPU didn't die: tell everyone.  Can't complain. */
                if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
                                            hcpu) == NOTIFY_BAD)
                        BUG();
 
-               if (IS_ERR(p)) {
-                       err = PTR_ERR(p);
-                       goto out_allowed;
-               }
-               goto out_thread;
+               goto out_allowed;
        }
 
        /* Wait for it to sleep (leaving idle task). */
@@ -279,8 +274,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 
        check_for_tasks(cpu);
 
-out_thread:
-       err = kthread_stop(p);
 out_allowed:
        set_cpus_allowed_ptr(current, &old_allowed);
 out_release:
index a473bd0cb71b756f628604a123c800adf814f2fc..35882dccc94347ccf451e13c4bb4193fe73a91eb 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
+/* Copyright 2008, 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
  * GPL v2 and any later version.
  */
 #include <linux/cpu.h>
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
 
-/* Since we effect priority and affinity (both of which are visible
- * to, and settable by outside processes) we do indirection via a
- * kthread. */
-
-/* Thread to stop each CPU in user context. */
+/* This controls the threads on each CPU. */
 enum stopmachine_state {
-       STOPMACHINE_WAIT,
+       /* Dummy starting state for thread. */
+       STOPMACHINE_NONE,
+       /* Awaiting everyone to be scheduled. */
        STOPMACHINE_PREPARE,
+       /* Disable interrupts. */
        STOPMACHINE_DISABLE_IRQ,
+       /* Run the function */
        STOPMACHINE_RUN,
+       /* Exit */
        STOPMACHINE_EXIT,
 };
+static enum stopmachine_state state;
 
 struct stop_machine_data {
        int (*fn)(void *);
        void *data;
-       struct completion done;
-       int run_all;
-} smdata;
+       int fnret;
+};
 
-static enum stopmachine_state stopmachine_state;
-static unsigned int stopmachine_num_threads;
-static atomic_t stopmachine_thread_ack;
+/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
+static unsigned int num_threads;
+static atomic_t thread_ack;
+static struct completion finished;
+static DEFINE_MUTEX(lock);
 
-static int stopmachine(void *cpu)
+static void set_state(enum stopmachine_state newstate)
 {
-       int irqs_disabled = 0;
-       int prepared = 0;
-       int ran = 0;
-       cpumask_of_cpu_ptr(cpumask, (int)(long)cpu);
-
-       set_cpus_allowed_ptr(current, cpumask);
-
-       /* Ack: we are alive */
-       smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
-       atomic_inc(&stopmachine_thread_ack);
-
-       /* Simple state machine */
-       while (stopmachine_state != STOPMACHINE_EXIT) {
-               if (stopmachine_state == STOPMACHINE_DISABLE_IRQ 
-                   && !irqs_disabled) {
-                       local_irq_disable();
-                       hard_irq_disable();
-                       irqs_disabled = 1;
-                       /* Ack: irqs disabled. */
-                       smp_mb(); /* Must read state first. */
-                       atomic_inc(&stopmachine_thread_ack);
-               } else if (stopmachine_state == STOPMACHINE_PREPARE
-                          && !prepared) {
-                       /* Everyone is in place, hold CPU. */
-                       preempt_disable();
-                       prepared = 1;
-                       smp_mb(); /* Must read state first. */
-                       atomic_inc(&stopmachine_thread_ack);
-               } else if (stopmachine_state == STOPMACHINE_RUN && !ran) {
-                       smdata.fn(smdata.data);
-                       ran = 1;
-                       smp_mb(); /* Must read state first. */
-                       atomic_inc(&stopmachine_thread_ack);
-               }
-               /* Yield in first stage: migration threads need to
-                * help our sisters onto their CPUs. */
-               if (!prepared && !irqs_disabled)
-                       yield();
-               cpu_relax();
-       }
-
-       /* Ack: we are exiting. */
-       smp_mb(); /* Must read state first. */
-       atomic_inc(&stopmachine_thread_ack);
-
-       if (irqs_disabled)
-               local_irq_enable();
-       if (prepared)
-               preempt_enable();
-
-       return 0;
+       /* Reset ack counter. */
+       atomic_set(&thread_ack, num_threads);
+       smp_wmb();
+       state = newstate;
 }
 
-/* Change the thread state */
-static void stopmachine_set_state(enum stopmachine_state state)
+/* Last one to ack a state moves to the next state. */
+static void ack_state(void)
 {
-       atomic_set(&stopmachine_thread_ack, 0);
-       smp_wmb();
-       stopmachine_state = state;
-       while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
-               cpu_relax();
+       if (atomic_dec_and_test(&thread_ack)) {
+               /* If we're the last one to ack the EXIT, we're finished. */
+               if (state == STOPMACHINE_EXIT)
+                       complete(&finished);
+               else
+                       set_state(state + 1);
+       }
 }
 
-static int stop_machine(void)
+/* This is the actual thread which stops the CPU.  It exits by itself rather
+ * than waiting for kthread_stop(), because it's easier for hotplug CPU. */
+static int stop_cpu(struct stop_machine_data *smdata)
 {
-       int i, ret = 0;
-
-       atomic_set(&stopmachine_thread_ack, 0);
-       stopmachine_num_threads = 0;
-       stopmachine_state = STOPMACHINE_WAIT;
+       enum stopmachine_state curstate = STOPMACHINE_NONE;
+       int uninitialized_var(ret);
 
-       for_each_online_cpu(i) {
-               if (i == raw_smp_processor_id())
-                       continue;
-               ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
-               if (ret < 0)
-                       break;
-               stopmachine_num_threads++;
-       }
-
-       /* Wait for them all to come to life. */
-       while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) {
-               yield();
+       /* Simple state machine */
+       do {
+               /* Chill out and ensure we re-read stopmachine_state. */
                cpu_relax();
-       }
-
-       /* If some failed, kill them all. */
-       if (ret < 0) {
-               stopmachine_set_state(STOPMACHINE_EXIT);
-               return ret;
-       }
-
-       /* Now they are all started, make them hold the CPUs, ready. */
-       preempt_disable();
-       stopmachine_set_state(STOPMACHINE_PREPARE);
-
-       /* Make them disable irqs. */
-       local_irq_disable();
-       hard_irq_disable();
-       stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
-
-       return 0;
-}
+               if (state != curstate) {
+                       curstate = state;
+                       switch (curstate) {
+                       case STOPMACHINE_DISABLE_IRQ:
+                               local_irq_disable();
+                               hard_irq_disable();
+                               break;
+                       case STOPMACHINE_RUN:
+                               /* |= allows error detection if functions on
+                                * multiple CPUs. */
+                               smdata->fnret |= smdata->fn(smdata->data);
+                               break;
+                       default:
+                               break;
+                       }
+                       ack_state();
+               }
+       } while (curstate != STOPMACHINE_EXIT);
 
-static void restart_machine(void)
-{
-       stopmachine_set_state(STOPMACHINE_EXIT);
        local_irq_enable();
-       preempt_enable_no_resched();
+       do_exit(0);
 }
 
-static void run_other_cpus(void)
+/* Callback for CPUs which aren't supposed to do anything. */
+static int chill(void *unused)
 {
-       stopmachine_set_state(STOPMACHINE_RUN);
+       return 0;
 }
 
-static int do_stop(void *_smdata)
+int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
 {
-       struct stop_machine_data *smdata = _smdata;
-       int ret;
+       int i, err;
+       struct stop_machine_data active, idle;
+       struct task_struct **threads;
+
+       active.fn = fn;
+       active.data = data;
+       active.fnret = 0;
+       idle.fn = chill;
+       idle.data = NULL;
+
+       /* If they don't care which cpu fn runs on, just pick one. */
+       if (cpu == NR_CPUS)
+               cpu = any_online_cpu(cpu_online_map);
+
+       /* This could be too big for stack on large machines. */
+       threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
+       if (!threads)
+               return -ENOMEM;
+
+       /* Set up initial state. */
+       mutex_lock(&lock);
+       init_completion(&finished);
+       num_threads = num_online_cpus();
+       set_state(STOPMACHINE_PREPARE);
 
-       ret = stop_machine();
-       if (ret == 0) {
-               ret = smdata->fn(smdata->data);
-               if (smdata->run_all)
-                       run_other_cpus();
-               restart_machine();
-       }
+       for_each_online_cpu(i) {
+               struct stop_machine_data *smdata;
+               struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
 
-       /* We're done: you can kthread_stop us now */
-       complete(&smdata->done);
+               if (cpu == ALL_CPUS || i == cpu)
+                       smdata = &active;
+               else
+                       smdata = &idle;
+
+               threads[i] = kthread_create((void *)stop_cpu, smdata, "kstop%u",
+                                           i);
+               if (IS_ERR(threads[i])) {
+                       err = PTR_ERR(threads[i]);
+                       threads[i] = NULL;
+                       goto kill_threads;
+               }
 
-       /* Wait for kthread_stop */
-       set_current_state(TASK_INTERRUPTIBLE);
-       while (!kthread_should_stop()) {
-               schedule();
-               set_current_state(TASK_INTERRUPTIBLE);
-       }
-       __set_current_state(TASK_RUNNING);
-       return ret;
-}
+               /* Place it onto correct cpu. */
+               kthread_bind(threads[i], i);
 
-struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
-                                      unsigned int cpu)
-{
-       static DEFINE_MUTEX(stopmachine_mutex);
-       struct stop_machine_data smdata;
-       struct task_struct *p;
+               /* Make it highest prio. */
+               if (sched_setscheduler_nocheck(threads[i], SCHED_FIFO, &param))
+                       BUG();
+       }
 
-       mutex_lock(&stopmachine_mutex);
+       /* We've created all the threads.  Wake them all: hold this CPU so one
+        * doesn't hit this CPU until we're ready. */
+       cpu = get_cpu();
+       for_each_online_cpu(i)
+               wake_up_process(threads[i]);
 
-       smdata.fn = fn;
-       smdata.data = data;
-       smdata.run_all = (cpu == ALL_CPUS) ? 1 : 0;
-       init_completion(&smdata.done);
+       /* This will release the thread on our CPU. */
+       put_cpu();
+       wait_for_completion(&finished);
+       mutex_unlock(&lock);
 
-       smp_wmb(); /* make sure other cpus see smdata updates */
+       kfree(threads);
 
-       /* If they don't care which CPU fn runs on, bind to any online one. */
-       if (cpu == NR_CPUS || cpu == ALL_CPUS)
-               cpu = raw_smp_processor_id();
+       return active.fnret;
 
-       p = kthread_create(do_stop, &smdata, "kstopmachine");
-       if (!IS_ERR(p)) {
-               struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+kill_threads:
+       for_each_online_cpu(i)
+               if (threads[i])
+                       kthread_stop(threads[i]);
+       mutex_unlock(&lock);
 
-               /* One high-prio thread per cpu.  We'll do this one. */
-               sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
-               kthread_bind(p, cpu);
-               wake_up_process(p);
-               wait_for_completion(&smdata.done);
-       }
-       mutex_unlock(&stopmachine_mutex);
-       return p;
+       kfree(threads);
+       return err;
 }
 
 int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
 {
-       struct task_struct *p;
        int ret;
 
        /* No CPUs can come up or down during this. */
        get_online_cpus();
-       p = __stop_machine_run(fn, data, cpu);
-       if (!IS_ERR(p))
-               ret = kthread_stop(p);
-       else
-               ret = PTR_ERR(p);
+       ret = __stop_machine_run(fn, data, cpu);
        put_online_cpus();
 
        return ret;