stop_machine: reimplement using cpu_stop
authorTejun Heo <tj@kernel.org>
Thu, 6 May 2010 16:49:20 +0000 (18:49 +0200)
committerTejun Heo <tj@kernel.org>
Thu, 6 May 2010 16:49:20 +0000 (18:49 +0200)
Reimplement stop_machine using cpu_stop.  As cpu stoppers are
guaranteed to be available for all online cpus,
stop_machine_create/destroy() are no longer necessary and removed.

With resource management and synchronization handled by cpu_stop, the
new implementation is much simpler.  Asking the cpu_stop to execute
the stop_cpu() state machine on all online cpus with cpu hotplug
disabled is enough.

stop_machine itself doesn't need to manage any global resources
anymore, so all per-instance information is rolled into struct
stop_machine_data and the mutex and all static data variables are
removed.

The previous implementation created and destroyed RT workqueues as
necessary which made stop_machine() calls highly expensive on very
large machines.  According to Dimitri Sivanich, preventing the dynamic
creation/destruction makes booting faster more than twice on very
large machines.  cpu_stop resources are preallocated for all online
cpus and should have the same effect.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
arch/s390/kernel/time.c
drivers/xen/manage.c
include/linux/stop_machine.h
kernel/cpu.c
kernel/module.c
kernel/stop_machine.c

index fba6dec156bf4c27b1f469b9e07543a05d21ded8..03d96569f187b33ac4d46cf9c462f7f6b0d239fc 100644 (file)
@@ -390,7 +390,6 @@ static void __init time_init_wq(void)
        if (time_sync_wq)
                return;
        time_sync_wq = create_singlethread_workqueue("timesync");
-       stop_machine_create();
 }
 
 /*
index 2ac4440e7b087c1a9ac9c0b087d09a12be3f3a2e..8943b8ccee1a2ba3c35ac8eabfc14bd716fb136c 100644 (file)
@@ -80,12 +80,6 @@ static void do_suspend(void)
 
        shutting_down = SHUTDOWN_SUSPEND;
 
-       err = stop_machine_create();
-       if (err) {
-               printk(KERN_ERR "xen suspend: failed to setup stop_machine %d\n", err);
-               goto out;
-       }
-
 #ifdef CONFIG_PREEMPT
        /* If the kernel is preemptible, we need to freeze all the processes
           to prevent them from being in the middle of a pagetable update
@@ -93,7 +87,7 @@ static void do_suspend(void)
        err = freeze_processes();
        if (err) {
                printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
-               goto out_destroy_sm;
+               goto out;
        }
 #endif
 
@@ -136,12 +130,8 @@ out_resume:
 out_thaw:
 #ifdef CONFIG_PREEMPT
        thaw_processes();
-
-out_destroy_sm:
-#endif
-       stop_machine_destroy();
-
 out:
+#endif
        shutting_down = SHUTDOWN_INVALID;
 }
 #endif /* CONFIG_PM_SLEEP */
index efcbd6c379474877b30b69176a7954a2dad14587..0e552e72a4c46685cd14a571aa8b68ce80b208a8 100644 (file)
@@ -67,23 +67,6 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
  */
 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
 
-/**
- * stop_machine_create: create all stop_machine threads
- *
- * Description: This causes all stop_machine threads to be created before
- * stop_machine actually gets called. This can be used by subsystems that
- * need a non failing stop_machine infrastructure.
- */
-int stop_machine_create(void);
-
-/**
- * stop_machine_destroy: destroy all stop_machine threads
- *
- * Description: This causes all stop_machine threads which were created with
- * stop_machine_create to be destroyed again.
- */
-void stop_machine_destroy(void);
-
 #else
 
 static inline int stop_machine(int (*fn)(void *), void *data,
@@ -96,8 +79,5 @@ static inline int stop_machine(int (*fn)(void *), void *data,
        return ret;
 }
 
-static inline int stop_machine_create(void) { return 0; }
-static inline void stop_machine_destroy(void) { }
-
 #endif /* CONFIG_SMP */
 #endif /* _LINUX_STOP_MACHINE */
index 914aedcde849a2d577dcfae1a670a3e6c83877e0..545777574779da71ef8e3fcc3935e0e41fa266ca 100644 (file)
@@ -266,9 +266,6 @@ int __ref cpu_down(unsigned int cpu)
 {
        int err;
 
-       err = stop_machine_create();
-       if (err)
-               return err;
        cpu_maps_update_begin();
 
        if (cpu_hotplug_disabled) {
@@ -280,7 +277,6 @@ int __ref cpu_down(unsigned int cpu)
 
 out:
        cpu_maps_update_done();
-       stop_machine_destroy();
        return err;
 }
 EXPORT_SYMBOL(cpu_down);
@@ -361,9 +357,6 @@ int disable_nonboot_cpus(void)
 {
        int cpu, first_cpu, error;
 
-       error = stop_machine_create();
-       if (error)
-               return error;
        cpu_maps_update_begin();
        first_cpu = cpumask_first(cpu_online_mask);
        /*
@@ -394,7 +387,6 @@ int disable_nonboot_cpus(void)
                printk(KERN_ERR "Non-boot CPUs are not disabled\n");
        }
        cpu_maps_update_done();
-       stop_machine_destroy();
        return error;
 }
 
index 1016b75b026ab61b7ef0ee233915c3b1adeaf64d..0838246d8c94991ec8e8788b22a86e91e8552349 100644 (file)
@@ -723,16 +723,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
                return -EFAULT;
        name[MODULE_NAME_LEN-1] = '\0';
 
-       /* Create stop_machine threads since free_module relies on
-        * a non-failing stop_machine call. */
-       ret = stop_machine_create();
-       if (ret)
-               return ret;
-
-       if (mutex_lock_interruptible(&module_mutex) != 0) {
-               ret = -EINTR;
-               goto out_stop;
-       }
+       if (mutex_lock_interruptible(&module_mutex) != 0)
+               return -EINTR;
 
        mod = find_module(name);
        if (!mod) {
@@ -792,8 +784,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 
  out:
        mutex_unlock(&module_mutex);
-out_stop:
-       stop_machine_destroy();
        return ret;
 }
 
index 7e3f9182aef3a3283b9e92c55bf3ecf833d50ae9..884c7a1afeed38f8dfdddc65fe04c0bca551c8bf 100644 (file)
@@ -388,174 +388,92 @@ enum stopmachine_state {
        /* Exit */
        STOPMACHINE_EXIT,
 };
-static enum stopmachine_state state;
 
 struct stop_machine_data {
-       int (*fn)(void *);
-       void *data;
-       int fnret;
+       int                     (*fn)(void *);
+       void                    *data;
+       /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
+       unsigned int            num_threads;
+       const struct cpumask    *active_cpus;
+
+       enum stopmachine_state  state;
+       atomic_t                thread_ack;
 };
 
-/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
-static unsigned int num_threads;
-static atomic_t thread_ack;
-static DEFINE_MUTEX(lock);
-/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */
-static DEFINE_MUTEX(setup_lock);
-/* Users of stop_machine. */
-static int refcount;
-static struct workqueue_struct *stop_machine_wq;
-static struct stop_machine_data active, idle;
-static const struct cpumask *active_cpus;
-static void __percpu *stop_machine_work;
-
-static void set_state(enum stopmachine_state newstate)
+static void set_state(struct stop_machine_data *smdata,
+                     enum stopmachine_state newstate)
 {
        /* Reset ack counter. */
-       atomic_set(&thread_ack, num_threads);
+       atomic_set(&smdata->thread_ack, smdata->num_threads);
        smp_wmb();
-       state = newstate;
+       smdata->state = newstate;
 }
 
 /* Last one to ack a state moves to the next state. */
-static void ack_state(void)
+static void ack_state(struct stop_machine_data *smdata)
 {
-       if (atomic_dec_and_test(&thread_ack))
-               set_state(state + 1);
+       if (atomic_dec_and_test(&smdata->thread_ack))
+               set_state(smdata, smdata->state + 1);
 }
 
-/* This is the actual function which stops the CPU. It runs
- * in the context of a dedicated stopmachine workqueue. */
-static void stop_cpu(struct work_struct *unused)
+/* This is the cpu_stop function which stops the CPU. */
+static int stop_machine_cpu_stop(void *data)
 {
+       struct stop_machine_data *smdata = data;
        enum stopmachine_state curstate = STOPMACHINE_NONE;
-       struct stop_machine_data *smdata = &idle;
-       int cpu = smp_processor_id();
-       int err;
+       int cpu = smp_processor_id(), err = 0;
+       bool is_active;
+
+       if (!smdata->active_cpus)
+               is_active = cpu == cpumask_first(cpu_online_mask);
+       else
+               is_active = cpumask_test_cpu(cpu, smdata->active_cpus);
 
-       if (!active_cpus) {
-               if (cpu == cpumask_first(cpu_online_mask))
-                       smdata = &active;
-       } else {
-               if (cpumask_test_cpu(cpu, active_cpus))
-                       smdata = &active;
-       }
        /* Simple state machine */
        do {
                /* Chill out and ensure we re-read stopmachine_state. */
                cpu_relax();
-               if (state != curstate) {
-                       curstate = state;
+               if (smdata->state != curstate) {
+                       curstate = smdata->state;
                        switch (curstate) {
                        case STOPMACHINE_DISABLE_IRQ:
                                local_irq_disable();
                                hard_irq_disable();
                                break;
                        case STOPMACHINE_RUN:
-                               /* On multiple CPUs only a single error code
-                                * is needed to tell that something failed. */
-                               err = smdata->fn(smdata->data);
-                               if (err)
-                                       smdata->fnret = err;
+                               if (is_active)
+                                       err = smdata->fn(smdata->data);
                                break;
                        default:
                                break;
                        }
-                       ack_state();
+                       ack_state(smdata);
                }
        } while (curstate != STOPMACHINE_EXIT);
 
        local_irq_enable();
+       return err;
 }
 
-/* Callback for CPUs which aren't supposed to do anything. */
-static int chill(void *unused)
-{
-       return 0;
-}
-
-int stop_machine_create(void)
-{
-       mutex_lock(&setup_lock);
-       if (refcount)
-               goto done;
-       stop_machine_wq = create_rt_workqueue("kstop");
-       if (!stop_machine_wq)
-               goto err_out;
-       stop_machine_work = alloc_percpu(struct work_struct);
-       if (!stop_machine_work)
-               goto err_out;
-done:
-       refcount++;
-       mutex_unlock(&setup_lock);
-       return 0;
-
-err_out:
-       if (stop_machine_wq)
-               destroy_workqueue(stop_machine_wq);
-       mutex_unlock(&setup_lock);
-       return -ENOMEM;
-}
-EXPORT_SYMBOL_GPL(stop_machine_create);
-
-void stop_machine_destroy(void)
-{
-       mutex_lock(&setup_lock);
-       refcount--;
-       if (refcount)
-               goto done;
-       destroy_workqueue(stop_machine_wq);
-       free_percpu(stop_machine_work);
-done:
-       mutex_unlock(&setup_lock);
-}
-EXPORT_SYMBOL_GPL(stop_machine_destroy);
-
 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
-       struct work_struct *sm_work;
-       int i, ret;
-
-       /* Set up initial state. */
-       mutex_lock(&lock);
-       num_threads = num_online_cpus();
-       active_cpus = cpus;
-       active.fn = fn;
-       active.data = data;
-       active.fnret = 0;
-       idle.fn = chill;
-       idle.data = NULL;
-
-       set_state(STOPMACHINE_PREPARE);
-
-       /* Schedule the stop_cpu work on all cpus: hold this CPU so one
-        * doesn't hit this CPU until we're ready. */
-       get_cpu();
-       for_each_online_cpu(i) {
-               sm_work = per_cpu_ptr(stop_machine_work, i);
-               INIT_WORK(sm_work, stop_cpu);
-               queue_work_on(i, stop_machine_wq, sm_work);
-       }
-       /* This will release the thread on our CPU. */
-       put_cpu();
-       flush_workqueue(stop_machine_wq);
-       ret = active.fnret;
-       mutex_unlock(&lock);
-       return ret;
+       struct stop_machine_data smdata = { .fn = fn, .data = data,
+                                           .num_threads = num_online_cpus(),
+                                           .active_cpus = cpus };
+
+       /* Set the initial state and stop all online cpus. */
+       set_state(&smdata, STOPMACHINE_PREPARE);
+       return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
 }
 
 int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
        int ret;
 
-       ret = stop_machine_create();
-       if (ret)
-               return ret;
        /* No CPUs can come up or down during this. */
        get_online_cpus();
        ret = __stop_machine(fn, data, cpus);
        put_online_cpus();
-       stop_machine_destroy();
        return ret;
 }
 EXPORT_SYMBOL_GPL(stop_machine);