static char cpuset_nodelist[CPUSET_NODELIST_LEN];
static DEFINE_SPINLOCK(cpuset_buffer_lock);
+/*
+ * CPU / memory hotplug is handled asynchronously.
+ */
+static void cpuset_hotplug_workfn(struct work_struct *work);
+
+static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);
+
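The declarations above set up the deferral mechanism: hotplug callbacks only queue cpuset_hotplug_work, and the heavy lifting happens later in process context. As a standalone illustration of that workqueue pattern (a minimal sketch; the demo_* names are illustrative and not part of this patch):

#include <linux/workqueue.h>

static void demo_workfn(struct work_struct *work);
static DECLARE_WORK(demo_work, demo_workfn);

static void demo_workfn(struct work_struct *work)
{
	/* runs later in process context, outside the caller's locks */
}

static void demo_event_callback(void)
{
	/* defer the real processing instead of doing it here */
	schedule_work(&demo_work);
}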
/*
* This is ugly, but preserves the userspace API for existing cpuset
* users. If someone tries to mount the "cpuset" filesystem, we
struct cpuset *cs = cgroup_cs(cgrp);
struct cpuset *trialcs;
+ /*
+ * CPU or memory hotunplug may leave @cs without any execution
+ * resources, in which case the hotplug code asynchronously updates
+ * configuration and transfers all tasks to the nearest ancestor
+ * which can execute.
+ *
+ * As writes to "cpus" or "mems" may restore @cs's execution
+ * resources, wait for the previously scheduled operations before
+ * proceeding, so that we don't end up repeatedly removing tasks added
+ * after execution capability is restored.
+ */
+ flush_work(&cpuset_hotplug_work);
+
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
}
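On the write side, the same pattern means a consumer flushes the work item before relying on its outcome, exactly as the flush_work() call above does. A minimal sketch of such a consumer, assuming a hypothetical demo_write_resource() handler that is not part of cpuset:

static int demo_write_resource(void)
{
	/* let any queued hotplug update finish before restoring the config */
	flush_work(&cpuset_hotplug_work);

	/* ... apply the new "cpus" / "mems" value here ... */
	return 0;
}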
/**
- * cpuset_handle_hotplug - handle CPU/memory hot[un]plug
+ * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset
*
* This function is called after either CPU or memory configuration has
* changed and updates cpuset accordingly. The top_cpuset is always
* Note that CPU offlining during suspend is ignored. We don't modify
* cpusets across suspend/resume cycles at all.
*/
-static void cpuset_handle_hotplug(void)
+static void cpuset_hotplug_workfn(struct work_struct *work)
{
static cpumask_t new_cpus, tmp_cpus;
static nodemask_t new_mems, tmp_mems;
void cpuset_update_active_cpus(bool cpu_online)
{
- cpuset_handle_hotplug();
+ /*
+ * We're inside the CPU hotplug critical region, which usually nests
+ * inside cgroup synchronization. Bounce the actual hotplug processing
+ * to a work item to avoid the reverse locking order.
+ *
+ * We still need to do partition_sched_domains() synchronously;
+ * otherwise, the scheduler will get confused and put tasks on the
+ * dead CPU. Fall back to the single default domain for now;
+ * cpuset_hotplug_workfn() will rebuild the domains as necessary.
+ */
+ partition_sched_domains(1, NULL, NULL);
+ schedule_work(&cpuset_hotplug_work);
}
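For context, a hedged sketch of how a CPU hotplug notifier of this era might invoke cpuset_update_active_cpus(); the callback name and the exact set of actions handled are assumptions for illustration, not part of this patch. Letting the *_FROZEN variants fall through to NOTIFY_DONE matches the note above that suspend-time offlining is ignored:

#include <linux/cpu.h>
#include <linux/notifier.h>

static int demo_cpu_callback(struct notifier_block *nfb,
			     unsigned long action, void *hcpu)
{
	switch (action) {
	case CPU_ONLINE:
	case CPU_DOWN_FAILED:
		cpuset_update_active_cpus(true);
		break;
	case CPU_DOWN_PREPARE:
		cpuset_update_active_cpus(false);
		break;
	default:
		/* includes the *_FROZEN variants used during suspend/resume */
		return NOTIFY_DONE;
	}
	return NOTIFY_OK;
}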
#ifdef CONFIG_MEMORY_HOTPLUG
static int cpuset_track_online_nodes(struct notifier_block *self,
unsigned long action, void *arg)
{
- cpuset_handle_hotplug();
+ schedule_work(&cpuset_hotplug_work);
return NOTIFY_OK;
}
#endif
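For completeness, a memory-hotplug callback like cpuset_track_online_nodes() is typically wired up through the hotplug_memory_notifier() helper from <linux/memory.h>. A hedged sketch of that registration; the priority value and the demo_ wrapper are illustrative, not taken from this patch:

#include <linux/memory.h>

static void __init demo_register_mem_notifier(void)
{
	/* invoke cpuset_track_online_nodes() on memory online/offline events */
	hotplug_memory_notifier(cpuset_track_online_nodes, 10);
}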