static int worker_thread(void *__worker);
static void copy_workqueue_attrs(struct workqueue_attrs *to,
const struct workqueue_attrs *from);
+static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>
}
EXPORT_SYMBOL_GPL(execute_in_process_context);
-#ifdef CONFIG_SYSFS
-/*
- * Workqueues with WQ_SYSFS flag set is visible to userland via
- * /sys/bus/workqueue/devices/WQ_NAME. All visible workqueues have the
- * following attributes.
- *
- * per_cpu RO bool : whether the workqueue is per-cpu or unbound
- * max_active RW int : maximum number of in-flight work items
- *
- * Unbound workqueues have the following extra attributes.
+/**
+ * free_workqueue_attrs - free a workqueue_attrs
+ * @attrs: workqueue_attrs to free
*
- * id RO int : the associated pool ID
- * nice RW int : nice value of the workers
- * cpumask RW mask : bitmask of allowed CPUs for the workers
+ * Undo alloc_workqueue_attrs().
*/
-struct wq_device {
- struct workqueue_struct *wq;
- struct device dev;
-};
-
-static struct workqueue_struct *dev_to_wq(struct device *dev)
+void free_workqueue_attrs(struct workqueue_attrs *attrs)
{
- struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
-
- return wq_dev->wq;
+ if (attrs) {
+ free_cpumask_var(attrs->cpumask);
+ kfree(attrs);
+ }
}
-static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
- char *buf)
+/**
+ * alloc_workqueue_attrs - allocate a workqueue_attrs
+ * @gfp_mask: allocation mask to use
+ *
+ * Allocate a new workqueue_attrs, initialize with default settings and
+ * return it.
+ *
+ * Return: The allocated new workqueue_attrs on success. %NULL on failure.
+ */
+struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask)
{
- struct workqueue_struct *wq = dev_to_wq(dev);
+ struct workqueue_attrs *attrs;
- return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
+ attrs = kzalloc(sizeof(*attrs), gfp_mask);
+ if (!attrs)
+ goto fail;
+ if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask))
+ goto fail;
+
+ cpumask_copy(attrs->cpumask, cpu_possible_mask);
+ return attrs;
+fail:
+ free_workqueue_attrs(attrs);
+ return NULL;
}
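[The alloc/free pair above brackets a short-lived copy that callers tweak and apply; a minimal sketch of that lifecycle for an unbound @wq, mirroring wq_sysfs_prep_attrs() later in this patch (illustrative, not part of the diff):

	struct workqueue_attrs *attrs;
	int ret;

	attrs = alloc_workqueue_attrs(GFP_KERNEL);
	if (!attrs)
		return -ENOMEM;

	/* start from the wq's current attrs, then tweak the copy */
	copy_workqueue_attrs(attrs, wq->unbound_attrs);
	attrs->nice = -5;

	ret = apply_workqueue_attrs(wq, attrs);	/* wq keeps its own copy */
	free_workqueue_attrs(attrs);		/* NULL-safe, undoes the alloc */
]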
-static DEVICE_ATTR_RO(per_cpu);
-static ssize_t max_active_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static void copy_workqueue_attrs(struct workqueue_attrs *to,
+ const struct workqueue_attrs *from)
{
- struct workqueue_struct *wq = dev_to_wq(dev);
-
- return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
+ to->nice = from->nice;
+ cpumask_copy(to->cpumask, from->cpumask);
+ /*
+ * Unlike hash and equality test, this function doesn't ignore
+ * ->no_numa as it is used for both pool and wq attrs. Instead,
+ * get_unbound_pool() explicitly clears ->no_numa after copying.
+ */
+ to->no_numa = from->no_numa;
}
-static ssize_t max_active_store(struct device *dev,
- struct device_attribute *attr, const char *buf,
- size_t count)
+/* hash value of the content of @attr */
+static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
{
- struct workqueue_struct *wq = dev_to_wq(dev);
- int val;
-
- if (sscanf(buf, "%d", &val) != 1 || val <= 0)
- return -EINVAL;
+ u32 hash = 0;
- workqueue_set_max_active(wq, val);
- return count;
+ hash = jhash_1word(attrs->nice, hash);
+ hash = jhash(cpumask_bits(attrs->cpumask),
+ BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
+ return hash;
}
-static DEVICE_ATTR_RW(max_active);
-
-static struct attribute *wq_sysfs_attrs[] = {
- &dev_attr_per_cpu.attr,
- &dev_attr_max_active.attr,
- NULL,
-};
-ATTRIBUTE_GROUPS(wq_sysfs);
-static ssize_t wq_pool_ids_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+/* content equality test */
+static bool wqattrs_equal(const struct workqueue_attrs *a,
+ const struct workqueue_attrs *b)
{
- struct workqueue_struct *wq = dev_to_wq(dev);
- const char *delim = "";
- int node, written = 0;
-
- rcu_read_lock_sched();
- for_each_node(node) {
- written += scnprintf(buf + written, PAGE_SIZE - written,
- "%s%d:%d", delim, node,
- unbound_pwq_by_node(wq, node)->pool->id);
- delim = " ";
- }
- written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
- rcu_read_unlock_sched();
-
- return written;
+ if (a->nice != b->nice)
+ return false;
+ if (!cpumask_equal(a->cpumask, b->cpumask))
+ return false;
+ return true;
}
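[wqattrs_hash() and wqattrs_equal() back the unbound_pool_hash lookups further down, so they must agree with each other; both deliberately skip ->no_numa, which copy_workqueue_attrs() above does copy. The invariant spelled out (illustrative comment, not part of the diff):

	/*
	 * Required by hash_for_each_possible() in get_unbound_pool():
	 * equal attrs must hash identically, i.e. for any a and b
	 *
	 *	wqattrs_equal(a, b) implies wqattrs_hash(a) == wqattrs_hash(b)
	 *
	 * Both ignore ->no_numa, so attrs differing only in NUMA policy
	 * share one pool; get_unbound_pool() clears no_numa on its copy.
	 */
]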
-static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
- char *buf)
+/**
+ * init_worker_pool - initialize a newly zalloc'd worker_pool
+ * @pool: worker_pool to initialize
+ *
+ * Initialize a newly zalloc'd @pool. It also allocates @pool->attrs.
+ *
+ * Return: 0 on success, -errno on failure. Even on failure, all fields
+ * inside @pool proper are initialized and put_unbound_pool() can be called
+ * on @pool safely to release it.
+ */
+static int init_worker_pool(struct worker_pool *pool)
{
- struct workqueue_struct *wq = dev_to_wq(dev);
- int written;
+ spin_lock_init(&pool->lock);
+ pool->id = -1;
+ pool->cpu = -1;
+ pool->node = NUMA_NO_NODE;
+ pool->flags |= POOL_DISASSOCIATED;
+ INIT_LIST_HEAD(&pool->worklist);
+ INIT_LIST_HEAD(&pool->idle_list);
+ hash_init(pool->busy_hash);
- mutex_lock(&wq->mutex);
- written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
- mutex_unlock(&wq->mutex);
+ init_timer_deferrable(&pool->idle_timer);
+ pool->idle_timer.function = idle_worker_timeout;
+ pool->idle_timer.data = (unsigned long)pool;
- return written;
-}
+ setup_timer(&pool->mayday_timer, pool_mayday_timeout,
+ (unsigned long)pool);
-/* prepare workqueue_attrs for sysfs store operations */
-static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
-{
- struct workqueue_attrs *attrs;
+ mutex_init(&pool->manager_arb);
+ mutex_init(&pool->attach_mutex);
+ INIT_LIST_HEAD(&pool->workers);
- attrs = alloc_workqueue_attrs(GFP_KERNEL);
- if (!attrs)
- return NULL;
+ ida_init(&pool->worker_ida);
+ INIT_HLIST_NODE(&pool->hash_node);
+ pool->refcnt = 1;
- mutex_lock(&wq->mutex);
- copy_workqueue_attrs(attrs, wq->unbound_attrs);
- mutex_unlock(&wq->mutex);
- return attrs;
+ /* shouldn't fail above this point */
+ pool->attrs = alloc_workqueue_attrs(GFP_KERNEL);
+ if (!pool->attrs)
+ return -ENOMEM;
+ return 0;
}
-static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
+static void rcu_free_wq(struct rcu_head *rcu)
{
- struct workqueue_struct *wq = dev_to_wq(dev);
- struct workqueue_attrs *attrs;
- int ret;
-
- attrs = wq_sysfs_prep_attrs(wq);
- if (!attrs)
- return -ENOMEM;
+ struct workqueue_struct *wq =
+ container_of(rcu, struct workqueue_struct, rcu);
- if (sscanf(buf, "%d", &attrs->nice) == 1 &&
- attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
- ret = apply_workqueue_attrs(wq, attrs);
+ if (!(wq->flags & WQ_UNBOUND))
+ free_percpu(wq->cpu_pwqs);
else
- ret = -EINVAL;
+ free_workqueue_attrs(wq->unbound_attrs);
- free_workqueue_attrs(attrs);
- return ret ?: count;
+ kfree(wq->rescuer);
+ kfree(wq);
}
-static ssize_t wq_cpumask_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static void rcu_free_pool(struct rcu_head *rcu)
{
- struct workqueue_struct *wq = dev_to_wq(dev);
- int written;
+ struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
- mutex_lock(&wq->mutex);
- written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
- cpumask_pr_args(wq->unbound_attrs->cpumask));
- mutex_unlock(&wq->mutex);
- return written;
+ ida_destroy(&pool->worker_ida);
+ free_workqueue_attrs(pool->attrs);
+ kfree(pool);
}
-static ssize_t wq_cpumask_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+/**
+ * put_unbound_pool - put a worker_pool
+ * @pool: worker_pool to put
+ *
+ * Put @pool. If its refcnt reaches zero, it gets destroyed in a sched-RCU
+ * safe manner. get_unbound_pool() calls this function on its failure path
+ * and this function should be able to release pools which went through,
+ * successfully or not, init_worker_pool().
+ *
+ * Should be called with wq_pool_mutex held.
+ */
+static void put_unbound_pool(struct worker_pool *pool)
{
- struct workqueue_struct *wq = dev_to_wq(dev);
- struct workqueue_attrs *attrs;
- int ret;
+ DECLARE_COMPLETION_ONSTACK(detach_completion);
+ struct worker *worker;
- attrs = wq_sysfs_prep_attrs(wq);
- if (!attrs)
- return -ENOMEM;
+ lockdep_assert_held(&wq_pool_mutex);
- ret = cpumask_parse(buf, attrs->cpumask);
- if (!ret)
- ret = apply_workqueue_attrs(wq, attrs);
+ if (--pool->refcnt)
+ return;
- free_workqueue_attrs(attrs);
- return ret ?: count;
-}
+ /* sanity checks */
+ if (WARN_ON(!(pool->cpu < 0)) ||
+ WARN_ON(!list_empty(&pool->worklist)))
+ return;
-static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct workqueue_struct *wq = dev_to_wq(dev);
- int written;
-
- mutex_lock(&wq->mutex);
- written = scnprintf(buf, PAGE_SIZE, "%d\n",
- !wq->unbound_attrs->no_numa);
- mutex_unlock(&wq->mutex);
-
- return written;
-}
-
-static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct workqueue_struct *wq = dev_to_wq(dev);
- struct workqueue_attrs *attrs;
- int v, ret;
-
- attrs = wq_sysfs_prep_attrs(wq);
- if (!attrs)
- return -ENOMEM;
+ /* release id and unhash */
+ if (pool->id >= 0)
+ idr_remove(&worker_pool_idr, pool->id);
+ hash_del(&pool->hash_node);
- ret = -EINVAL;
- if (sscanf(buf, "%d", &v) == 1) {
- attrs->no_numa = !v;
- ret = apply_workqueue_attrs(wq, attrs);
- }
+ /*
+ * Become the manager and destroy all workers. Grabbing
+ * manager_arb prevents @pool's workers from blocking on
+ * attach_mutex.
+ */
+ mutex_lock(&pool->manager_arb);
- free_workqueue_attrs(attrs);
- return ret ?: count;
-}
+ spin_lock_irq(&pool->lock);
+ while ((worker = first_idle_worker(pool)))
+ destroy_worker(worker);
+ WARN_ON(pool->nr_workers || pool->nr_idle);
+ spin_unlock_irq(&pool->lock);
-static struct device_attribute wq_sysfs_unbound_attrs[] = {
- __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
- __ATTR(nice, 0644, wq_nice_show, wq_nice_store),
- __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
- __ATTR(numa, 0644, wq_numa_show, wq_numa_store),
- __ATTR_NULL,
-};
+ mutex_lock(&pool->attach_mutex);
+ if (!list_empty(&pool->workers))
+ pool->detach_completion = &detach_completion;
+ mutex_unlock(&pool->attach_mutex);
-static struct bus_type wq_subsys = {
- .name = "workqueue",
- .dev_groups = wq_sysfs_groups,
-};
+ if (pool->detach_completion)
+ wait_for_completion(pool->detach_completion);
-static int __init wq_sysfs_init(void)
-{
- return subsys_virtual_register(&wq_subsys, NULL);
-}
-core_initcall(wq_sysfs_init);
+ mutex_unlock(&pool->manager_arb);
-static void wq_device_release(struct device *dev)
-{
- struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
+ /* shut down the timers */
+ del_timer_sync(&pool->idle_timer);
+ del_timer_sync(&pool->mayday_timer);
- kfree(wq_dev);
+ /* sched-RCU protected to allow dereferences from get_work_pool() */
+ call_rcu_sched(&pool->rcu, rcu_free_pool);
}
/**
- * workqueue_sysfs_register - make a workqueue visible in sysfs
- * @wq: the workqueue to register
+ * get_unbound_pool - get a worker_pool with the specified attributes
+ * @attrs: the attributes of the worker_pool to get
*
- * Expose @wq in sysfs under /sys/bus/workqueue/devices.
- * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set
- * which is the preferred method.
+ * Obtain a worker_pool which has the same attributes as @attrs, bump the
+ * reference count and return it. If there already is a matching
+ * worker_pool, it will be used; otherwise, this function attempts to
+ * create a new one.
*
- * Workqueue user should use this function directly iff it wants to apply
- * workqueue_attrs before making the workqueue visible in sysfs; otherwise,
- * apply_workqueue_attrs() may race against userland updating the
- * attributes.
+ * Should be called with wq_pool_mutex held.
*
- * Return: 0 on success, -errno on failure.
+ * Return: On success, a worker_pool with the same attributes as @attrs.
+ * On failure, %NULL.
*/
-int workqueue_sysfs_register(struct workqueue_struct *wq)
+static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
{
- struct wq_device *wq_dev;
- int ret;
+ u32 hash = wqattrs_hash(attrs);
+ struct worker_pool *pool;
+ int node;
- /*
- * Adjusting max_active or creating new pwqs by applyting
- * attributes breaks ordering guarantee. Disallow exposing ordered
- * workqueues.
- */
- if (WARN_ON(wq->flags & __WQ_ORDERED))
- return -EINVAL;
+ lockdep_assert_held(&wq_pool_mutex);
- wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
- if (!wq_dev)
- return -ENOMEM;
+ /* do we already have a matching pool? */
+ hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
+ if (wqattrs_equal(pool->attrs, attrs)) {
+ pool->refcnt++;
+ return pool;
+ }
+ }
- wq_dev->wq = wq;
- wq_dev->dev.bus = &wq_subsys;
- wq_dev->dev.init_name = wq->name;
- wq_dev->dev.release = wq_device_release;
+ /* nope, create a new one */
+ pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+ if (!pool || init_worker_pool(pool) < 0)
+ goto fail;
+
+ lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */
+ copy_workqueue_attrs(pool->attrs, attrs);
/*
- * unbound_attrs are created separately. Suppress uevent until
- * everything is ready.
+ * no_numa isn't a worker_pool attribute, always clear it. See
+ * 'struct workqueue_attrs' comments for detail.
*/
- dev_set_uevent_suppress(&wq_dev->dev, true);
-
- ret = device_register(&wq_dev->dev);
- if (ret) {
- kfree(wq_dev);
- wq->wq_dev = NULL;
- return ret;
- }
-
- if (wq->flags & WQ_UNBOUND) {
- struct device_attribute *attr;
+ pool->attrs->no_numa = false;
- for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
- ret = device_create_file(&wq_dev->dev, attr);
- if (ret) {
- device_unregister(&wq_dev->dev);
- wq->wq_dev = NULL;
- return ret;
+ /* if cpumask is contained inside a NUMA node, we belong to that node */
+ if (wq_numa_enabled) {
+ for_each_node(node) {
+ if (cpumask_subset(pool->attrs->cpumask,
+ wq_numa_possible_cpumask[node])) {
+ pool->node = node;
+ break;
}
}
}
- dev_set_uevent_suppress(&wq_dev->dev, false);
- kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
- return 0;
-}
+ if (worker_pool_assign_id(pool) < 0)
+ goto fail;
-/**
- * workqueue_sysfs_unregister - undo workqueue_sysfs_register()
- * @wq: the workqueue to unregister
- *
- * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister.
- */
-static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
-{
- struct wq_device *wq_dev = wq->wq_dev;
+ /* create and start the initial worker */
+ if (!create_worker(pool))
+ goto fail;
- if (!wq->wq_dev)
- return;
+ /* install */
+ hash_add(unbound_pool_hash, &pool->hash_node, hash);
- wq->wq_dev = NULL;
- device_unregister(&wq_dev->dev);
+ return pool;
+fail:
+ if (pool)
+ put_unbound_pool(pool);
+ return NULL;
}
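[Together, get_unbound_pool()/put_unbound_pool() form a refcounted lookup-or-create cache keyed by attrs. A hedged, simplified sketch of the lifetime contract as the pwq code below exercises it (both calls under wq_pool_mutex):

	mutex_lock(&wq_pool_mutex);
	pool = get_unbound_pool(attrs);	/* ref held by the new pwq */
	mutex_unlock(&wq_pool_mutex);

	/* ... pwq queues work on pool ... */

	mutex_lock(&wq_pool_mutex);
	put_unbound_pool(pool);		/* as pwq_unbound_release_workfn() does */
	mutex_unlock(&wq_pool_mutex);
]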
-#else /* CONFIG_SYSFS */
-static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
-#endif /* CONFIG_SYSFS */
-/**
- * free_workqueue_attrs - free a workqueue_attrs
- * @attrs: workqueue_attrs to free
- *
- * Undo alloc_workqueue_attrs().
- */
-void free_workqueue_attrs(struct workqueue_attrs *attrs)
+static void rcu_free_pwq(struct rcu_head *rcu)
{
- if (attrs) {
- free_cpumask_var(attrs->cpumask);
- kfree(attrs);
- }
+ kmem_cache_free(pwq_cache,
+ container_of(rcu, struct pool_workqueue, rcu));
}
-/**
- * alloc_workqueue_attrs - allocate a workqueue_attrs
- * @gfp_mask: allocation mask to use
- *
- * Allocate a new workqueue_attrs, initialize with default settings and
- * return it.
- *
- * Return: The allocated new workqueue_attr on success. %NULL on failure.
+/*
+ * Scheduled on system_wq by put_pwq() when an unbound pwq hits zero refcnt
+ * and needs to be destroyed.
*/
-struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask)
+static void pwq_unbound_release_workfn(struct work_struct *work)
{
- struct workqueue_attrs *attrs;
+ struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
+ unbound_release_work);
+ struct workqueue_struct *wq = pwq->wq;
+ struct worker_pool *pool = pwq->pool;
+ bool is_last;
- attrs = kzalloc(sizeof(*attrs), gfp_mask);
- if (!attrs)
- goto fail;
- if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask))
- goto fail;
+ if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
+ return;
- cpumask_copy(attrs->cpumask, cpu_possible_mask);
- return attrs;
-fail:
- free_workqueue_attrs(attrs);
- return NULL;
-}
+ mutex_lock(&wq->mutex);
+ list_del_rcu(&pwq->pwqs_node);
+ is_last = list_empty(&wq->pwqs);
+ mutex_unlock(&wq->mutex);
+
+ mutex_lock(&wq_pool_mutex);
+ put_unbound_pool(pool);
+ mutex_unlock(&wq_pool_mutex);
+
+ call_rcu_sched(&pwq->rcu, rcu_free_pwq);
-static void copy_workqueue_attrs(struct workqueue_attrs *to,
- const struct workqueue_attrs *from)
-{
- to->nice = from->nice;
- cpumask_copy(to->cpumask, from->cpumask);
/*
- * Unlike hash and equality test, this function doesn't ignore
- * ->no_numa as it is used for both pool and wq attrs. Instead,
- * get_unbound_pool() explicitly clears ->no_numa after copying.
+ * If we're the last pwq going away, @wq is already dead and no one
+ * is gonna access it anymore. Schedule RCU free.
*/
- to->no_numa = from->no_numa;
-}
-
-/* hash value of the content of @attr */
-static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
-{
- u32 hash = 0;
-
- hash = jhash_1word(attrs->nice, hash);
- hash = jhash(cpumask_bits(attrs->cpumask),
- BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
- return hash;
-}
-
-/* content equality test */
-static bool wqattrs_equal(const struct workqueue_attrs *a,
- const struct workqueue_attrs *b)
-{
- if (a->nice != b->nice)
- return false;
- if (!cpumask_equal(a->cpumask, b->cpumask))
- return false;
- return true;
+ if (is_last)
+ call_rcu_sched(&wq->rcu, rcu_free_wq);
}
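[For context, the work item above is scheduled from put_pwq(), which lives elsewhere in this file and is not touched by this patch (abridged):

	static void put_pwq(struct pool_workqueue *pwq)
	{
		lockdep_assert_held(&pwq->pool->lock);
		if (likely(--pwq->refcnt))
			return;
		if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
			return;
		/* can't free under pool->lock; bounce to system_wq */
		schedule_work(&pwq->unbound_release_work);
	}
]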
/**
- * init_worker_pool - initialize a newly zalloc'd worker_pool
- * @pool: worker_pool to initialize
- *
- * Initiailize a newly zalloc'd @pool. It also allocates @pool->attrs.
+ * pwq_adjust_max_active - update a pwq's max_active to the current setting
+ * @pwq: target pool_workqueue
*
- * Return: 0 on success, -errno on failure. Even on failure, all fields
- * inside @pool proper are initialized and put_unbound_pool() can be called
- * on @pool safely to release it.
+ * If @pwq isn't freezing, set @pwq->max_active to the associated
+ * workqueue's saved_max_active and activate delayed work items
+ * accordingly. If @pwq is freezing, clear @pwq->max_active to zero.
*/
-static int init_worker_pool(struct worker_pool *pool)
+static void pwq_adjust_max_active(struct pool_workqueue *pwq)
{
- spin_lock_init(&pool->lock);
- pool->id = -1;
- pool->cpu = -1;
- pool->node = NUMA_NO_NODE;
- pool->flags |= POOL_DISASSOCIATED;
- INIT_LIST_HEAD(&pool->worklist);
- INIT_LIST_HEAD(&pool->idle_list);
- hash_init(pool->busy_hash);
-
- init_timer_deferrable(&pool->idle_timer);
- pool->idle_timer.function = idle_worker_timeout;
- pool->idle_timer.data = (unsigned long)pool;
+ struct workqueue_struct *wq = pwq->wq;
+ bool freezable = wq->flags & WQ_FREEZABLE;
- setup_timer(&pool->mayday_timer, pool_mayday_timeout,
- (unsigned long)pool);
+ /* for @wq->saved_max_active */
+ lockdep_assert_held(&wq->mutex);
- mutex_init(&pool->manager_arb);
- mutex_init(&pool->attach_mutex);
- INIT_LIST_HEAD(&pool->workers);
+ /* fast exit for non-freezable wqs */
+ if (!freezable && pwq->max_active == wq->saved_max_active)
+ return;
- ida_init(&pool->worker_ida);
- INIT_HLIST_NODE(&pool->hash_node);
- pool->refcnt = 1;
+ spin_lock_irq(&pwq->pool->lock);
- /* shouldn't fail above this point */
- pool->attrs = alloc_workqueue_attrs(GFP_KERNEL);
- if (!pool->attrs)
- return -ENOMEM;
- return 0;
-}
+ /*
+ * During [un]freezing, the caller is responsible for ensuring that
+ * this function is called at least once after @workqueue_freezing
+ * is updated and visible.
+ */
+ if (!freezable || !workqueue_freezing) {
+ pwq->max_active = wq->saved_max_active;
-static void rcu_free_wq(struct rcu_head *rcu)
-{
- struct workqueue_struct *wq =
- container_of(rcu, struct workqueue_struct, rcu);
+ while (!list_empty(&pwq->delayed_works) &&
+ pwq->nr_active < pwq->max_active)
+ pwq_activate_first_delayed(pwq);
- if (!(wq->flags & WQ_UNBOUND))
- free_percpu(wq->cpu_pwqs);
- else
- free_workqueue_attrs(wq->unbound_attrs);
+ /*
+ * Need to kick a worker after thawed or an unbound wq's
+ * max_active is bumped. It's a slow path. Do it always.
+ */
+ wake_up_worker(pwq->pool);
+ } else {
+ pwq->max_active = 0;
+ }
- kfree(wq->rescuer);
- kfree(wq);
+ spin_unlock_irq(&pwq->pool->lock);
}
-static void rcu_free_pool(struct rcu_head *rcu)
+/* initialize newly alloced @pwq which is associated with @wq and @pool */
+static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
+ struct worker_pool *pool)
{
- struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
+ BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
- ida_destroy(&pool->worker_ida);
- free_workqueue_attrs(pool->attrs);
- kfree(pool);
+ memset(pwq, 0, sizeof(*pwq));
+
+ pwq->pool = pool;
+ pwq->wq = wq;
+ pwq->flush_color = -1;
+ pwq->refcnt = 1;
+ INIT_LIST_HEAD(&pwq->delayed_works);
+ INIT_LIST_HEAD(&pwq->pwqs_node);
+ INIT_LIST_HEAD(&pwq->mayday_node);
+ INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
}
-/**
- * put_unbound_pool - put a worker_pool
- * @pool: worker_pool to put
- *
- * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU
- * safe manner. get_unbound_pool() calls this function on its failure path
- * and this function should be able to release pools which went through,
- * successfully or not, init_worker_pool().
- *
- * Should be called with wq_pool_mutex held.
- */
-static void put_unbound_pool(struct worker_pool *pool)
+/* sync @pwq with the current state of its associated wq and link it */
+static void link_pwq(struct pool_workqueue *pwq)
{
- DECLARE_COMPLETION_ONSTACK(detach_completion);
- struct worker *worker;
+ struct workqueue_struct *wq = pwq->wq;
- lockdep_assert_held(&wq_pool_mutex);
+ lockdep_assert_held(&wq->mutex);
- if (--pool->refcnt)
+ /* may be called multiple times, ignore if already linked */
+ if (!list_empty(&pwq->pwqs_node))
return;
- /* sanity checks */
- if (WARN_ON(!(pool->cpu < 0)) ||
- WARN_ON(!list_empty(&pool->worklist)))
- return;
+ /* set the matching work_color */
+ pwq->work_color = wq->work_color;
- /* release id and unhash */
- if (pool->id >= 0)
- idr_remove(&worker_pool_idr, pool->id);
- hash_del(&pool->hash_node);
+ /* sync max_active to the current setting */
+ pwq_adjust_max_active(pwq);
- /*
- * Become the manager and destroy all workers. Grabbing
- * manager_arb prevents @pool's workers from blocking on
- * attach_mutex.
- */
- mutex_lock(&pool->manager_arb);
+ /* link in @pwq */
+ list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
+}
- spin_lock_irq(&pool->lock);
- while ((worker = first_idle_worker(pool)))
- destroy_worker(worker);
- WARN_ON(pool->nr_workers || pool->nr_idle);
- spin_unlock_irq(&pool->lock);
+/* obtain a pool matching @attr and create a pwq associating the pool and @wq */
+static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
+ const struct workqueue_attrs *attrs)
+{
+ struct worker_pool *pool;
+ struct pool_workqueue *pwq;
- mutex_lock(&pool->attach_mutex);
- if (!list_empty(&pool->workers))
- pool->detach_completion = &detach_completion;
- mutex_unlock(&pool->attach_mutex);
+ lockdep_assert_held(&wq_pool_mutex);
- if (pool->detach_completion)
- wait_for_completion(pool->detach_completion);
+ pool = get_unbound_pool(attrs);
+ if (!pool)
+ return NULL;
- mutex_unlock(&pool->manager_arb);
+ pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node);
+ if (!pwq) {
+ put_unbound_pool(pool);
+ return NULL;
+ }
- /* shut down the timers */
- del_timer_sync(&pool->idle_timer);
- del_timer_sync(&pool->mayday_timer);
+ init_pwq(pwq, wq, pool);
+ return pwq;
+}
- /* sched-RCU protected to allow dereferences from get_work_pool() */
- call_rcu_sched(&pool->rcu, rcu_free_pool);
+/* undo alloc_unbound_pwq(), used only in the error path */
+static void free_unbound_pwq(struct pool_workqueue *pwq)
+{
+ lockdep_assert_held(&wq_pool_mutex);
+
+ if (pwq) {
+ put_unbound_pool(pwq->pool);
+ kmem_cache_free(pwq_cache, pwq);
+ }
}
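[alloc_unbound_pwq()/free_unbound_pwq() are consumed by apply_workqueue_attrs(); a simplified sketch of its per-node loop, assuming the pwq_tbl[], dfl_pwq and tmp_attrs locals of that function (error unwinding trimmed):

	dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
	if (!dfl_pwq)
		goto enomem_pwq;

	for_each_node(node) {
		if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
			pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
			if (!pwq_tbl[node])
				goto enomem_pwq;
		} else {
			dfl_pwq->refcnt++;	/* node reuses the dfl pwq */
			pwq_tbl[node] = dfl_pwq;
		}
	}
]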
/**
- * get_unbound_pool - get a worker_pool with the specified attributes
- * @attrs: the attributes of the worker_pool to get
+ * wq_calc_node_mask - calculate a wq_attrs' cpumask for the specified node
+ * @attrs: the wq_attrs of interest
+ * @node: the target NUMA node
+ * @cpu_going_down: if >= 0, the CPU to consider as offline
+ * @cpumask: outarg, the resulting cpumask
*
- * Obtain a worker_pool which has the same attributes as @attrs, bump the
- * reference count and return it. If there already is a matching
- * worker_pool, it will be used; otherwise, this function attempts to
- * create a new one.
+ * Calculate the cpumask a workqueue with @attrs should use on @node. If
+ * @cpu_going_down is >= 0, that cpu is considered offline during
+ * calculation. The result is stored in @cpumask.
*
- * Should be called with wq_pool_mutex held.
+ * If NUMA affinity is not enabled, @attrs->cpumask is always used. If
+ * enabled and @node has online CPUs requested by @attrs, the returned
+ * cpumask is the intersection of the possible CPUs of @node and
+ * @attrs->cpumask.
*
- * Return: On success, a worker_pool with the same attributes as @attrs.
- * On failure, %NULL.
+ * The caller is responsible for ensuring that the cpumask of @node stays
+ * stable.
+ *
+ * Return: %true if the resulting @cpumask is different from @attrs->cpumask,
+ * %false if equal.
*/
-static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
+static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
+ int cpu_going_down, cpumask_t *cpumask)
{
- u32 hash = wqattrs_hash(attrs);
- struct worker_pool *pool;
- int node;
+ if (!wq_numa_enabled || attrs->no_numa)
+ goto use_dfl;
- lockdep_assert_held(&wq_pool_mutex);
+ /* does @node have any online CPUs @attrs wants? */
+ cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
+ if (cpu_going_down >= 0)
+ cpumask_clear_cpu(cpu_going_down, cpumask);
- /* do we already have a matching pool? */
- hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
- if (wqattrs_equal(pool->attrs, attrs)) {
- pool->refcnt++;
- return pool;
- }
- }
-
- /* nope, create a new one */
- pool = kzalloc(sizeof(*pool), GFP_KERNEL);
- if (!pool || init_worker_pool(pool) < 0)
- goto fail;
-
- lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */
- copy_workqueue_attrs(pool->attrs, attrs);
-
- /*
- * no_numa isn't a worker_pool attribute, always clear it. See
- * 'struct workqueue_attrs' comments for detail.
- */
- pool->attrs->no_numa = false;
-
- /* if cpumask is contained inside a NUMA node, we belong to that node */
- if (wq_numa_enabled) {
- for_each_node(node) {
- if (cpumask_subset(pool->attrs->cpumask,
- wq_numa_possible_cpumask[node])) {
- pool->node = node;
- break;
- }
- }
- }
-
- if (worker_pool_assign_id(pool) < 0)
- goto fail;
-
- /* create and start the initial worker */
- if (!create_worker(pool))
- goto fail;
-
- /* install */
- hash_add(unbound_pool_hash, &pool->hash_node, hash);
-
- return pool;
-fail:
- if (pool)
- put_unbound_pool(pool);
- return NULL;
-}
-
-static void rcu_free_pwq(struct rcu_head *rcu)
-{
- kmem_cache_free(pwq_cache,
- container_of(rcu, struct pool_workqueue, rcu));
-}
-
-/*
- * Scheduled on system_wq by put_pwq() when an unbound pwq hits zero refcnt
- * and needs to be destroyed.
- */
-static void pwq_unbound_release_workfn(struct work_struct *work)
-{
- struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
- unbound_release_work);
- struct workqueue_struct *wq = pwq->wq;
- struct worker_pool *pool = pwq->pool;
- bool is_last;
-
- if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
- return;
-
- mutex_lock(&wq->mutex);
- list_del_rcu(&pwq->pwqs_node);
- is_last = list_empty(&wq->pwqs);
- mutex_unlock(&wq->mutex);
-
- mutex_lock(&wq_pool_mutex);
- put_unbound_pool(pool);
- mutex_unlock(&wq_pool_mutex);
-
- call_rcu_sched(&pwq->rcu, rcu_free_pwq);
-
- /*
- * If we're the last pwq going away, @wq is already dead and no one
- * is gonna access it anymore. Schedule RCU free.
- */
- if (is_last)
- call_rcu_sched(&wq->rcu, rcu_free_wq);
-}
-
-/**
- * pwq_adjust_max_active - update a pwq's max_active to the current setting
- * @pwq: target pool_workqueue
- *
- * If @pwq isn't freezing, set @pwq->max_active to the associated
- * workqueue's saved_max_active and activate delayed work items
- * accordingly. If @pwq is freezing, clear @pwq->max_active to zero.
- */
-static void pwq_adjust_max_active(struct pool_workqueue *pwq)
-{
- struct workqueue_struct *wq = pwq->wq;
- bool freezable = wq->flags & WQ_FREEZABLE;
-
- /* for @wq->saved_max_active */
- lockdep_assert_held(&wq->mutex);
-
- /* fast exit for non-freezable wqs */
- if (!freezable && pwq->max_active == wq->saved_max_active)
- return;
-
- spin_lock_irq(&pwq->pool->lock);
-
- /*
- * During [un]freezing, the caller is responsible for ensuring that
- * this function is called at least once after @workqueue_freezing
- * is updated and visible.
- */
- if (!freezable || !workqueue_freezing) {
- pwq->max_active = wq->saved_max_active;
-
- while (!list_empty(&pwq->delayed_works) &&
- pwq->nr_active < pwq->max_active)
- pwq_activate_first_delayed(pwq);
-
- /*
- * Need to kick a worker after thawed or an unbound wq's
- * max_active is bumped. It's a slow path. Do it always.
- */
- wake_up_worker(pwq->pool);
- } else {
- pwq->max_active = 0;
- }
-
- spin_unlock_irq(&pwq->pool->lock);
-}
-
-/* initialize newly alloced @pwq which is associated with @wq and @pool */
-static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
- struct worker_pool *pool)
-{
- BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
-
- memset(pwq, 0, sizeof(*pwq));
-
- pwq->pool = pool;
- pwq->wq = wq;
- pwq->flush_color = -1;
- pwq->refcnt = 1;
- INIT_LIST_HEAD(&pwq->delayed_works);
- INIT_LIST_HEAD(&pwq->pwqs_node);
- INIT_LIST_HEAD(&pwq->mayday_node);
- INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
-}
-
-/* sync @pwq with the current state of its associated wq and link it */
-static void link_pwq(struct pool_workqueue *pwq)
-{
- struct workqueue_struct *wq = pwq->wq;
-
- lockdep_assert_held(&wq->mutex);
-
- /* may be called multiple times, ignore if already linked */
- if (!list_empty(&pwq->pwqs_node))
- return;
-
- /* set the matching work_color */
- pwq->work_color = wq->work_color;
-
- /* sync max_active to the current setting */
- pwq_adjust_max_active(pwq);
-
- /* link in @pwq */
- list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
-}
-
-/* obtain a pool matching @attr and create a pwq associating the pool and @wq */
-static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
- const struct workqueue_attrs *attrs)
-{
- struct worker_pool *pool;
- struct pool_workqueue *pwq;
-
- lockdep_assert_held(&wq_pool_mutex);
-
- pool = get_unbound_pool(attrs);
- if (!pool)
- return NULL;
-
- pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node);
- if (!pwq) {
- put_unbound_pool(pool);
- return NULL;
- }
-
- init_pwq(pwq, wq, pool);
- return pwq;
-}
-
-/* undo alloc_unbound_pwq(), used only in the error path */
-static void free_unbound_pwq(struct pool_workqueue *pwq)
-{
- lockdep_assert_held(&wq_pool_mutex);
-
- if (pwq) {
- put_unbound_pool(pwq->pool);
- kmem_cache_free(pwq_cache, pwq);
- }
-}
-
-/**
- * wq_calc_node_mask - calculate a wq_attrs' cpumask for the specified node
- * @attrs: the wq_attrs of interest
- * @node: the target NUMA node
- * @cpu_going_down: if >= 0, the CPU to consider as offline
- * @cpumask: outarg, the resulting cpumask
- *
- * Calculate the cpumask a workqueue with @attrs should use on @node. If
- * @cpu_going_down is >= 0, that cpu is considered offline during
- * calculation. The result is stored in @cpumask.
- *
- * If NUMA affinity is not enabled, @attrs->cpumask is always used. If
- * enabled and @node has online CPUs requested by @attrs, the returned
- * cpumask is the intersection of the possible CPUs of @node and
- * @attrs->cpumask.
- *
- * The caller is responsible for ensuring that the cpumask of @node stays
- * stable.
- *
- * Return: %true if the resulting @cpumask is different from @attrs->cpumask,
- * %false if equal.
- */
-static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
- int cpu_going_down, cpumask_t *cpumask)
-{
- if (!wq_numa_enabled || attrs->no_numa)
- goto use_dfl;
-
- /* does @node have any online CPUs @attrs wants? */
- cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
- if (cpu_going_down >= 0)
- cpumask_clear_cpu(cpu_going_down, cpumask);
-
- if (cpumask_empty(cpumask))
- goto use_dfl;
+ if (cpumask_empty(cpumask))
+ goto use_dfl;
/* yeap, return possible CPUs in @node that @attrs wants */
cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
else if (pool->cpu < 0)
restore_unbound_workers_cpumask(pool, cpu);
- mutex_unlock(&pool->attach_mutex);
- }
+ mutex_unlock(&pool->attach_mutex);
+ }
+
+ /* update NUMA affinity of unbound workqueues */
+ list_for_each_entry(wq, &workqueues, list)
+ wq_update_unbound_numa(wq, cpu, true);
+
+ mutex_unlock(&wq_pool_mutex);
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+/*
+ * Workqueues should be brought down after normal priority CPU notifiers.
+ * This will be registered as a low priority CPU notifier.
+ */
+static int workqueue_cpu_down_callback(struct notifier_block *nfb,
+ unsigned long action,
+ void *hcpu)
+{
+ int cpu = (unsigned long)hcpu;
+ struct work_struct unbind_work;
+ struct workqueue_struct *wq;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_DOWN_PREPARE:
+ /* unbinding per-cpu workers should happen on the local CPU */
+ INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn);
+ queue_work_on(cpu, system_highpri_wq, &unbind_work);
+
+ /* update NUMA affinity of unbound workqueues */
+ mutex_lock(&wq_pool_mutex);
+ list_for_each_entry(wq, &workqueues, list)
+ wq_update_unbound_numa(wq, cpu, false);
+ mutex_unlock(&wq_pool_mutex);
+
+ /* wait for per-cpu unbinding to finish */
+ flush_work(&unbind_work);
+ destroy_work_on_stack(&unbind_work);
+ break;
+ }
+ return NOTIFY_OK;
+}
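[Both hotplug callbacks are registered from init_workqueues() with explicit priorities, which is what the ordering comments above refer to (abridged from the same file):

	cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
	hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
]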
+
+#ifdef CONFIG_SMP
+
+struct work_for_cpu {
+ struct work_struct work;
+ long (*fn)(void *);
+ void *arg;
+ long ret;
+};
+
+static void work_for_cpu_fn(struct work_struct *work)
+{
+ struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
+
+ wfc->ret = wfc->fn(wfc->arg);
+}
+
+/**
+ * work_on_cpu - run a function in user context on a particular cpu
+ * @cpu: the cpu to run on
+ * @fn: the function to run
+ * @arg: the function arg
+ *
+ * It is up to the caller to ensure that the cpu doesn't go offline.
+ * The caller must not hold any locks which would prevent @fn from completing.
+ *
+ * Return: The value @fn returns.
+ */
+long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
+{
+ struct work_for_cpu wfc = { .fn = fn, .arg = arg };
+
+ INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
+ schedule_work_on(cpu, &wfc.work);
+ flush_work(&wfc.work);
+ destroy_work_on_stack(&wfc.work);
+ return wfc.ret;
+}
+EXPORT_SYMBOL_GPL(work_on_cpu);
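[A minimal, hypothetical caller, e.g. verifying which CPU the bound worker ran on:

	static long what_cpu_fn(void *arg)
	{
		/* runs in process context, bound to the requested CPU */
		return raw_smp_processor_id();
	}

	/* with CPU 2 online and prevented from going offline: */
	long ran_on = work_on_cpu(2, what_cpu_fn, NULL);	/* -> 2 */
]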
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_FREEZER
+
+/**
+ * freeze_workqueues_begin - begin freezing workqueues
+ *
+ * Start freezing workqueues. After this function returns, all freezable
+ * workqueues will queue new works to their delayed_works list instead of
+ * pool->worklist.
+ *
+ * CONTEXT:
+ * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
+ */
+void freeze_workqueues_begin(void)
+{
+ struct workqueue_struct *wq;
+ struct pool_workqueue *pwq;
+
+ mutex_lock(&wq_pool_mutex);
+
+ WARN_ON_ONCE(workqueue_freezing);
+ workqueue_freezing = true;
+
+ list_for_each_entry(wq, &workqueues, list) {
+ mutex_lock(&wq->mutex);
+ for_each_pwq(pwq, wq)
+ pwq_adjust_max_active(pwq);
+ mutex_unlock(&wq->mutex);
+ }
+
+ mutex_unlock(&wq_pool_mutex);
+}
+
+/**
+ * freeze_workqueues_busy - are freezable workqueues still busy?
+ *
+ * Check whether freezing is complete. This function must be called
+ * between freeze_workqueues_begin() and thaw_workqueues().
+ *
+ * CONTEXT:
+ * Grabs and releases wq_pool_mutex.
+ *
+ * Return:
+ * %true if some freezable workqueues are still busy. %false if freezing
+ * is complete.
+ */
+bool freeze_workqueues_busy(void)
+{
+ bool busy = false;
+ struct workqueue_struct *wq;
+ struct pool_workqueue *pwq;
+
+ mutex_lock(&wq_pool_mutex);
+
+ WARN_ON_ONCE(!workqueue_freezing);
+
+ list_for_each_entry(wq, &workqueues, list) {
+ if (!(wq->flags & WQ_FREEZABLE))
+ continue;
+ /*
+ * nr_active is monotonically decreasing. It's safe
+ * to peek without lock.
+ */
+ rcu_read_lock_sched();
+ for_each_pwq(pwq, wq) {
+ WARN_ON_ONCE(pwq->nr_active < 0);
+ if (pwq->nr_active) {
+ busy = true;
+ rcu_read_unlock_sched();
+ goto out_unlock;
+ }
+ }
+ rcu_read_unlock_sched();
+ }
+out_unlock:
+ mutex_unlock(&wq_pool_mutex);
+ return busy;
+}
+
+/**
+ * thaw_workqueues - thaw workqueues
+ *
+ * Thaw workqueues. Normal queueing is restored and all collected
+ * frozen works are transferred to their respective pool worklists.
+ *
+ * CONTEXT:
+ * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
+ */
+void thaw_workqueues(void)
+{
+ struct workqueue_struct *wq;
+ struct pool_workqueue *pwq;
+
+ mutex_lock(&wq_pool_mutex);
+
+ if (!workqueue_freezing)
+ goto out_unlock;
+
+ workqueue_freezing = false;
+
+ /* restore max_active and repopulate worklist */
+ list_for_each_entry(wq, &workqueues, list) {
+ mutex_lock(&wq->mutex);
+ for_each_pwq(pwq, wq)
+ pwq_adjust_max_active(pwq);
+ mutex_unlock(&wq->mutex);
+ }
+
+out_unlock:
+ mutex_unlock(&wq_pool_mutex);
+}
+#endif /* CONFIG_FREEZER */
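[The three entry points above are driven by the freezer in kernel/power/process.c; roughly, the expected sequence is (hedged sketch):

	freeze_workqueues_begin();		/* freezable pwqs: max_active = 0 */

	while (freeze_workqueues_busy())	/* in-flight items still draining */
		msleep(10);

	/* ... image written / suspend entered ... */

	thaw_workqueues();			/* restore max_active, kick workers */
]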
+
+#ifdef CONFIG_SYSFS
+/*
+ * Workqueues with the WQ_SYSFS flag set are visible to userland via
+ * /sys/bus/workqueue/devices/WQ_NAME. All visible workqueues have the
+ * following attributes.
+ *
+ * per_cpu RO bool : whether the workqueue is per-cpu or unbound
+ * max_active RW int : maximum number of in-flight work items
+ *
+ * Unbound workqueues have the following extra attributes.
+ *
+ *  pool_ids  RO int  : the associated pool IDs, one per NUMA node
+ * nice RW int : nice value of the workers
+ * cpumask RW mask : bitmask of allowed CPUs for the workers
+ */
+struct wq_device {
+ struct workqueue_struct *wq;
+ struct device dev;
+};
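[For a workqueue named "foo" allocated with WQ_SYSFS, the attributes enumerated above appear as (matching the attribute arrays below):

	/sys/bus/workqueue/devices/foo/per_cpu		(RO)
	/sys/bus/workqueue/devices/foo/max_active	(RW)
	/sys/bus/workqueue/devices/foo/pool_ids		(RO, unbound only)
	/sys/bus/workqueue/devices/foo/nice		(RW, unbound only)
	/sys/bus/workqueue/devices/foo/cpumask		(RW, unbound only)
	/sys/bus/workqueue/devices/foo/numa		(RW, unbound only)
]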
+
+static struct workqueue_struct *dev_to_wq(struct device *dev)
+{
+ struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
+
+ return wq_dev->wq;
+}
+
+static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct workqueue_struct *wq = dev_to_wq(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
+}
+static DEVICE_ATTR_RO(per_cpu);
+
+static ssize_t max_active_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct workqueue_struct *wq = dev_to_wq(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
+}
+
+static ssize_t max_active_store(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct workqueue_struct *wq = dev_to_wq(dev);
+ int val;
+
+ if (sscanf(buf, "%d", &val) != 1 || val <= 0)
+ return -EINVAL;
+
+ workqueue_set_max_active(wq, val);
+ return count;
+}
+static DEVICE_ATTR_RW(max_active);
+
+static struct attribute *wq_sysfs_attrs[] = {
+ &dev_attr_per_cpu.attr,
+ &dev_attr_max_active.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(wq_sysfs);
+
+static ssize_t wq_pool_ids_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct workqueue_struct *wq = dev_to_wq(dev);
+ const char *delim = "";
+ int node, written = 0;
+
+ rcu_read_lock_sched();
+ for_each_node(node) {
+ written += scnprintf(buf + written, PAGE_SIZE - written,
+ "%s%d:%d", delim, node,
+ unbound_pwq_by_node(wq, node)->pool->id);
+ delim = " ";
+ }
+ written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
+ rcu_read_unlock_sched();
+
+ return written;
+}
+
+static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct workqueue_struct *wq = dev_to_wq(dev);
+ int written;
+
+ mutex_lock(&wq->mutex);
+ written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
+ mutex_unlock(&wq->mutex);
+
+ return written;
+}
+
+/* prepare workqueue_attrs for sysfs store operations */
+static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
+{
+ struct workqueue_attrs *attrs;
+
+ attrs = alloc_workqueue_attrs(GFP_KERNEL);
+ if (!attrs)
+ return NULL;
+
+ mutex_lock(&wq->mutex);
+ copy_workqueue_attrs(attrs, wq->unbound_attrs);
+ mutex_unlock(&wq->mutex);
+ return attrs;
+}
+
+static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct workqueue_struct *wq = dev_to_wq(dev);
+ struct workqueue_attrs *attrs;
+ int ret;
+
+ attrs = wq_sysfs_prep_attrs(wq);
+ if (!attrs)
+ return -ENOMEM;
+
+ if (sscanf(buf, "%d", &attrs->nice) == 1 &&
+ attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
+ ret = apply_workqueue_attrs(wq, attrs);
+ else
+ ret = -EINVAL;
+
+ free_workqueue_attrs(attrs);
+ return ret ?: count;
+}
+
+static ssize_t wq_cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct workqueue_struct *wq = dev_to_wq(dev);
+ int written;
+
+ mutex_lock(&wq->mutex);
+ written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
+ cpumask_pr_args(wq->unbound_attrs->cpumask));
+ mutex_unlock(&wq->mutex);
+ return written;
+}
+
+static ssize_t wq_cpumask_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct workqueue_struct *wq = dev_to_wq(dev);
+ struct workqueue_attrs *attrs;
+ int ret;
+
+ attrs = wq_sysfs_prep_attrs(wq);
+ if (!attrs)
+ return -ENOMEM;
+
+ ret = cpumask_parse(buf, attrs->cpumask);
+ if (!ret)
+ ret = apply_workqueue_attrs(wq, attrs);
+
+ free_workqueue_attrs(attrs);
+ return ret ?: count;
+}
+
+static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct workqueue_struct *wq = dev_to_wq(dev);
+ int written;
- /* update NUMA affinity of unbound workqueues */
- list_for_each_entry(wq, &workqueues, list)
- wq_update_unbound_numa(wq, cpu, true);
+ mutex_lock(&wq->mutex);
+ written = scnprintf(buf, PAGE_SIZE, "%d\n",
+ !wq->unbound_attrs->no_numa);
+ mutex_unlock(&wq->mutex);
- mutex_unlock(&wq_pool_mutex);
- break;
- }
- return NOTIFY_OK;
+ return written;
}
-/*
- * Workqueues should be brought down after normal priority CPU notifiers.
- * This will be registered as low priority CPU notifier.
- */
-static int workqueue_cpu_down_callback(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
+static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
- int cpu = (unsigned long)hcpu;
- struct work_struct unbind_work;
- struct workqueue_struct *wq;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_DOWN_PREPARE:
- /* unbinding per-cpu workers should happen on the local CPU */
- INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn);
- queue_work_on(cpu, system_highpri_wq, &unbind_work);
+ struct workqueue_struct *wq = dev_to_wq(dev);
+ struct workqueue_attrs *attrs;
+ int v, ret;
- /* update NUMA affinity of unbound workqueues */
- mutex_lock(&wq_pool_mutex);
- list_for_each_entry(wq, &workqueues, list)
- wq_update_unbound_numa(wq, cpu, false);
- mutex_unlock(&wq_pool_mutex);
+ attrs = wq_sysfs_prep_attrs(wq);
+ if (!attrs)
+ return -ENOMEM;
- /* wait for per-cpu unbinding to finish */
- flush_work(&unbind_work);
- destroy_work_on_stack(&unbind_work);
- break;
+ ret = -EINVAL;
+ if (sscanf(buf, "%d", &v) == 1) {
+ attrs->no_numa = !v;
+ ret = apply_workqueue_attrs(wq, attrs);
}
- return NOTIFY_OK;
+
+ free_workqueue_attrs(attrs);
+ return ret ?: count;
}
-#ifdef CONFIG_SMP
+static struct device_attribute wq_sysfs_unbound_attrs[] = {
+ __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
+ __ATTR(nice, 0644, wq_nice_show, wq_nice_store),
+ __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
+ __ATTR(numa, 0644, wq_numa_show, wq_numa_store),
+ __ATTR_NULL,
+};
-struct work_for_cpu {
- struct work_struct work;
- long (*fn)(void *);
- void *arg;
- long ret;
+static struct bus_type wq_subsys = {
+ .name = "workqueue",
+ .dev_groups = wq_sysfs_groups,
};
-static void work_for_cpu_fn(struct work_struct *work)
+static int __init wq_sysfs_init(void)
{
- struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
-
- wfc->ret = wfc->fn(wfc->arg);
+ return subsys_virtual_register(&wq_subsys, NULL);
}
+core_initcall(wq_sysfs_init);
-/**
- * work_on_cpu - run a function in user context on a particular cpu
- * @cpu: the cpu to run on
- * @fn: the function to run
- * @arg: the function arg
- *
- * It is up to the caller to ensure that the cpu doesn't go offline.
- * The caller must not hold any locks which would prevent @fn from completing.
- *
- * Return: The value @fn returns.
- */
-long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
+static void wq_device_release(struct device *dev)
{
- struct work_for_cpu wfc = { .fn = fn, .arg = arg };
+ struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
- INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
- schedule_work_on(cpu, &wfc.work);
- flush_work(&wfc.work);
- destroy_work_on_stack(&wfc.work);
- return wfc.ret;
+ kfree(wq_dev);
}
-EXPORT_SYMBOL_GPL(work_on_cpu);
-#endif /* CONFIG_SMP */
-
-#ifdef CONFIG_FREEZER
/**
- * freeze_workqueues_begin - begin freezing workqueues
+ * workqueue_sysfs_register - make a workqueue visible in sysfs
+ * @wq: the workqueue to register
*
- * Start freezing workqueues. After this function returns, all freezable
- * workqueues will queue new works to their delayed_works list instead of
- * pool->worklist.
+ * Expose @wq in sysfs under /sys/bus/workqueue/devices.
+ * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set
+ * which is the preferred method.
*
- * CONTEXT:
- * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
+ * A workqueue user should use this function directly iff it wants to apply
+ * workqueue_attrs before making the workqueue visible in sysfs; otherwise,
+ * apply_workqueue_attrs() may race against userland updating the
+ * attributes.
+ *
+ * Return: 0 on success, -errno on failure.
*/
-void freeze_workqueues_begin(void)
+int workqueue_sysfs_register(struct workqueue_struct *wq)
{
- struct workqueue_struct *wq;
- struct pool_workqueue *pwq;
-
- mutex_lock(&wq_pool_mutex);
+ struct wq_device *wq_dev;
+ int ret;
- WARN_ON_ONCE(workqueue_freezing);
- workqueue_freezing = true;
+ /*
+	 * Adjusting max_active or creating new pwqs by applying
+ * attributes breaks ordering guarantee. Disallow exposing ordered
+ * workqueues.
+ */
+ if (WARN_ON(wq->flags & __WQ_ORDERED))
+ return -EINVAL;
- list_for_each_entry(wq, &workqueues, list) {
- mutex_lock(&wq->mutex);
- for_each_pwq(pwq, wq)
- pwq_adjust_max_active(pwq);
- mutex_unlock(&wq->mutex);
- }
+ wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
+ if (!wq_dev)
+ return -ENOMEM;
- mutex_unlock(&wq_pool_mutex);
-}
+ wq_dev->wq = wq;
+ wq_dev->dev.bus = &wq_subsys;
+ wq_dev->dev.init_name = wq->name;
+ wq_dev->dev.release = wq_device_release;
-/**
- * freeze_workqueues_busy - are freezable workqueues still busy?
- *
- * Check whether freezing is complete. This function must be called
- * between freeze_workqueues_begin() and thaw_workqueues().
- *
- * CONTEXT:
- * Grabs and releases wq_pool_mutex.
- *
- * Return:
- * %true if some freezable workqueues are still busy. %false if freezing
- * is complete.
- */
-bool freeze_workqueues_busy(void)
-{
- bool busy = false;
- struct workqueue_struct *wq;
- struct pool_workqueue *pwq;
+ /*
+ * unbound_attrs are created separately. Suppress uevent until
+ * everything is ready.
+ */
+ dev_set_uevent_suppress(&wq_dev->dev, true);
- mutex_lock(&wq_pool_mutex);
+ ret = device_register(&wq_dev->dev);
+ if (ret) {
+ kfree(wq_dev);
+ wq->wq_dev = NULL;
+ return ret;
+ }
- WARN_ON_ONCE(!workqueue_freezing);
+ if (wq->flags & WQ_UNBOUND) {
+ struct device_attribute *attr;
- list_for_each_entry(wq, &workqueues, list) {
- if (!(wq->flags & WQ_FREEZABLE))
- continue;
- /*
- * nr_active is monotonically decreasing. It's safe
- * to peek without lock.
- */
- rcu_read_lock_sched();
- for_each_pwq(pwq, wq) {
- WARN_ON_ONCE(pwq->nr_active < 0);
- if (pwq->nr_active) {
- busy = true;
- rcu_read_unlock_sched();
- goto out_unlock;
+ for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
+ ret = device_create_file(&wq_dev->dev, attr);
+ if (ret) {
+ device_unregister(&wq_dev->dev);
+ wq->wq_dev = NULL;
+ return ret;
}
}
- rcu_read_unlock_sched();
}
-out_unlock:
- mutex_unlock(&wq_pool_mutex);
- return busy;
+
+ dev_set_uevent_suppress(&wq_dev->dev, false);
+ kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
+ return 0;
}
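[As the comment above spells out, setting WQ_SYSFS at allocation time is the normal path; calling this function directly only makes sense to order attrs application before exposure. Illustrative (my_attrs is hypothetical):

	/* common case: registered automatically by alloc_workqueue() */
	wq = alloc_workqueue("foo", WQ_UNBOUND | WQ_SYSFS, 0);

	/* rare case: apply attrs first, expose to userland afterwards */
	wq = alloc_workqueue("foo", WQ_UNBOUND, 0);
	if (wq && !apply_workqueue_attrs(wq, my_attrs))
		ret = workqueue_sysfs_register(wq);
]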
/**
- * thaw_workqueues - thaw workqueues
- *
- * Thaw workqueues. Normal queueing is restored and all collected
- * frozen works are transferred to their respective pool worklists.
+ * workqueue_sysfs_unregister - undo workqueue_sysfs_register()
+ * @wq: the workqueue to unregister
*
- * CONTEXT:
- * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
+ * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister.
*/
-void thaw_workqueues(void)
+static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
{
- struct workqueue_struct *wq;
- struct pool_workqueue *pwq;
-
- mutex_lock(&wq_pool_mutex);
-
- if (!workqueue_freezing)
- goto out_unlock;
-
- workqueue_freezing = false;
+ struct wq_device *wq_dev = wq->wq_dev;
- /* restore max_active and repopulate worklist */
- list_for_each_entry(wq, &workqueues, list) {
- mutex_lock(&wq->mutex);
- for_each_pwq(pwq, wq)
- pwq_adjust_max_active(pwq);
- mutex_unlock(&wq->mutex);
- }
+ if (!wq->wq_dev)
+ return;
-out_unlock:
- mutex_unlock(&wq_pool_mutex);
+ wq->wq_dev = NULL;
+ device_unregister(&wq_dev->dev);
}
-#endif /* CONFIG_FREEZER */
+#else /* CONFIG_SYSFS */
+static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
+#endif /* CONFIG_SYSFS */
static void __init wq_numa_init(void)
{