workqueue: implement worker states
1 /*
2 * linux/kernel/workqueue.c
3 *
4 * Generic mechanism for defining kernel helper threads for running
5 * arbitrary tasks in process context.
6 *
7 * Started by Ingo Molnar, Copyright (C) 2002
8 *
9 * Derived from the taskqueue/keventd code by:
10 *
11 * David Woodhouse <dwmw2@infradead.org>
12 * Andrew Morton
13 * Kai Petzke <wpp@marie.physik.tu-berlin.de>
14 * Theodore Ts'o <tytso@mit.edu>
15 *
16 * Made to use alloc_percpu by Christoph Lameter.
17 */
18
19 #include <linux/module.h>
20 #include <linux/kernel.h>
21 #include <linux/sched.h>
22 #include <linux/init.h>
23 #include <linux/signal.h>
24 #include <linux/completion.h>
25 #include <linux/workqueue.h>
26 #include <linux/slab.h>
27 #include <linux/cpu.h>
28 #include <linux/notifier.h>
29 #include <linux/kthread.h>
30 #include <linux/hardirq.h>
31 #include <linux/mempolicy.h>
32 #include <linux/freezer.h>
33 #include <linux/kallsyms.h>
34 #include <linux/debug_locks.h>
35 #include <linux/lockdep.h>
36 #include <linux/idr.h>
37
38 enum {
39 /* worker flags */
40 WORKER_STARTED = 1 << 0, /* started */
41 WORKER_DIE = 1 << 1, /* die die die */
42 WORKER_IDLE = 1 << 2, /* is idle */
43
44 BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */
45 BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER,
46 BUSY_WORKER_HASH_MASK = BUSY_WORKER_HASH_SIZE - 1,
47 };
48
49 /*
50 * Structure fields follow one of the following exclusion rules.
51 *
52 * I: Set during initialization and read-only afterwards.
53 *
54 * L: gcwq->lock protected. Access with gcwq->lock held.
55 *
56 * F: wq->flush_mutex protected.
57 *
58 * W: workqueue_lock protected.
59 */
60
61 struct global_cwq;
62 struct cpu_workqueue_struct;
63
64 struct worker {
65 /* on idle list while idle, on busy hash table while busy */
66 union {
67 struct list_head entry; /* L: while idle */
68 struct hlist_node hentry; /* L: while busy */
69 };
70
71 struct work_struct *current_work; /* L: work being processed */
72 struct list_head scheduled; /* L: scheduled works */
73 struct task_struct *task; /* I: worker task */
74 struct global_cwq *gcwq; /* I: the associated gcwq */
75 struct cpu_workqueue_struct *cwq; /* I: the associated cwq */
76 unsigned int flags; /* L: flags */
77 int id; /* I: worker id */
78 };
79
80 /*
81 * Global per-cpu workqueue.
82 */
83 struct global_cwq {
84 spinlock_t lock; /* the gcwq lock */
85 unsigned int cpu; /* I: the associated cpu */
86
87 int nr_workers; /* L: total number of workers */
88 int nr_idle; /* L: currently idle ones */
89
90 /* workers are chained either in the idle_list or busy_hash */
91 struct list_head idle_list; /* L: list of idle workers */
92 struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE];
93 /* L: hash of busy workers */
94
95 struct ida worker_ida; /* L: for worker IDs */
96 } ____cacheline_aligned_in_smp;
97
98 /*
99 * The per-CPU workqueue (if single thread, we always use the first
100 * possible cpu). The lower WORK_STRUCT_FLAG_BITS of
101 * work_struct->data are used for flags, so cwqs need to be
102 * aligned on a (1 << WORK_STRUCT_FLAG_BITS) boundary.
103 */
104 struct cpu_workqueue_struct {
105 struct global_cwq *gcwq; /* I: the associated gcwq */
106 struct list_head worklist;
107 struct worker *worker;
108 struct workqueue_struct *wq; /* I: the owning workqueue */
109 int work_color; /* L: current color */
110 int flush_color; /* L: flushing color */
111 int nr_in_flight[WORK_NR_COLORS];
112 /* L: nr of in_flight works */
113 int nr_active; /* L: nr of active works */
114 int max_active; /* L: max active works */
115 struct list_head delayed_works; /* L: delayed works */
116 };
117
118 /*
119 * Structure used to wait for workqueue flush.
120 */
121 struct wq_flusher {
122 struct list_head list; /* F: list of flushers */
123 int flush_color; /* F: flush color waiting for */
124 struct completion done; /* flush completion */
125 };
126
127 /*
128 * The externally visible workqueue abstraction is an array of
129 * per-CPU workqueues:
130 */
131 struct workqueue_struct {
132 unsigned int flags; /* I: WQ_* flags */
133 struct cpu_workqueue_struct *cpu_wq; /* I: cwq's */
134 struct list_head list; /* W: list of all workqueues */
135
136 struct mutex flush_mutex; /* protects wq flushing */
137 int work_color; /* F: current work color */
138 int flush_color; /* F: current flush color */
139 atomic_t nr_cwqs_to_flush; /* flush in progress */
140 struct wq_flusher *first_flusher; /* F: first flusher */
141 struct list_head flusher_queue; /* F: flush waiters */
142 struct list_head flusher_overflow; /* F: flush overflow list */
143
144 int saved_max_active; /* I: saved cwq max_active */
145 const char *name; /* I: workqueue name */
146 #ifdef CONFIG_LOCKDEP
147 struct lockdep_map lockdep_map;
148 #endif
149 };
150
151 #ifdef CONFIG_DEBUG_OBJECTS_WORK
152
153 static struct debug_obj_descr work_debug_descr;
154
155 /*
156 * fixup_init is called when:
157 * - an active object is initialized
158 */
159 static int work_fixup_init(void *addr, enum debug_obj_state state)
160 {
161 struct work_struct *work = addr;
162
163 switch (state) {
164 case ODEBUG_STATE_ACTIVE:
165 cancel_work_sync(work);
166 debug_object_init(work, &work_debug_descr);
167 return 1;
168 default:
169 return 0;
170 }
171 }
172
173 /*
174 * fixup_activate is called when:
175 * - an active object is activated
176 * - an unknown object is activated (might be a statically initialized object)
177 */
178 static int work_fixup_activate(void *addr, enum debug_obj_state state)
179 {
180 struct work_struct *work = addr;
181
182 switch (state) {
183
184 case ODEBUG_STATE_NOTAVAILABLE:
185 /*
186 * This is not really a fixup. The work struct was
187 * statically initialized. We just make sure that it
188 * is tracked in the object tracker.
189 */
190 if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) {
191 debug_object_init(work, &work_debug_descr);
192 debug_object_activate(work, &work_debug_descr);
193 return 0;
194 }
195 WARN_ON_ONCE(1);
196 return 0;
197
198 case ODEBUG_STATE_ACTIVE:
199 WARN_ON(1);
200
201 default:
202 return 0;
203 }
204 }
205
206 /*
207 * fixup_free is called when:
208 * - an active object is freed
209 */
210 static int work_fixup_free(void *addr, enum debug_obj_state state)
211 {
212 struct work_struct *work = addr;
213
214 switch (state) {
215 case ODEBUG_STATE_ACTIVE:
216 cancel_work_sync(work);
217 debug_object_free(work, &work_debug_descr);
218 return 1;
219 default:
220 return 0;
221 }
222 }
223
224 static struct debug_obj_descr work_debug_descr = {
225 .name = "work_struct",
226 .fixup_init = work_fixup_init,
227 .fixup_activate = work_fixup_activate,
228 .fixup_free = work_fixup_free,
229 };
230
231 static inline void debug_work_activate(struct work_struct *work)
232 {
233 debug_object_activate(work, &work_debug_descr);
234 }
235
236 static inline void debug_work_deactivate(struct work_struct *work)
237 {
238 debug_object_deactivate(work, &work_debug_descr);
239 }
240
241 void __init_work(struct work_struct *work, int onstack)
242 {
243 if (onstack)
244 debug_object_init_on_stack(work, &work_debug_descr);
245 else
246 debug_object_init(work, &work_debug_descr);
247 }
248 EXPORT_SYMBOL_GPL(__init_work);
249
250 void destroy_work_on_stack(struct work_struct *work)
251 {
252 debug_object_free(work, &work_debug_descr);
253 }
254 EXPORT_SYMBOL_GPL(destroy_work_on_stack);
255
256 #else
257 static inline void debug_work_activate(struct work_struct *work) { }
258 static inline void debug_work_deactivate(struct work_struct *work) { }
259 #endif
260
261 /* Serializes the accesses to the list of workqueues. */
262 static DEFINE_SPINLOCK(workqueue_lock);
263 static LIST_HEAD(workqueues);
264 static bool workqueue_freezing; /* W: have wqs started freezing? */
265
266 static DEFINE_PER_CPU(struct global_cwq, global_cwq);
267
268 static int worker_thread(void *__worker);
269
270 static int singlethread_cpu __read_mostly;
271
272 static struct global_cwq *get_gcwq(unsigned int cpu)
273 {
274 return &per_cpu(global_cwq, cpu);
275 }
276
277 static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
278 struct workqueue_struct *wq)
279 {
280 return per_cpu_ptr(wq->cpu_wq, cpu);
281 }
282
283 static struct cpu_workqueue_struct *target_cwq(unsigned int cpu,
284 struct workqueue_struct *wq)
285 {
286 if (unlikely(wq->flags & WQ_SINGLE_THREAD))
287 cpu = singlethread_cpu;
288 return get_cwq(cpu, wq);
289 }
290
291 static unsigned int work_color_to_flags(int color)
292 {
293 return color << WORK_STRUCT_COLOR_SHIFT;
294 }
295
296 static int get_work_color(struct work_struct *work)
297 {
298 return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
299 ((1 << WORK_STRUCT_COLOR_BITS) - 1);
300 }
301
302 static int work_next_color(int color)
303 {
304 return (color + 1) % WORK_NR_COLORS;
305 }
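
/*
 * Illustrative note (not part of the original file): work colors simply
 * cycle through the WORK_NR_COLORS space, so with e.g. WORK_NR_COLORS == 15,
 *
 *	work_next_color(0)  == 1
 *	work_next_color(13) == 14
 *	work_next_color(14) == 0	(wraps around)
 *
 * flush_workqueue() below relies on this wrap-around to tell the color
 * currently being flushed apart from the color new works are queued at.
 */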
306
307 /*
308 * Set the workqueue on which a work item is to be run
309 * - Must *only* be called if the pending flag is set
310 */
311 static inline void set_wq_data(struct work_struct *work,
312 struct cpu_workqueue_struct *cwq,
313 unsigned long extra_flags)
314 {
315 BUG_ON(!work_pending(work));
316
317 atomic_long_set(&work->data, (unsigned long)cwq | work_static(work) |
318 WORK_STRUCT_PENDING | extra_flags);
319 }
320
321 /*
322 * Clear WORK_STRUCT_PENDING and the workqueue on which it was queued.
323 */
324 static inline void clear_wq_data(struct work_struct *work)
325 {
326 atomic_long_set(&work->data, work_static(work));
327 }
328
329 static inline struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
330 {
331 return (void *)(atomic_long_read(&work->data) &
332 WORK_STRUCT_WQ_DATA_MASK);
333 }
334
335 /**
336 * busy_worker_head - return the busy hash head for a work
337 * @gcwq: gcwq of interest
338 * @work: work to be hashed
339 *
340 * Return hash head of @gcwq for @work.
341 *
342 * CONTEXT:
343 * spin_lock_irq(gcwq->lock).
344 *
345 * RETURNS:
346 * Pointer to the hash head.
347 */
348 static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
349 struct work_struct *work)
350 {
351 const int base_shift = ilog2(sizeof(struct work_struct));
352 unsigned long v = (unsigned long)work;
353
354 /* simple shift and fold hash, do we need something better? */
355 v >>= base_shift;
356 v += v >> BUSY_WORKER_HASH_ORDER;
357 v &= BUSY_WORKER_HASH_MASK;
358
359 return &gcwq->busy_hash[v];
360 }
361
362 /**
363 * insert_work - insert a work into cwq
364 * @cwq: cwq @work belongs to
365 * @work: work to insert
366 * @head: insertion point
367 * @extra_flags: extra WORK_STRUCT_* flags to set
368 *
369 * Insert @work into @cwq after @head.
370 *
371 * CONTEXT:
372 * spin_lock_irq(gcwq->lock).
373 */
374 static void insert_work(struct cpu_workqueue_struct *cwq,
375 struct work_struct *work, struct list_head *head,
376 unsigned int extra_flags)
377 {
378 /* we own @work, set data and link */
379 set_wq_data(work, cwq, extra_flags);
380
381 /*
382 * Ensure that we get the right work->data if we see the
383 * result of list_add() below, see try_to_grab_pending().
384 */
385 smp_wmb();
386
387 list_add_tail(&work->entry, head);
388 wake_up_process(cwq->worker->task);
389 }
390
391 static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
392 struct work_struct *work)
393 {
394 struct cpu_workqueue_struct *cwq = target_cwq(cpu, wq);
395 struct global_cwq *gcwq = cwq->gcwq;
396 struct list_head *worklist;
397 unsigned long flags;
398
399 debug_work_activate(work);
400
401 spin_lock_irqsave(&gcwq->lock, flags);
402 BUG_ON(!list_empty(&work->entry));
403
404 cwq->nr_in_flight[cwq->work_color]++;
405
406 if (likely(cwq->nr_active < cwq->max_active)) {
407 cwq->nr_active++;
408 worklist = &cwq->worklist;
409 } else
410 worklist = &cwq->delayed_works;
411
412 insert_work(cwq, work, worklist, work_color_to_flags(cwq->work_color));
413
414 spin_unlock_irqrestore(&gcwq->lock, flags);
415 }
416
417 /**
418 * queue_work - queue work on a workqueue
419 * @wq: workqueue to use
420 * @work: work to queue
421 *
422 * Returns 0 if @work was already on a queue, non-zero otherwise.
423 *
424 * We queue the work to the CPU on which it was submitted, but if the CPU dies
425 * it can be processed by another CPU.
426 */
427 int queue_work(struct workqueue_struct *wq, struct work_struct *work)
428 {
429 int ret;
430
431 ret = queue_work_on(get_cpu(), wq, work);
432 put_cpu();
433
434 return ret;
435 }
436 EXPORT_SYMBOL_GPL(queue_work);
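
/*
 * Usage sketch (illustrative, not part of the original file): a driver
 * embeds a work_struct in its own object and queues it on a workqueue it
 * created earlier.  All "example_*" identifiers are hypothetical.
 *
 *	struct example_dev {
 *		struct workqueue_struct *wq;
 *		struct work_struct reset_work;
 *	};
 *
 *	static void example_reset_fn(struct work_struct *work)
 *	{
 *		struct example_dev *dev =
 *			container_of(work, struct example_dev, reset_work);
 *		... runs in process context, may sleep ...
 *	}
 *
 *	INIT_WORK(&dev->reset_work, example_reset_fn);	at setup time
 *	queue_work(dev->wq, &dev->reset_work);		to submit
 *
 * A second queue_work() while the item is still pending returns 0 and does
 * not queue it twice.
 */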
437
438 /**
439 * queue_work_on - queue work on specific cpu
440 * @cpu: CPU number to execute work on
441 * @wq: workqueue to use
442 * @work: work to queue
443 *
444 * Returns 0 if @work was already on a queue, non-zero otherwise.
445 *
446 * We queue the work to a specific CPU; the caller must ensure that
447 * CPU can't go offline.
448 */
449 int
450 queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
451 {
452 int ret = 0;
453
454 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
455 __queue_work(cpu, wq, work);
456 ret = 1;
457 }
458 return ret;
459 }
460 EXPORT_SYMBOL_GPL(queue_work_on);
461
462 static void delayed_work_timer_fn(unsigned long __data)
463 {
464 struct delayed_work *dwork = (struct delayed_work *)__data;
465 struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work);
466
467 __queue_work(smp_processor_id(), cwq->wq, &dwork->work);
468 }
469
470 /**
471 * queue_delayed_work - queue work on a workqueue after delay
472 * @wq: workqueue to use
473 * @dwork: delayable work to queue
474 * @delay: number of jiffies to wait before queueing
475 *
476 * Returns 0 if @work was already on a queue, non-zero otherwise.
477 */
478 int queue_delayed_work(struct workqueue_struct *wq,
479 struct delayed_work *dwork, unsigned long delay)
480 {
481 if (delay == 0)
482 return queue_work(wq, &dwork->work);
483
484 return queue_delayed_work_on(-1, wq, dwork, delay);
485 }
486 EXPORT_SYMBOL_GPL(queue_delayed_work);
487
488 /**
489 * queue_delayed_work_on - queue work on specific CPU after delay
490 * @cpu: CPU number to execute work on
491 * @wq: workqueue to use
492 * @dwork: work to queue
493 * @delay: number of jiffies to wait before queueing
494 *
495 * Returns 0 if @work was already on a queue, non-zero otherwise.
496 */
497 int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
498 struct delayed_work *dwork, unsigned long delay)
499 {
500 int ret = 0;
501 struct timer_list *timer = &dwork->timer;
502 struct work_struct *work = &dwork->work;
503
504 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
505 BUG_ON(timer_pending(timer));
506 BUG_ON(!list_empty(&work->entry));
507
508 timer_stats_timer_set_start_info(&dwork->timer);
509
510 /* This stores cwq for the moment, for the timer_fn */
511 set_wq_data(work, target_cwq(raw_smp_processor_id(), wq), 0);
512 timer->expires = jiffies + delay;
513 timer->data = (unsigned long)dwork;
514 timer->function = delayed_work_timer_fn;
515
516 if (unlikely(cpu >= 0))
517 add_timer_on(timer, cpu);
518 else
519 add_timer(timer);
520 ret = 1;
521 }
522 return ret;
523 }
524 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
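
/*
 * Usage sketch (illustrative, not part of the original file): queueing
 * delayed work.  "example_wq", "example_dwork" and example_timeout_fn are
 * hypothetical.
 *
 *	static void example_timeout_fn(struct work_struct *work);
 *	static DECLARE_DELAYED_WORK(example_dwork, example_timeout_fn);
 *
 *	queue_delayed_work(example_wq, &example_dwork,
 *			   msecs_to_jiffies(500));
 *
 * runs example_timeout_fn() on example_wq roughly 500ms later, while
 *
 *	queue_delayed_work_on(2, example_wq, &example_dwork, HZ);
 *
 * arms the timer on CPU 2 and, once it fires there, queues the work on
 * that cpu's cwq (see delayed_work_timer_fn() above).
 */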
525
526 /**
527 * worker_enter_idle - enter idle state
528 * @worker: worker which is entering idle state
529 *
530 * @worker is entering idle state. Update stats and idle timer if
531 * necessary.
532 *
533 * LOCKING:
534 * spin_lock_irq(gcwq->lock).
535 */
536 static void worker_enter_idle(struct worker *worker)
537 {
538 struct global_cwq *gcwq = worker->gcwq;
539
540 BUG_ON(worker->flags & WORKER_IDLE);
541 BUG_ON(!list_empty(&worker->entry) &&
542 (worker->hentry.next || worker->hentry.pprev));
543
544 worker->flags |= WORKER_IDLE;
545 gcwq->nr_idle++;
546
547 /* idle_list is LIFO */
548 list_add(&worker->entry, &gcwq->idle_list);
549 }
550
551 /**
552 * worker_leave_idle - leave idle state
553 * @worker: worker which is leaving idle state
554 *
555 * @worker is leaving idle state. Update stats.
556 *
557 * LOCKING:
558 * spin_lock_irq(gcwq->lock).
559 */
560 static void worker_leave_idle(struct worker *worker)
561 {
562 struct global_cwq *gcwq = worker->gcwq;
563
564 BUG_ON(!(worker->flags & WORKER_IDLE));
565 worker->flags &= ~WORKER_IDLE;
566 gcwq->nr_idle--;
567 list_del_init(&worker->entry);
568 }
569
570 static struct worker *alloc_worker(void)
571 {
572 struct worker *worker;
573
574 worker = kzalloc(sizeof(*worker), GFP_KERNEL);
575 if (worker) {
576 INIT_LIST_HEAD(&worker->entry);
577 INIT_LIST_HEAD(&worker->scheduled);
578 }
579 return worker;
580 }
581
582 /**
583 * create_worker - create a new workqueue worker
584 * @cwq: cwq the new worker will belong to
585 * @bind: whether to bind the new worker to @cwq's cpu or not
586 *
587 * Create a new worker which is bound to @cwq. The returned worker
588 * can be started by calling start_worker() or destroyed using
589 * destroy_worker().
590 *
591 * CONTEXT:
592 * Might sleep. Does GFP_KERNEL allocations.
593 *
594 * RETURNS:
595 * Pointer to the newly created worker.
596 */
597 static struct worker *create_worker(struct cpu_workqueue_struct *cwq, bool bind)
598 {
599 struct global_cwq *gcwq = cwq->gcwq;
600 int id = -1;
601 struct worker *worker = NULL;
602
603 spin_lock_irq(&gcwq->lock);
604 while (ida_get_new(&gcwq->worker_ida, &id)) {
605 spin_unlock_irq(&gcwq->lock);
606 if (!ida_pre_get(&gcwq->worker_ida, GFP_KERNEL))
607 goto fail;
608 spin_lock_irq(&gcwq->lock);
609 }
610 spin_unlock_irq(&gcwq->lock);
611
612 worker = alloc_worker();
613 if (!worker)
614 goto fail;
615
616 worker->gcwq = gcwq;
617 worker->cwq = cwq;
618 worker->id = id;
619
620 worker->task = kthread_create(worker_thread, worker, "kworker/%u:%d",
621 gcwq->cpu, id);
622 if (IS_ERR(worker->task))
623 goto fail;
624
625 if (bind)
626 kthread_bind(worker->task, gcwq->cpu);
627
628 return worker;
629 fail:
630 if (id >= 0) {
631 spin_lock_irq(&gcwq->lock);
632 ida_remove(&gcwq->worker_ida, id);
633 spin_unlock_irq(&gcwq->lock);
634 }
635 kfree(worker);
636 return NULL;
637 }
638
639 /**
640 * start_worker - start a newly created worker
641 * @worker: worker to start
642 *
643 * Make the gcwq aware of @worker and start it.
644 *
645 * CONTEXT:
646 * spin_lock_irq(gcwq->lock).
647 */
648 static void start_worker(struct worker *worker)
649 {
650 worker->flags |= WORKER_STARTED;
651 worker->gcwq->nr_workers++;
652 worker_enter_idle(worker);
653 wake_up_process(worker->task);
654 }
655
656 /**
657 * destroy_worker - destroy a workqueue worker
658 * @worker: worker to be destroyed
659 *
660 * Destroy @worker and adjust @gcwq stats accordingly.
661 *
662 * CONTEXT:
663 * spin_lock_irq(gcwq->lock) which is released and regrabbed.
664 */
665 static void destroy_worker(struct worker *worker)
666 {
667 struct global_cwq *gcwq = worker->gcwq;
668 int id = worker->id;
669
670 /* sanity check frenzy */
671 BUG_ON(worker->current_work);
672 BUG_ON(!list_empty(&worker->scheduled));
673
674 if (worker->flags & WORKER_STARTED)
675 gcwq->nr_workers--;
676 if (worker->flags & WORKER_IDLE)
677 gcwq->nr_idle--;
678
679 list_del_init(&worker->entry);
680 worker->flags |= WORKER_DIE;
681
682 spin_unlock_irq(&gcwq->lock);
683
684 kthread_stop(worker->task);
685 kfree(worker);
686
687 spin_lock_irq(&gcwq->lock);
688 ida_remove(&gcwq->worker_ida, id);
689 }
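
/*
 * Lifecycle sketch (illustrative, not part of the original file): roughly
 * how the three helpers above are meant to be paired, following their
 * CONTEXT annotations.  "cwq" and "gcwq" are assumed to be a valid
 * cpu_workqueue_struct and its associated gcwq.
 *
 *	struct worker *worker;
 *
 *	worker = create_worker(cwq, true);	(may sleep, GFP_KERNEL)
 *	if (worker) {
 *		spin_lock_irq(&gcwq->lock);
 *		start_worker(worker);		(gcwq starts tracking it)
 *		spin_unlock_irq(&gcwq->lock);
 *	}
 *
 *	...
 *
 *	spin_lock_irq(&gcwq->lock);
 *	destroy_worker(worker);			(drops and regrabs gcwq->lock)
 *	spin_unlock_irq(&gcwq->lock);
 *
 * See __create_workqueue_key() and destroy_workqueue() below for the real
 * call sites.
 */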
690
691 /**
692 * move_linked_works - move linked works to a list
693 * @work: start of series of works to be scheduled
694 * @head: target list to append @work to
695 * @nextp: out parameter for nested worklist walking
696 *
697 * Schedule linked works starting from @work to @head. Work series to
698 * be scheduled starts at @work and includes any consecutive work with
699 * WORK_STRUCT_LINKED set in its predecessor.
700 *
701 * If @nextp is not NULL, it's updated to point to the next work of
702 * the last scheduled work. This allows move_linked_works() to be
703 * nested inside outer list_for_each_entry_safe().
704 *
705 * CONTEXT:
706 * spin_lock_irq(gcwq->lock).
707 */
708 static void move_linked_works(struct work_struct *work, struct list_head *head,
709 struct work_struct **nextp)
710 {
711 struct work_struct *n;
712
713 /*
714 * A linked worklist always ends before the end of the list, so
715 * NULL can be used as the list head.
716 */
717 list_for_each_entry_safe_from(work, n, NULL, entry) {
718 list_move_tail(&work->entry, head);
719 if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
720 break;
721 }
722
723 /*
724 * If we're already inside safe list traversal and have moved
725 * multiple works to the scheduled queue, the next position
726 * needs to be updated.
727 */
728 if (nextp)
729 *nextp = n;
730 }
731
732 static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
733 {
734 struct work_struct *work = list_first_entry(&cwq->delayed_works,
735 struct work_struct, entry);
736
737 move_linked_works(work, &cwq->worklist, NULL);
738 cwq->nr_active++;
739 }
740
741 /**
742 * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
743 * @cwq: cwq of interest
744 * @color: color of work which left the queue
745 *
746 * A work has either completed or been removed from the pending queue;
747 * decrement nr_in_flight of its cwq and handle workqueue flushing.
748 *
749 * CONTEXT:
750 * spin_lock_irq(gcwq->lock).
751 */
752 static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
753 {
754 /* ignore uncolored works */
755 if (color == WORK_NO_COLOR)
756 return;
757
758 cwq->nr_in_flight[color]--;
759 cwq->nr_active--;
760
761 /* one down, submit a delayed one */
762 if (!list_empty(&cwq->delayed_works) &&
763 cwq->nr_active < cwq->max_active)
764 cwq_activate_first_delayed(cwq);
765
766 /* is flush in progress and are we at the flushing tip? */
767 if (likely(cwq->flush_color != color))
768 return;
769
770 /* are there still in-flight works? */
771 if (cwq->nr_in_flight[color])
772 return;
773
774 /* this cwq is done, clear flush_color */
775 cwq->flush_color = -1;
776
777 /*
778 * If this was the last cwq, wake up the first flusher. It
779 * will handle the rest.
780 */
781 if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush))
782 complete(&cwq->wq->first_flusher->done);
783 }
784
785 /**
786 * process_one_work - process single work
787 * @worker: self
788 * @work: work to process
789 *
790 * Process @work. This function contains all the logic necessary to
791 * process a single work including synchronization against and
792 * interaction with other workers on the same cpu, queueing and
793 * flushing. As long as the context requirement is met, any worker can
794 * call this function to process a work.
795 *
796 * CONTEXT:
797 * spin_lock_irq(gcwq->lock) which is released and regrabbed.
798 */
799 static void process_one_work(struct worker *worker, struct work_struct *work)
800 {
801 struct cpu_workqueue_struct *cwq = worker->cwq;
802 struct global_cwq *gcwq = cwq->gcwq;
803 struct hlist_head *bwh = busy_worker_head(gcwq, work);
804 work_func_t f = work->func;
805 int work_color;
806 #ifdef CONFIG_LOCKDEP
807 /*
808 * It is permissible to free the struct work_struct from
809 * inside the function that is called from it, this we need to
810 * take into account for lockdep too. To avoid bogus "held
811 * lock freed" warnings as well as problems when looking into
812 * work->lockdep_map, make a copy and use that here.
813 */
814 struct lockdep_map lockdep_map = work->lockdep_map;
815 #endif
816 /* claim and process */
817 debug_work_deactivate(work);
818 hlist_add_head(&worker->hentry, bwh);
819 worker->current_work = work;
820 work_color = get_work_color(work);
821 list_del_init(&work->entry);
822
823 spin_unlock_irq(&gcwq->lock);
824
825 BUG_ON(get_wq_data(work) != cwq);
826 work_clear_pending(work);
827 lock_map_acquire(&cwq->wq->lockdep_map);
828 lock_map_acquire(&lockdep_map);
829 f(work);
830 lock_map_release(&lockdep_map);
831 lock_map_release(&cwq->wq->lockdep_map);
832
833 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
834 printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
835 "%s/0x%08x/%d\n",
836 current->comm, preempt_count(), task_pid_nr(current));
837 printk(KERN_ERR " last function: ");
838 print_symbol("%s\n", (unsigned long)f);
839 debug_show_held_locks(current);
840 dump_stack();
841 }
842
843 spin_lock_irq(&gcwq->lock);
844
845 /* we're done with it, release */
846 hlist_del_init(&worker->hentry);
847 worker->current_work = NULL;
848 cwq_dec_nr_in_flight(cwq, work_color);
849 }
850
851 /**
852 * process_scheduled_works - process scheduled works
853 * @worker: self
854 *
855 * Process all scheduled works. Please note that the scheduled list
856 * may change while processing a work, so this function repeatedly
857 * fetches a work from the top and executes it.
858 *
859 * CONTEXT:
860 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
861 * multiple times.
862 */
863 static void process_scheduled_works(struct worker *worker)
864 {
865 while (!list_empty(&worker->scheduled)) {
866 struct work_struct *work = list_first_entry(&worker->scheduled,
867 struct work_struct, entry);
868 process_one_work(worker, work);
869 }
870 }
871
872 /**
873 * worker_thread - the worker thread function
874 * @__worker: self
875 *
876 * The cwq worker thread function.
877 */
878 static int worker_thread(void *__worker)
879 {
880 struct worker *worker = __worker;
881 struct global_cwq *gcwq = worker->gcwq;
882 struct cpu_workqueue_struct *cwq = worker->cwq;
883
884 woke_up:
885 if (unlikely(!cpumask_equal(&worker->task->cpus_allowed,
886 get_cpu_mask(gcwq->cpu))))
887 set_cpus_allowed_ptr(worker->task, get_cpu_mask(gcwq->cpu));
888
889 spin_lock_irq(&gcwq->lock);
890
891 /* DIE can be set only while we're idle; checking here is enough */
892 if (worker->flags & WORKER_DIE) {
893 spin_unlock_irq(&gcwq->lock);
894 return 0;
895 }
896
897 worker_leave_idle(worker);
898
899 /*
900 * ->scheduled list can only be filled while a worker is
901 * preparing to process a work or actually processing it.
902 * Make sure nobody diddled with it while I was sleeping.
903 */
904 BUG_ON(!list_empty(&worker->scheduled));
905
906 while (!list_empty(&cwq->worklist)) {
907 struct work_struct *work =
908 list_first_entry(&cwq->worklist,
909 struct work_struct, entry);
910
911 if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
912 /* optimization path, not strictly necessary */
913 process_one_work(worker, work);
914 if (unlikely(!list_empty(&worker->scheduled)))
915 process_scheduled_works(worker);
916 } else {
917 move_linked_works(work, &worker->scheduled, NULL);
918 process_scheduled_works(worker);
919 }
920 }
921
922 /*
923 * gcwq->lock is held and there's no work to process, sleep.
924 * Workers are woken up only while holding gcwq->lock, so
925 * setting the current state before releasing gcwq->lock is
926 * enough to prevent losing any event.
927 */
928 worker_enter_idle(worker);
929 __set_current_state(TASK_INTERRUPTIBLE);
930 spin_unlock_irq(&gcwq->lock);
931 schedule();
932 goto woke_up;
933 }
934
935 struct wq_barrier {
936 struct work_struct work;
937 struct completion done;
938 };
939
940 static void wq_barrier_func(struct work_struct *work)
941 {
942 struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
943 complete(&barr->done);
944 }
945
946 /**
947 * insert_wq_barrier - insert a barrier work
948 * @cwq: cwq to insert barrier into
949 * @barr: wq_barrier to insert
950 * @target: target work to attach @barr to
951 * @worker: worker currently executing @target, NULL if @target is not executing
952 *
953 * @barr is linked to @target such that @barr is completed only after
954 * @target finishes execution. Please note that the ordering
955 * guarantee is observed only with respect to @target and on the local
956 * cpu.
957 *
958 * Currently, a queued barrier can't be canceled. This is because
959 * try_to_grab_pending() can't determine whether the work to be
960 * grabbed is at the head of the queue and thus can't clear LINKED
961 * flag of the previous work while there must be a valid next work
962 * after a work with LINKED flag set.
963 *
964 * Note that when @worker is non-NULL, @target may be modified
965 * underneath us, so we can't reliably determine cwq from @target.
966 *
967 * CONTEXT:
968 * spin_lock_irq(gcwq->lock).
969 */
970 static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
971 struct wq_barrier *barr,
972 struct work_struct *target, struct worker *worker)
973 {
974 struct list_head *head;
975 unsigned int linked = 0;
976
977 /*
978 * debugobject calls are safe here even with gcwq->lock locked
979 * as we know for sure that this will not trigger any of the
980 * checks and call back into the fixup functions where we
981 * might deadlock.
982 */
983 INIT_WORK_ON_STACK(&barr->work, wq_barrier_func);
984 __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
985 init_completion(&barr->done);
986
987 /*
988 * If @target is currently being executed, schedule the
989 * barrier to the worker; otherwise, put it after @target.
990 */
991 if (worker)
992 head = worker->scheduled.next;
993 else {
994 unsigned long *bits = work_data_bits(target);
995
996 head = target->entry.next;
997 /* there can already be other linked works, inherit and set */
998 linked = *bits & WORK_STRUCT_LINKED;
999 __set_bit(WORK_STRUCT_LINKED_BIT, bits);
1000 }
1001
1002 debug_work_activate(&barr->work);
1003 insert_work(cwq, &barr->work, head,
1004 work_color_to_flags(WORK_NO_COLOR) | linked);
1005 }
1006
1007 /**
1008 * flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing
1009 * @wq: workqueue being flushed
1010 * @flush_color: new flush color, < 0 for no-op
1011 * @work_color: new work color, < 0 for no-op
1012 *
1013 * Prepare cwqs for workqueue flushing.
1014 *
1015 * If @flush_color is non-negative, flush_color on all cwqs should be
1016 * -1. If no cwq has in-flight works at the specified color, all
1017 * cwq->flush_color's stay at -1 and %false is returned. If any cwq
1018 * has in-flight works, its cwq->flush_color is set to
1019 * @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq
1020 * wakeup logic is armed and %true is returned.
1021 *
1022 * The caller should have initialized @wq->first_flusher prior to
1023 * calling this function with non-negative @flush_color. If
1024 * @flush_color is negative, no flush color update is done and %false
1025 * is returned.
1026 *
1027 * If @work_color is non-negative, all cwqs should have the same
1028 * work_color which is previous to @work_color and all will be
1029 * advanced to @work_color.
1030 *
1031 * CONTEXT:
1032 * mutex_lock(wq->flush_mutex).
1033 *
1034 * RETURNS:
1035 * %true if @flush_color >= 0 and there's something to flush. %false
1036 * otherwise.
1037 */
1038 static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq,
1039 int flush_color, int work_color)
1040 {
1041 bool wait = false;
1042 unsigned int cpu;
1043
1044 if (flush_color >= 0) {
1045 BUG_ON(atomic_read(&wq->nr_cwqs_to_flush));
1046 atomic_set(&wq->nr_cwqs_to_flush, 1);
1047 }
1048
1049 for_each_possible_cpu(cpu) {
1050 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
1051 struct global_cwq *gcwq = cwq->gcwq;
1052
1053 spin_lock_irq(&gcwq->lock);
1054
1055 if (flush_color >= 0) {
1056 BUG_ON(cwq->flush_color != -1);
1057
1058 if (cwq->nr_in_flight[flush_color]) {
1059 cwq->flush_color = flush_color;
1060 atomic_inc(&wq->nr_cwqs_to_flush);
1061 wait = true;
1062 }
1063 }
1064
1065 if (work_color >= 0) {
1066 BUG_ON(work_color != work_next_color(cwq->work_color));
1067 cwq->work_color = work_color;
1068 }
1069
1070 spin_unlock_irq(&gcwq->lock);
1071 }
1072
1073 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush))
1074 complete(&wq->first_flusher->done);
1075
1076 return wait;
1077 }
1078
1079 /**
1080 * flush_workqueue - ensure that any scheduled work has run to completion.
1081 * @wq: workqueue to flush
1082 *
1083 * Forces execution of the workqueue and blocks until its completion.
1084 * This is typically used in driver shutdown handlers.
1085 *
1086 * We sleep until all works which were queued on entry have been handled,
1087 * but we are not livelocked by new incoming ones.
1088 */
1089 void flush_workqueue(struct workqueue_struct *wq)
1090 {
1091 struct wq_flusher this_flusher = {
1092 .list = LIST_HEAD_INIT(this_flusher.list),
1093 .flush_color = -1,
1094 .done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done),
1095 };
1096 int next_color;
1097
1098 lock_map_acquire(&wq->lockdep_map);
1099 lock_map_release(&wq->lockdep_map);
1100
1101 mutex_lock(&wq->flush_mutex);
1102
1103 /*
1104 * Start-to-wait phase
1105 */
1106 next_color = work_next_color(wq->work_color);
1107
1108 if (next_color != wq->flush_color) {
1109 /*
1110 * Color space is not full. The current work_color
1111 * becomes our flush_color and work_color is advanced
1112 * by one.
1113 */
1114 BUG_ON(!list_empty(&wq->flusher_overflow));
1115 this_flusher.flush_color = wq->work_color;
1116 wq->work_color = next_color;
1117
1118 if (!wq->first_flusher) {
1119 /* no flush in progress, become the first flusher */
1120 BUG_ON(wq->flush_color != this_flusher.flush_color);
1121
1122 wq->first_flusher = &this_flusher;
1123
1124 if (!flush_workqueue_prep_cwqs(wq, wq->flush_color,
1125 wq->work_color)) {
1126 /* nothing to flush, done */
1127 wq->flush_color = next_color;
1128 wq->first_flusher = NULL;
1129 goto out_unlock;
1130 }
1131 } else {
1132 /* wait in queue */
1133 BUG_ON(wq->flush_color == this_flusher.flush_color);
1134 list_add_tail(&this_flusher.list, &wq->flusher_queue);
1135 flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
1136 }
1137 } else {
1138 /*
1139 * Oops, color space is full; wait on the overflow queue.
1140 * The next flush completion will assign us a
1141 * flush_color and move us to the flusher_queue.
1142 */
1143 list_add_tail(&this_flusher.list, &wq->flusher_overflow);
1144 }
1145
1146 mutex_unlock(&wq->flush_mutex);
1147
1148 wait_for_completion(&this_flusher.done);
1149
1150 /*
1151 * Wake-up-and-cascade phase
1152 *
1153 * First flushers are responsible for cascading flushes and
1154 * handling overflow. Non-first flushers can simply return.
1155 */
1156 if (wq->first_flusher != &this_flusher)
1157 return;
1158
1159 mutex_lock(&wq->flush_mutex);
1160
1161 wq->first_flusher = NULL;
1162
1163 BUG_ON(!list_empty(&this_flusher.list));
1164 BUG_ON(wq->flush_color != this_flusher.flush_color);
1165
1166 while (true) {
1167 struct wq_flusher *next, *tmp;
1168
1169 /* complete all the flushers sharing the current flush color */
1170 list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
1171 if (next->flush_color != wq->flush_color)
1172 break;
1173 list_del_init(&next->list);
1174 complete(&next->done);
1175 }
1176
1177 BUG_ON(!list_empty(&wq->flusher_overflow) &&
1178 wq->flush_color != work_next_color(wq->work_color));
1179
1180 /* this flush_color is finished, advance by one */
1181 wq->flush_color = work_next_color(wq->flush_color);
1182
1183 /* one color has been freed, handle overflow queue */
1184 if (!list_empty(&wq->flusher_overflow)) {
1185 /*
1186 * Assign the same color to all overflowed
1187 * flushers, advance work_color and append to
1188 * flusher_queue. This is the start-to-wait
1189 * phase for these overflowed flushers.
1190 */
1191 list_for_each_entry(tmp, &wq->flusher_overflow, list)
1192 tmp->flush_color = wq->work_color;
1193
1194 wq->work_color = work_next_color(wq->work_color);
1195
1196 list_splice_tail_init(&wq->flusher_overflow,
1197 &wq->flusher_queue);
1198 flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
1199 }
1200
1201 if (list_empty(&wq->flusher_queue)) {
1202 BUG_ON(wq->flush_color != wq->work_color);
1203 break;
1204 }
1205
1206 /*
1207 * Need to flush more colors. Make the next flusher
1208 * the new first flusher and arm cwqs.
1209 */
1210 BUG_ON(wq->flush_color == wq->work_color);
1211 BUG_ON(wq->flush_color != next->flush_color);
1212
1213 list_del_init(&next->list);
1214 wq->first_flusher = next;
1215
1216 if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1))
1217 break;
1218
1219 /*
1220 * Meh... this color is already done, clear first
1221 * flusher and repeat cascading.
1222 */
1223 wq->first_flusher = NULL;
1224 }
1225
1226 out_unlock:
1227 mutex_unlock(&wq->flush_mutex);
1228 }
1229 EXPORT_SYMBOL_GPL(flush_workqueue);
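
/*
 * Usage sketch (illustrative, not part of the original file): a driver
 * shutdown path typically stops the sources of new work first, then
 * flushes its private workqueue so nothing still references state that is
 * about to be torn down.  "example_dev" and its members are hypothetical.
 *
 *	static void example_shutdown(struct example_dev *dev)
 *	{
 *		disable_irq(dev->irq);
 *		flush_workqueue(dev->wq);
 *		...
 *	}
 *
 * flush_workqueue() may sleep and must not be called from a work item
 * running on the same workqueue; lockdep catches that through the
 * lock_map_acquire()/lock_map_release() pair at the top of the function.
 */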
1230
1231 /**
1232 * flush_work - block until a work_struct's callback has terminated
1233 * @work: the work which is to be flushed
1234 *
1235 * Returns false if @work has already terminated.
1236 *
1237 * It is expected that, prior to calling flush_work(), the caller has
1238 * arranged for the work to not be requeued, otherwise it doesn't make
1239 * sense to use this function.
1240 */
1241 int flush_work(struct work_struct *work)
1242 {
1243 struct worker *worker = NULL;
1244 struct cpu_workqueue_struct *cwq;
1245 struct global_cwq *gcwq;
1246 struct wq_barrier barr;
1247
1248 might_sleep();
1249 cwq = get_wq_data(work);
1250 if (!cwq)
1251 return 0;
1252 gcwq = cwq->gcwq;
1253
1254 lock_map_acquire(&cwq->wq->lockdep_map);
1255 lock_map_release(&cwq->wq->lockdep_map);
1256
1257 spin_lock_irq(&gcwq->lock);
1258 if (!list_empty(&work->entry)) {
1259 /*
1260 * See the comment near try_to_grab_pending()->smp_rmb().
1261 * If it was re-queued under us we are not going to wait.
1262 */
1263 smp_rmb();
1264 if (unlikely(cwq != get_wq_data(work)))
1265 goto already_gone;
1266 } else {
1267 if (cwq->worker && cwq->worker->current_work == work)
1268 worker = cwq->worker;
1269 if (!worker)
1270 goto already_gone;
1271 }
1272
1273 insert_wq_barrier(cwq, &barr, work, worker);
1274 spin_unlock_irq(&gcwq->lock);
1275 wait_for_completion(&barr.done);
1276 destroy_work_on_stack(&barr.work);
1277 return 1;
1278 already_gone:
1279 spin_unlock_irq(&gcwq->lock);
1280 return 0;
1281 }
1282 EXPORT_SYMBOL_GPL(flush_work);
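
/*
 * Usage sketch (illustrative, not part of the original file): unlike
 * flush_workqueue(), flush_work() waits for one specific item.  The caller
 * must already have stopped the work from being requeued; the
 * "shutting_down" flag below is a hypothetical way of doing that.
 *
 *	dev->shutting_down = true;	checked by the work fn before rearming
 *	flush_work(&dev->poll_work);
 *
 * When flush_work() returns 1, the invocation that was queued or running
 * at call time has finished; 0 means there was nothing to wait for.
 */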
1283
1284 /*
1285 * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
1286 * so this work can't be re-armed in any way.
1287 */
1288 static int try_to_grab_pending(struct work_struct *work)
1289 {
1290 struct global_cwq *gcwq;
1291 struct cpu_workqueue_struct *cwq;
1292 int ret = -1;
1293
1294 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
1295 return 0;
1296
1297 /*
1298 * The queueing is in progress, or it is already queued. Try to
1299 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
1300 */
1301
1302 cwq = get_wq_data(work);
1303 if (!cwq)
1304 return ret;
1305 gcwq = cwq->gcwq;
1306
1307 spin_lock_irq(&gcwq->lock);
1308 if (!list_empty(&work->entry)) {
1309 /*
1310 * This work is queued, but perhaps we locked the wrong cwq.
1311 * In that case we must see the new value after rmb(), see
1312 * insert_work()->wmb().
1313 */
1314 smp_rmb();
1315 if (cwq == get_wq_data(work)) {
1316 debug_work_deactivate(work);
1317 list_del_init(&work->entry);
1318 cwq_dec_nr_in_flight(cwq, get_work_color(work));
1319 ret = 1;
1320 }
1321 }
1322 spin_unlock_irq(&gcwq->lock);
1323
1324 return ret;
1325 }
1326
1327 static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
1328 struct work_struct *work)
1329 {
1330 struct global_cwq *gcwq = cwq->gcwq;
1331 struct wq_barrier barr;
1332 struct worker *worker;
1333
1334 spin_lock_irq(&gcwq->lock);
1335
1336 worker = NULL;
1337 if (unlikely(cwq->worker && cwq->worker->current_work == work)) {
1338 worker = cwq->worker;
1339 insert_wq_barrier(cwq, &barr, work, worker);
1340 }
1341
1342 spin_unlock_irq(&gcwq->lock);
1343
1344 if (unlikely(worker)) {
1345 wait_for_completion(&barr.done);
1346 destroy_work_on_stack(&barr.work);
1347 }
1348 }
1349
1350 static void wait_on_work(struct work_struct *work)
1351 {
1352 struct cpu_workqueue_struct *cwq;
1353 struct workqueue_struct *wq;
1354 int cpu;
1355
1356 might_sleep();
1357
1358 lock_map_acquire(&work->lockdep_map);
1359 lock_map_release(&work->lockdep_map);
1360
1361 cwq = get_wq_data(work);
1362 if (!cwq)
1363 return;
1364
1365 wq = cwq->wq;
1366
1367 for_each_possible_cpu(cpu)
1368 wait_on_cpu_work(get_cwq(cpu, wq), work);
1369 }
1370
1371 static int __cancel_work_timer(struct work_struct *work,
1372 struct timer_list* timer)
1373 {
1374 int ret;
1375
1376 do {
1377 ret = (timer && likely(del_timer(timer)));
1378 if (!ret)
1379 ret = try_to_grab_pending(work);
1380 wait_on_work(work);
1381 } while (unlikely(ret < 0));
1382
1383 clear_wq_data(work);
1384 return ret;
1385 }
1386
1387 /**
1388 * cancel_work_sync - block until a work_struct's callback has terminated
1389 * @work: the work which is to be flushed
1390 *
1391 * Returns true if @work was pending.
1392 *
1393 * cancel_work_sync() will cancel the work if it is queued. If the work's
1394 * callback appears to be running, cancel_work_sync() will block until it
1395 * has completed.
1396 *
1397 * It is possible to use this function if the work re-queues itself. It can
1398 * cancel the work even if it migrates to another workqueue, however in that
1399 * case it only guarantees that work->func() has completed on the last queued
1400 * workqueue.
1401 *
1402 * cancel_work_sync(&delayed_work->work) should be used only if ->timer is not
1403 * pending, otherwise it goes into a busy-wait loop until the timer expires.
1404 *
1405 * The caller must ensure that workqueue_struct on which this work was last
1406 * queued can't be destroyed before this function returns.
1407 */
1408 int cancel_work_sync(struct work_struct *work)
1409 {
1410 return __cancel_work_timer(work, NULL);
1411 }
1412 EXPORT_SYMBOL_GPL(cancel_work_sync);
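
/*
 * Usage sketch (illustrative, not part of the original file): teardown
 * paths usually prefer cancel_work_sync() over flushing because it also
 * removes a not-yet-started item from the queue.  "example_dev" and
 * example_remove() are hypothetical.
 *
 *	static void example_remove(struct example_dev *dev)
 *	{
 *		cancel_work_sync(&dev->reset_work);
 *		kfree(dev);
 *	}
 *
 * After cancel_work_sync() returns, the work function is neither running
 * nor queued, so freeing the containing object is safe provided nothing
 * can requeue the work afterwards.
 */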
1413
1414 /**
1415 * cancel_delayed_work_sync - reliably kill off a delayed work.
1416 * @dwork: the delayed work struct
1417 *
1418 * Returns true if @dwork was pending.
1419 *
1420 * It is possible to use this function if @dwork rearms itself via queue_work()
1421 * or queue_delayed_work(). See also the comment for cancel_work_sync().
1422 */
1423 int cancel_delayed_work_sync(struct delayed_work *dwork)
1424 {
1425 return __cancel_work_timer(&dwork->work, &dwork->timer);
1426 }
1427 EXPORT_SYMBOL(cancel_delayed_work_sync);
1428
1429 static struct workqueue_struct *keventd_wq __read_mostly;
1430
1431 /**
1432 * schedule_work - put work task in global workqueue
1433 * @work: job to be done
1434 *
1435 * Returns zero if @work was already on the kernel-global workqueue and
1436 * non-zero otherwise.
1437 *
1438 * This puts a job in the kernel-global workqueue if it was not already
1439 * queued and leaves it in the same position on the kernel-global
1440 * workqueue otherwise.
1441 */
1442 int schedule_work(struct work_struct *work)
1443 {
1444 return queue_work(keventd_wq, work);
1445 }
1446 EXPORT_SYMBOL(schedule_work);
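
/*
 * Usage sketch (illustrative, not part of the original file): the classic
 * "defer from interrupt context" pattern using the kernel-global queue.
 * All "example_*" identifiers are hypothetical.
 *
 *	static void example_bh_fn(struct work_struct *work)
 *	{
 *		... sleepable processing goes here ...
 *	}
 *	static DECLARE_WORK(example_bh_work, example_bh_fn);
 *
 *	static irqreturn_t example_irq(int irq, void *data)
 *	{
 *		schedule_work(&example_bh_work);
 *		return IRQ_HANDLED;
 *	}
 *
 * schedule_work() itself may be called from atomic context; only the work
 * function runs in process context on keventd.
 */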
1447
1448 /**
1449 * schedule_work_on - put work task on a specific cpu
1450 * @cpu: cpu to put the work task on
1451 * @work: job to be done
1452 *
1453 * This puts a job on a specific cpu.
1454 */
1455 int schedule_work_on(int cpu, struct work_struct *work)
1456 {
1457 return queue_work_on(cpu, keventd_wq, work);
1458 }
1459 EXPORT_SYMBOL(schedule_work_on);
1460
1461 /**
1462 * schedule_delayed_work - put work task in global workqueue after delay
1463 * @dwork: job to be done
1464 * @delay: number of jiffies to wait or 0 for immediate execution
1465 *
1466 * After waiting for a given time this puts a job in the kernel-global
1467 * workqueue.
1468 */
1469 int schedule_delayed_work(struct delayed_work *dwork,
1470 unsigned long delay)
1471 {
1472 return queue_delayed_work(keventd_wq, dwork, delay);
1473 }
1474 EXPORT_SYMBOL(schedule_delayed_work);
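
/*
 * Usage sketch (illustrative, not part of the original file): a simple
 * self-rearming poller on the kernel-global queue; cancel_delayed_work_sync()
 * above is what eventually stops it.  "example_*" names are made up.
 *
 *	static void example_poll_fn(struct work_struct *work);
 *	static DECLARE_DELAYED_WORK(example_poll, example_poll_fn);
 *
 *	static void example_poll_fn(struct work_struct *work)
 *	{
 *		... do one round of polling, then rearm ...
 *		schedule_delayed_work(&example_poll, HZ);
 *	}
 *
 *	start:  schedule_delayed_work(&example_poll, HZ);
 *	stop:   cancel_delayed_work_sync(&example_poll);
 */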
1475
1476 /**
1477 * flush_delayed_work - block until a delayed_work's callback has terminated
1478 * @dwork: the delayed work which is to be flushed
1479 *
1480 * Any timeout is cancelled, and any pending work is run immediately.
1481 */
1482 void flush_delayed_work(struct delayed_work *dwork)
1483 {
1484 if (del_timer_sync(&dwork->timer)) {
1485 __queue_work(get_cpu(), get_wq_data(&dwork->work)->wq,
1486 &dwork->work);
1487 put_cpu();
1488 }
1489 flush_work(&dwork->work);
1490 }
1491 EXPORT_SYMBOL(flush_delayed_work);
1492
1493 /**
1494 * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
1495 * @cpu: cpu to use
1496 * @dwork: job to be done
1497 * @delay: number of jiffies to wait
1498 *
1499 * After waiting for a given time this puts a job in the kernel-global
1500 * workqueue on the specified CPU.
1501 */
1502 int schedule_delayed_work_on(int cpu,
1503 struct delayed_work *dwork, unsigned long delay)
1504 {
1505 return queue_delayed_work_on(cpu, keventd_wq, dwork, delay);
1506 }
1507 EXPORT_SYMBOL(schedule_delayed_work_on);
1508
1509 /**
1510 * schedule_on_each_cpu - call a function on each online CPU from keventd
1511 * @func: the function to call
1512 *
1513 * Returns zero on success.
1514 * Returns -ve errno on failure.
1515 *
1516 * schedule_on_each_cpu() is very slow.
1517 */
1518 int schedule_on_each_cpu(work_func_t func)
1519 {
1520 int cpu;
1521 int orig = -1;
1522 struct work_struct *works;
1523
1524 works = alloc_percpu(struct work_struct);
1525 if (!works)
1526 return -ENOMEM;
1527
1528 get_online_cpus();
1529
1530 /*
1531 * When running in keventd, don't schedule a work item on
1532 * itself; the function can be called directly because the
1533 * workqueue is already bound to this cpu. This is also faster.
1534 */
1535 if (current_is_keventd())
1536 orig = raw_smp_processor_id();
1537
1538 for_each_online_cpu(cpu) {
1539 struct work_struct *work = per_cpu_ptr(works, cpu);
1540
1541 INIT_WORK(work, func);
1542 if (cpu != orig)
1543 schedule_work_on(cpu, work);
1544 }
1545 if (orig >= 0)
1546 func(per_cpu_ptr(works, orig));
1547
1548 for_each_online_cpu(cpu)
1549 flush_work(per_cpu_ptr(works, cpu));
1550
1551 put_online_cpus();
1552 free_percpu(works);
1553 return 0;
1554 }
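
/*
 * Usage sketch (illustrative, not part of the original file):
 * example_drain_fn() is hypothetical.
 *
 *	static void example_drain_fn(struct work_struct *unused)
 *	{
 *		... drain this cpu's private caches, counters, etc ...
 *	}
 *
 *	ret = schedule_on_each_cpu(example_drain_fn);
 *
 * blocks until example_drain_fn() has run on every online cpu, calling it
 * directly on the local cpu when invoked from keventd itself, as the code
 * above shows.
 */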
1555
1556 /**
1557 * flush_scheduled_work - ensure that any scheduled work has run to completion.
1558 *
1559 * Forces execution of the kernel-global workqueue and blocks until its
1560 * completion.
1561 *
1562 * Think twice before calling this function! It's very easy to get into
1563 * trouble if you don't take great care. Either of the following situations
1564 * will lead to deadlock:
1565 *
1566 * One of the work items currently on the workqueue needs to acquire
1567 * a lock held by your code or its caller.
1568 *
1569 * Your code is running in the context of a work routine.
1570 *
1571 * They will be detected by lockdep when they occur, but the first might not
1572 * occur very often. It depends on what work items are on the workqueue and
1573 * what locks they need, which you have no control over.
1574 *
1575 * In most situations flushing the entire workqueue is overkill; you merely
1576 * need to know that a particular work item isn't queued and isn't running.
1577 * In such cases you should use cancel_delayed_work_sync() or
1578 * cancel_work_sync() instead.
1579 */
1580 void flush_scheduled_work(void)
1581 {
1582 flush_workqueue(keventd_wq);
1583 }
1584 EXPORT_SYMBOL(flush_scheduled_work);
1585
1586 /**
1587 * execute_in_process_context - reliably execute the routine with user context
1588 * @fn: the function to execute
1589 * @ew: guaranteed storage for the execute work structure (must
1590 * be available when the work executes)
1591 *
1592 * Executes the function immediately if process context is available,
1593 * otherwise schedules the function for delayed execution.
1594 *
1595 * Returns: 0 - function was executed
1596 * 1 - function was scheduled for execution
1597 */
1598 int execute_in_process_context(work_func_t fn, struct execute_work *ew)
1599 {
1600 if (!in_interrupt()) {
1601 fn(&ew->work);
1602 return 0;
1603 }
1604
1605 INIT_WORK(&ew->work, fn);
1606 schedule_work(&ew->work);
1607
1608 return 1;
1609 }
1610 EXPORT_SYMBOL_GPL(execute_in_process_context);
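
/*
 * Usage sketch (illustrative, not part of the original file): a release
 * path that may be entered from either process or interrupt context.
 * "example_obj", its execute_work member "ew" and example_release() are
 * hypothetical.
 *
 *	static void example_release(struct work_struct *work)
 *	{
 *		struct example_obj *obj =
 *			container_of(work, struct example_obj, ew.work);
 *		kfree(obj);
 *	}
 *
 *	execute_in_process_context(example_release, &obj->ew);
 *
 * runs example_release() immediately when possible and defers it to the
 * global workqueue otherwise.  Here the callback itself frees the object,
 * which is the usual pattern since the caller cannot know whether
 * execution was immediate or deferred.
 */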
1611
1612 int keventd_up(void)
1613 {
1614 return keventd_wq != NULL;
1615 }
1616
1617 int current_is_keventd(void)
1618 {
1619 struct cpu_workqueue_struct *cwq;
1620 int cpu = raw_smp_processor_id(); /* preempt-safe: keventd is per-cpu */
1621 int ret = 0;
1622
1623 BUG_ON(!keventd_wq);
1624
1625 cwq = get_cwq(cpu, keventd_wq);
1626 if (current == cwq->worker->task)
1627 ret = 1;
1628
1629 return ret;
1630
1631 }
1632
1633 static struct cpu_workqueue_struct *alloc_cwqs(void)
1634 {
1635 /*
1636 * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
1637 * Make sure that the alignment isn't lower than that of
1638 * unsigned long long.
1639 */
1640 const size_t size = sizeof(struct cpu_workqueue_struct);
1641 const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
1642 __alignof__(unsigned long long));
1643 struct cpu_workqueue_struct *cwqs;
1644 #ifndef CONFIG_SMP
1645 void *ptr;
1646
1647 /*
1648 * On UP, percpu allocator doesn't honor alignment parameter
1649 * and simply uses arch-dependent default. Allocate enough
1650 * room to align cwq and put an extra pointer at the end
1651 * pointing back to the originally allocated pointer which
1652 * will be used for free.
1653 *
1654 * FIXME: This really belongs to UP percpu code. Update UP
1655 * percpu code to honor alignment and remove this ugliness.
1656 */
1657 ptr = __alloc_percpu(size + align + sizeof(void *), 1);
1658 cwqs = PTR_ALIGN(ptr, align);
1659 *(void **)per_cpu_ptr(cwqs + 1, 0) = ptr;
1660 #else
1661 /* On SMP, percpu allocator can do it itself */
1662 cwqs = __alloc_percpu(size, align);
1663 #endif
1664 /* just in case, make sure it's actually aligned */
1665 BUG_ON(!IS_ALIGNED((unsigned long)cwqs, align));
1666 return cwqs;
1667 }
1668
1669 static void free_cwqs(struct cpu_workqueue_struct *cwqs)
1670 {
1671 #ifndef CONFIG_SMP
1672 /* on UP, the pointer to free is stored right after the cwq */
1673 if (cwqs)
1674 free_percpu(*(void **)per_cpu_ptr(cwqs + 1, 0));
1675 #else
1676 free_percpu(cwqs);
1677 #endif
1678 }
1679
1680 struct workqueue_struct *__create_workqueue_key(const char *name,
1681 unsigned int flags,
1682 int max_active,
1683 struct lock_class_key *key,
1684 const char *lock_name)
1685 {
1686 bool singlethread = flags & WQ_SINGLE_THREAD;
1687 struct workqueue_struct *wq;
1688 bool failed = false;
1689 unsigned int cpu;
1690
1691 max_active = clamp_val(max_active, 1, INT_MAX);
1692
1693 wq = kzalloc(sizeof(*wq), GFP_KERNEL);
1694 if (!wq)
1695 goto err;
1696
1697 wq->cpu_wq = alloc_cwqs();
1698 if (!wq->cpu_wq)
1699 goto err;
1700
1701 wq->flags = flags;
1702 wq->saved_max_active = max_active;
1703 mutex_init(&wq->flush_mutex);
1704 atomic_set(&wq->nr_cwqs_to_flush, 0);
1705 INIT_LIST_HEAD(&wq->flusher_queue);
1706 INIT_LIST_HEAD(&wq->flusher_overflow);
1707 wq->name = name;
1708 lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
1709 INIT_LIST_HEAD(&wq->list);
1710
1711 cpu_maps_update_begin();
1712 /*
1713 * We must initialize cwqs for each possible cpu even if we
1714 * are going to call destroy_workqueue() finally. Otherwise
1715 * cpu_up() can hit the uninitialized cwq once we drop the
1716 * lock.
1717 */
1718 for_each_possible_cpu(cpu) {
1719 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
1720 struct global_cwq *gcwq = get_gcwq(cpu);
1721
1722 BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK);
1723 cwq->gcwq = gcwq;
1724 cwq->wq = wq;
1725 cwq->flush_color = -1;
1726 cwq->max_active = max_active;
1727 INIT_LIST_HEAD(&cwq->worklist);
1728 INIT_LIST_HEAD(&cwq->delayed_works);
1729
1730 if (failed)
1731 continue;
1732 cwq->worker = create_worker(cwq,
1733 cpu_online(cpu) && !singlethread);
1734 if (cwq->worker)
1735 start_worker(cwq->worker);
1736 else
1737 failed = true;
1738 }
1739
1740 /*
1741 * workqueue_lock protects global freeze state and workqueues
1742 * list. Grab it, set max_active accordingly and add the new
1743 * workqueue to workqueues list.
1744 */
1745 spin_lock(&workqueue_lock);
1746
1747 if (workqueue_freezing && wq->flags & WQ_FREEZEABLE)
1748 for_each_possible_cpu(cpu)
1749 get_cwq(cpu, wq)->max_active = 0;
1750
1751 list_add(&wq->list, &workqueues);
1752
1753 spin_unlock(&workqueue_lock);
1754
1755 cpu_maps_update_done();
1756
1757 if (failed) {
1758 destroy_workqueue(wq);
1759 wq = NULL;
1760 }
1761 return wq;
1762 err:
1763 if (wq) {
1764 free_cwqs(wq->cpu_wq);
1765 kfree(wq);
1766 }
1767 return NULL;
1768 }
1769 EXPORT_SYMBOL_GPL(__create_workqueue_key);
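
/*
 * Usage sketch (illustrative, not part of the original file): callers do
 * not use __create_workqueue_key() directly; they go through the wrapper
 * macros in linux/workqueue.h, e.g. create_workqueue() as used by
 * init_workqueues() below, which fill in the flags and lockdep key.
 *
 *	struct workqueue_struct *wq;
 *
 *	wq = create_workqueue("example");
 *	if (!wq)
 *		return -ENOMEM;
 *	...
 *	destroy_workqueue(wq);
 *
 * Single-threaded and freezeable variants select WQ_SINGLE_THREAD and
 * WQ_FREEZEABLE through their own wrappers in the same header.
 */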
1770
1771 /**
1772 * destroy_workqueue - safely terminate a workqueue
1773 * @wq: target workqueue
1774 *
1775 * Safely destroy a workqueue. All work currently pending will be done first.
1776 */
1777 void destroy_workqueue(struct workqueue_struct *wq)
1778 {
1779 unsigned int cpu;
1780
1781 flush_workqueue(wq);
1782
1783 /*
1784 * wq list is used to freeze wq, remove from list after
1785 * flushing is complete in case freeze races us.
1786 */
1787 cpu_maps_update_begin();
1788 spin_lock(&workqueue_lock);
1789 list_del(&wq->list);
1790 spin_unlock(&workqueue_lock);
1791 cpu_maps_update_done();
1792
1793 for_each_possible_cpu(cpu) {
1794 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
1795 int i;
1796
1797 if (cwq->worker) {
1798 spin_lock_irq(&cwq->gcwq->lock);
1799 destroy_worker(cwq->worker);
1800 cwq->worker = NULL;
1801 spin_unlock_irq(&cwq->gcwq->lock);
1802 }
1803
1804 for (i = 0; i < WORK_NR_COLORS; i++)
1805 BUG_ON(cwq->nr_in_flight[i]);
1806 BUG_ON(cwq->nr_active);
1807 BUG_ON(!list_empty(&cwq->delayed_works));
1808 }
1809
1810 free_cwqs(wq->cpu_wq);
1811 kfree(wq);
1812 }
1813 EXPORT_SYMBOL_GPL(destroy_workqueue);
1814
1815 static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
1816 unsigned long action,
1817 void *hcpu)
1818 {
1819 unsigned int cpu = (unsigned long)hcpu;
1820 struct cpu_workqueue_struct *cwq;
1821 struct workqueue_struct *wq;
1822
1823 action &= ~CPU_TASKS_FROZEN;
1824
1825 list_for_each_entry(wq, &workqueues, list) {
1826 if (wq->flags & WQ_SINGLE_THREAD)
1827 continue;
1828
1829 cwq = get_cwq(cpu, wq);
1830
1831 switch (action) {
1832 case CPU_POST_DEAD:
1833 flush_workqueue(wq);
1834 break;
1835 }
1836 }
1837
1838 return notifier_from_errno(0);
1839 }
1840
1841 #ifdef CONFIG_SMP
1842
1843 struct work_for_cpu {
1844 struct completion completion;
1845 long (*fn)(void *);
1846 void *arg;
1847 long ret;
1848 };
1849
1850 static int do_work_for_cpu(void *_wfc)
1851 {
1852 struct work_for_cpu *wfc = _wfc;
1853 wfc->ret = wfc->fn(wfc->arg);
1854 complete(&wfc->completion);
1855 return 0;
1856 }
1857
1858 /**
1859 * work_on_cpu - run a function in user context on a particular cpu
1860 * @cpu: the cpu to run on
1861 * @fn: the function to run
1862 * @arg: the function arg
1863 *
1864 * This will return the value @fn returns.
1865 * It is up to the caller to ensure that the cpu doesn't go offline.
1866 * The caller must not hold any locks which would prevent @fn from completing.
1867 */
1868 long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
1869 {
1870 struct task_struct *sub_thread;
1871 struct work_for_cpu wfc = {
1872 .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
1873 .fn = fn,
1874 .arg = arg,
1875 };
1876
1877 sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
1878 if (IS_ERR(sub_thread))
1879 return PTR_ERR(sub_thread);
1880 kthread_bind(sub_thread, cpu);
1881 wake_up_process(sub_thread);
1882 wait_for_completion(&wfc.completion);
1883 return wfc.ret;
1884 }
1885 EXPORT_SYMBOL_GPL(work_on_cpu);
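
/*
 * Usage sketch (illustrative, not part of the original file): running a
 * sleepable function on a particular cpu and collecting its return value.
 * example_on_cpu_fn() is hypothetical.
 *
 *	static long example_on_cpu_fn(void *arg)
 *	{
 *		... must execute on the target cpu ...
 *		return 0;
 *	}
 *
 *	get_online_cpus();
 *	ret = work_on_cpu(2, example_on_cpu_fn, NULL);
 *	put_online_cpus();
 *
 * get_online_cpus()/put_online_cpus() are one way to satisfy the "caller
 * must ensure the cpu doesn't go offline" requirement documented above.
 */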
1886 #endif /* CONFIG_SMP */
1887
1888 #ifdef CONFIG_FREEZER
1889
1890 /**
1891 * freeze_workqueues_begin - begin freezing workqueues
1892 *
1893 * Start freezing workqueues. After this function returns, all
1894 * freezeable workqueues will queue new works to their delayed_works
1895 * list instead of the cwq worklist.
1896 *
1897 * CONTEXT:
1898 * Grabs and releases workqueue_lock and gcwq->lock's.
1899 */
1900 void freeze_workqueues_begin(void)
1901 {
1902 struct workqueue_struct *wq;
1903 unsigned int cpu;
1904
1905 spin_lock(&workqueue_lock);
1906
1907 BUG_ON(workqueue_freezing);
1908 workqueue_freezing = true;
1909
1910 for_each_possible_cpu(cpu) {
1911 struct global_cwq *gcwq = get_gcwq(cpu);
1912
1913 spin_lock_irq(&gcwq->lock);
1914
1915 list_for_each_entry(wq, &workqueues, list) {
1916 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
1917
1918 if (wq->flags & WQ_FREEZEABLE)
1919 cwq->max_active = 0;
1920 }
1921
1922 spin_unlock_irq(&gcwq->lock);
1923 }
1924
1925 spin_unlock(&workqueue_lock);
1926 }
1927
1928 /**
1929 * freeze_workqueues_busy - are freezeable workqueues still busy?
1930 *
1931 * Check whether freezing is complete. This function must be called
1932 * between freeze_workqueues_begin() and thaw_workqueues().
1933 *
1934 * CONTEXT:
1935 * Grabs and releases workqueue_lock.
1936 *
1937 * RETURNS:
1938 * %true if some freezeable workqueues are still busy. %false if
1939 * freezing is complete.
1940 */
1941 bool freeze_workqueues_busy(void)
1942 {
1943 struct workqueue_struct *wq;
1944 unsigned int cpu;
1945 bool busy = false;
1946
1947 spin_lock(&workqueue_lock);
1948
1949 BUG_ON(!workqueue_freezing);
1950
1951 for_each_possible_cpu(cpu) {
1952 /*
1953 * nr_active is monotonically decreasing. It's safe
1954 * to peek without lock.
1955 */
1956 list_for_each_entry(wq, &workqueues, list) {
1957 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
1958
1959 if (!(wq->flags & WQ_FREEZEABLE))
1960 continue;
1961
1962 BUG_ON(cwq->nr_active < 0);
1963 if (cwq->nr_active) {
1964 busy = true;
1965 goto out_unlock;
1966 }
1967 }
1968 }
1969 out_unlock:
1970 spin_unlock(&workqueue_lock);
1971 return busy;
1972 }
1973
1974 /**
1975 * thaw_workqueues - thaw workqueues
1976 *
1977 * Thaw workqueues. Normal queueing is restored and all collected
1978 * frozen works are transferred to their respective cwq worklists.
1979 *
1980 * CONTEXT:
1981 * Grabs and releases workqueue_lock and gcwq->lock's.
1982 */
1983 void thaw_workqueues(void)
1984 {
1985 struct workqueue_struct *wq;
1986 unsigned int cpu;
1987
1988 spin_lock(&workqueue_lock);
1989
1990 if (!workqueue_freezing)
1991 goto out_unlock;
1992
1993 for_each_possible_cpu(cpu) {
1994 struct global_cwq *gcwq = get_gcwq(cpu);
1995
1996 spin_lock_irq(&gcwq->lock);
1997
1998 list_for_each_entry(wq, &workqueues, list) {
1999 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
2000
2001 if (!(wq->flags & WQ_FREEZEABLE))
2002 continue;
2003
2004 /* restore max_active and repopulate worklist */
2005 cwq->max_active = wq->saved_max_active;
2006
2007 while (!list_empty(&cwq->delayed_works) &&
2008 cwq->nr_active < cwq->max_active)
2009 cwq_activate_first_delayed(cwq);
2010
2011 wake_up_process(cwq->worker->task);
2012 }
2013
2014 spin_unlock_irq(&gcwq->lock);
2015 }
2016
2017 workqueue_freezing = false;
2018 out_unlock:
2019 spin_unlock(&workqueue_lock);
2020 }
2021 #endif /* CONFIG_FREEZER */
2022
2023 void __init init_workqueues(void)
2024 {
2025 unsigned int cpu;
2026 int i;
2027
2028 singlethread_cpu = cpumask_first(cpu_possible_mask);
2029 hotcpu_notifier(workqueue_cpu_callback, 0);
2030
2031 /* initialize gcwqs */
2032 for_each_possible_cpu(cpu) {
2033 struct global_cwq *gcwq = get_gcwq(cpu);
2034
2035 spin_lock_init(&gcwq->lock);
2036 gcwq->cpu = cpu;
2037
2038 INIT_LIST_HEAD(&gcwq->idle_list);
2039 for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
2040 INIT_HLIST_HEAD(&gcwq->busy_hash[i]);
2041
2042 ida_init(&gcwq->worker_ida);
2043 }
2044
2045 keventd_wq = create_workqueue("events");
2046 BUG_ON(!keventd_wq);
2047 }