workqueue: implement worker states
1 /*
2 * linux/kernel/workqueue.c
3 *
4 * Generic mechanism for defining kernel helper threads for running
5 * arbitrary tasks in process context.
6 *
7 * Started by Ingo Molnar, Copyright (C) 2002
8 *
9 * Derived from the taskqueue/keventd code by:
10 *
11 * David Woodhouse <dwmw2@infradead.org>
12 * Andrew Morton
13 * Kai Petzke <wpp@marie.physik.tu-berlin.de>
14 * Theodore Ts'o <tytso@mit.edu>
15 *
16 * Made to use alloc_percpu by Christoph Lameter.
17 */
18
19 #include <linux/module.h>
20 #include <linux/kernel.h>
21 #include <linux/sched.h>
22 #include <linux/init.h>
23 #include <linux/signal.h>
24 #include <linux/completion.h>
25 #include <linux/workqueue.h>
26 #include <linux/slab.h>
27 #include <linux/cpu.h>
28 #include <linux/notifier.h>
29 #include <linux/kthread.h>
30 #include <linux/hardirq.h>
31 #include <linux/mempolicy.h>
32 #include <linux/freezer.h>
33 #include <linux/kallsyms.h>
34 #include <linux/debug_locks.h>
35 #include <linux/lockdep.h>
36 #include <linux/idr.h>
37
38 enum {
39 /* worker flags */
40 WORKER_STARTED = 1 << 0, /* started */
41 WORKER_DIE = 1 << 1, /* die die die */
42 WORKER_IDLE = 1 << 2, /* is idle */
43
44 BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */
45 BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER,
46 BUSY_WORKER_HASH_MASK = BUSY_WORKER_HASH_SIZE - 1,
47 };
48
49 /*
50 * Structure fields follow one of the following exclusion rules.
51 *
52 * I: Set during initialization and read-only afterwards.
53 *
54 * L: gcwq->lock protected. Access with gcwq->lock held.
55 *
56 * F: wq->flush_mutex protected.
57 *
58 * W: workqueue_lock protected.
59 */
60
61 struct global_cwq;
62 struct cpu_workqueue_struct;
63
64 struct worker {
65 /* on idle list while idle, on busy hash table while busy */
66 union {
67 struct list_head entry; /* L: while idle */
68 struct hlist_node hentry; /* L: while busy */
69 };
70
71 struct work_struct *current_work; /* L: work being processed */
72 struct list_head scheduled; /* L: scheduled works */
73 struct task_struct *task; /* I: worker task */
74 struct global_cwq *gcwq; /* I: the associated gcwq */
75 struct cpu_workqueue_struct *cwq; /* I: the associated cwq */
76 unsigned int flags; /* L: flags */
77 int id; /* I: worker id */
78 };
79
80 /*
81 * Global per-cpu workqueue.
82 */
83 struct global_cwq {
84 spinlock_t lock; /* the gcwq lock */
85 unsigned int cpu; /* I: the associated cpu */
86
87 int nr_workers; /* L: total number of workers */
88 int nr_idle; /* L: currently idle ones */
89
90 /* workers are chained either in the idle_list or busy_hash */
91 struct list_head idle_list; /* L: list of idle workers */
92 struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE];
93 /* L: hash of busy workers */
94
95 struct ida worker_ida; /* L: for worker IDs */
96 } ____cacheline_aligned_in_smp;
97
98 /*
99 * The per-CPU workqueue (if single thread, we always use the first
100 * possible cpu). The lower WORK_STRUCT_FLAG_BITS of
101 * work_struct->data are used for flags, so cwqs need to be
102 * aligned on a (1 << WORK_STRUCT_FLAG_BITS) boundary.
103 */
104 struct cpu_workqueue_struct {
105 struct global_cwq *gcwq; /* I: the associated gcwq */
106 struct list_head worklist;
107 struct worker *worker;
108 struct workqueue_struct *wq; /* I: the owning workqueue */
109 int work_color; /* L: current color */
110 int flush_color; /* L: flushing color */
111 int nr_in_flight[WORK_NR_COLORS];
112 /* L: nr of in_flight works */
113 int nr_active; /* L: nr of active works */
114 int max_active; /* L: max active works */
115 struct list_head delayed_works; /* L: delayed works */
116 };
117
118 /*
119 * Structure used to wait for workqueue flush.
120 */
121 struct wq_flusher {
122 struct list_head list; /* F: list of flushers */
123 int flush_color; /* F: flush color waiting for */
124 struct completion done; /* flush completion */
125 };
126
127 /*
128 * The externally visible workqueue abstraction is an array of
129 * per-CPU workqueues:
130 */
131 struct workqueue_struct {
132 unsigned int flags; /* I: WQ_* flags */
133 struct cpu_workqueue_struct *cpu_wq; /* I: cwq's */
134 struct list_head list; /* W: list of all workqueues */
135
136 struct mutex flush_mutex; /* protects wq flushing */
137 int work_color; /* F: current work color */
138 int flush_color; /* F: current flush color */
139 atomic_t nr_cwqs_to_flush; /* flush in progress */
140 struct wq_flusher *first_flusher; /* F: first flusher */
141 struct list_head flusher_queue; /* F: flush waiters */
142 struct list_head flusher_overflow; /* F: flush overflow list */
143
144 int saved_max_active; /* I: saved cwq max_active */
145 const char *name; /* I: workqueue name */
146 #ifdef CONFIG_LOCKDEP
147 struct lockdep_map lockdep_map;
148 #endif
149 };
150
151 #ifdef CONFIG_DEBUG_OBJECTS_WORK
152
153 static struct debug_obj_descr work_debug_descr;
154
155 /*
156 * fixup_init is called when:
157 * - an active object is initialized
158 */
159 static int work_fixup_init(void *addr, enum debug_obj_state state)
160 {
161 struct work_struct *work = addr;
162
163 switch (state) {
164 case ODEBUG_STATE_ACTIVE:
165 cancel_work_sync(work);
166 debug_object_init(work, &work_debug_descr);
167 return 1;
168 default:
169 return 0;
170 }
171 }
172
173 /*
174 * fixup_activate is called when:
175 * - an active object is activated
176 * - an unknown object is activated (might be a statically initialized object)
177 */
178 static int work_fixup_activate(void *addr, enum debug_obj_state state)
179 {
180 struct work_struct *work = addr;
181
182 switch (state) {
183
184 case ODEBUG_STATE_NOTAVAILABLE:
185 /*
186 * This is not really a fixup. The work struct was
187 * statically initialized. We just make sure that it
188 * is tracked in the object tracker.
189 */
190 if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) {
191 debug_object_init(work, &work_debug_descr);
192 debug_object_activate(work, &work_debug_descr);
193 return 0;
194 }
195 WARN_ON_ONCE(1);
196 return 0;
197
198 case ODEBUG_STATE_ACTIVE:
199 WARN_ON(1);
200
201 default:
202 return 0;
203 }
204 }
205
206 /*
207 * fixup_free is called when:
208 * - an active object is freed
209 */
210 static int work_fixup_free(void *addr, enum debug_obj_state state)
211 {
212 struct work_struct *work = addr;
213
214 switch (state) {
215 case ODEBUG_STATE_ACTIVE:
216 cancel_work_sync(work);
217 debug_object_free(work, &work_debug_descr);
218 return 1;
219 default:
220 return 0;
221 }
222 }
223
224 static struct debug_obj_descr work_debug_descr = {
225 .name = "work_struct",
226 .fixup_init = work_fixup_init,
227 .fixup_activate = work_fixup_activate,
228 .fixup_free = work_fixup_free,
229 };
230
231 static inline void debug_work_activate(struct work_struct *work)
232 {
233 debug_object_activate(work, &work_debug_descr);
234 }
235
236 static inline void debug_work_deactivate(struct work_struct *work)
237 {
238 debug_object_deactivate(work, &work_debug_descr);
239 }
240
241 void __init_work(struct work_struct *work, int onstack)
242 {
243 if (onstack)
244 debug_object_init_on_stack(work, &work_debug_descr);
245 else
246 debug_object_init(work, &work_debug_descr);
247 }
248 EXPORT_SYMBOL_GPL(__init_work);
249
250 void destroy_work_on_stack(struct work_struct *work)
251 {
252 debug_object_free(work, &work_debug_descr);
253 }
254 EXPORT_SYMBOL_GPL(destroy_work_on_stack);
255
256 #else
257 static inline void debug_work_activate(struct work_struct *work) { }
258 static inline void debug_work_deactivate(struct work_struct *work) { }
259 #endif
260
261 /* Serializes the accesses to the list of workqueues. */
262 static DEFINE_SPINLOCK(workqueue_lock);
263 static LIST_HEAD(workqueues);
264 static bool workqueue_freezing; /* W: have wqs started freezing? */
265
266 static DEFINE_PER_CPU(struct global_cwq, global_cwq);
267
268 static int worker_thread(void *__worker);
269
270 static int singlethread_cpu __read_mostly;
271
272 static struct global_cwq *get_gcwq(unsigned int cpu)
273 {
274 return &per_cpu(global_cwq, cpu);
275 }
276
277 static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
278 struct workqueue_struct *wq)
279 {
280 return per_cpu_ptr(wq->cpu_wq, cpu);
281 }
282
283 static struct cpu_workqueue_struct *target_cwq(unsigned int cpu,
284 struct workqueue_struct *wq)
285 {
286 if (unlikely(wq->flags & WQ_SINGLE_THREAD))
287 cpu = singlethread_cpu;
288 return get_cwq(cpu, wq);
289 }
290
291 static unsigned int work_color_to_flags(int color)
292 {
293 return color << WORK_STRUCT_COLOR_SHIFT;
294 }
295
296 static int get_work_color(struct work_struct *work)
297 {
298 return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
299 ((1 << WORK_STRUCT_COLOR_BITS) - 1);
300 }
301
302 static int work_next_color(int color)
303 {
304 return (color + 1) % WORK_NR_COLORS;
305 }
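
/*
 * Illustrative note (not part of the original file): work colors simply
 * cycle through the WORK_NR_COLORS space, so with e.g. WORK_NR_COLORS == 15,
 *
 *	work_next_color(0)  == 1
 *	work_next_color(13) == 14
 *	work_next_color(14) == 0	(wraps around)
 *
 * flush_workqueue() below relies on this wrap-around to tell the color
 * currently being flushed apart from the color new works are queued at.
 */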
306
307 /*
308 * Set the workqueue on which a work item is to be run
309 * - Must *only* be called if the pending flag is set
310 */
311 static inline void set_wq_data(struct work_struct *work,
312 struct cpu_workqueue_struct *cwq,
313 unsigned long extra_flags)
314 {
315 BUG_ON(!work_pending(work));
316
317 atomic_long_set(&work->data, (unsigned long)cwq | work_static(work) |
318 WORK_STRUCT_PENDING | extra_flags);
319 }
320
321 /*
322 * Clear WORK_STRUCT_PENDING and the workqueue on which it was queued.
323 */
324 static inline void clear_wq_data(struct work_struct *work)
325 {
326 atomic_long_set(&work->data, work_static(work));
327 }
328
329 static inline struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
330 {
331 return (void *)(atomic_long_read(&work->data) &
332 WORK_STRUCT_WQ_DATA_MASK);
333 }
334
335 /**
336 * busy_worker_head - return the busy hash head for a work
337 * @gcwq: gcwq of interest
338 * @work: work to be hashed
339 *
340 * Return hash head of @gcwq for @work.
341 *
342 * CONTEXT:
343 * spin_lock_irq(gcwq->lock).
344 *
345 * RETURNS:
346 * Pointer to the hash head.
347 */
348 static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
349 struct work_struct *work)
350 {
351 const int base_shift = ilog2(sizeof(struct work_struct));
352 unsigned long v = (unsigned long)work;
353
354 /* simple shift and fold hash, do we need something better? */
355 v >>= base_shift;
356 v += v >> BUSY_WORKER_HASH_ORDER;
357 v &= BUSY_WORKER_HASH_MASK;
358
359 return &gcwq->busy_hash[v];
360 }
361
362 /**
363 * insert_work - insert a work into cwq
364 * @cwq: cwq @work belongs to
365 * @work: work to insert
366 * @head: insertion point
367 * @extra_flags: extra WORK_STRUCT_* flags to set
368 *
369 * Insert @work into @cwq after @head.
370 *
371 * CONTEXT:
372 * spin_lock_irq(gcwq->lock).
373 */
374 static void insert_work(struct cpu_workqueue_struct *cwq,
375 struct work_struct *work, struct list_head *head,
376 unsigned int extra_flags)
377 {
378 /* we own @work, set data and link */
379 set_wq_data(work, cwq, extra_flags);
380
381 /*
382 * Ensure that we get the right work->data if we see the
383 * result of list_add() below, see try_to_grab_pending().
384 */
385 smp_wmb();
386
387 list_add_tail(&work->entry, head);
388 wake_up_process(cwq->worker->task);
389 }
390
391 static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
392 struct work_struct *work)
393 {
394 struct cpu_workqueue_struct *cwq = target_cwq(cpu, wq);
395 struct global_cwq *gcwq = cwq->gcwq;
396 struct list_head *worklist;
397 unsigned long flags;
398
399 debug_work_activate(work);
400
401 spin_lock_irqsave(&gcwq->lock, flags);
402 BUG_ON(!list_empty(&work->entry));
403
404 cwq->nr_in_flight[cwq->work_color]++;
405
406 if (likely(cwq->nr_active < cwq->max_active)) {
407 cwq->nr_active++;
408 worklist = &cwq->worklist;
409 } else
410 worklist = &cwq->delayed_works;
411
412 insert_work(cwq, work, worklist, work_color_to_flags(cwq->work_color));
413
414 spin_unlock_irqrestore(&gcwq->lock, flags);
415 }
416
417 /**
418 * queue_work - queue work on a workqueue
419 * @wq: workqueue to use
420 * @work: work to queue
421 *
422 * Returns 0 if @work was already on a queue, non-zero otherwise.
423 *
424 * We queue the work to the CPU on which it was submitted, but if the CPU dies
425 * it can be processed by another CPU.
426 */
427 int queue_work(struct workqueue_struct *wq, struct work_struct *work)
428 {
429 int ret;
430
431 ret = queue_work_on(get_cpu(), wq, work);
432 put_cpu();
433
434 return ret;
435 }
436 EXPORT_SYMBOL_GPL(queue_work);
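
/*
 * Usage sketch (illustrative, not part of the original file): a driver
 * embeds a work_struct in its own object and queues it on a workqueue it
 * created earlier.  All "example_*" identifiers are hypothetical.
 *
 *	struct example_dev {
 *		struct workqueue_struct *wq;
 *		struct work_struct reset_work;
 *	};
 *
 *	static void example_reset_fn(struct work_struct *work)
 *	{
 *		struct example_dev *dev =
 *			container_of(work, struct example_dev, reset_work);
 *		... runs in process context, may sleep ...
 *	}
 *
 *	INIT_WORK(&dev->reset_work, example_reset_fn);	at setup time
 *	queue_work(dev->wq, &dev->reset_work);		to submit
 *
 * A second queue_work() while the item is still pending returns 0 and does
 * not queue it twice.
 */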
437
438 /**
439 * queue_work_on - queue work on specific cpu
440 * @cpu: CPU number to execute work on
441 * @wq: workqueue to use
442 * @work: work to queue
443 *
444 * Returns 0 if @work was already on a queue, non-zero otherwise.
445 *
446 * We queue the work to a specific CPU; the caller must ensure that
447 * CPU can't go offline.
448 */
449 int
450 queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
451 {
452 int ret = 0;
453
454 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
455 __queue_work(cpu, wq, work);
456 ret = 1;
457 }
458 return ret;
459 }
460 EXPORT_SYMBOL_GPL(queue_work_on);
461
462 static void delayed_work_timer_fn(unsigned long __data)
463 {
464 struct delayed_work *dwork = (struct delayed_work *)__data;
465 struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work);
466
467 __queue_work(smp_processor_id(), cwq->wq, &dwork->work);
468 }
469
470 /**
471 * queue_delayed_work - queue work on a workqueue after delay
472 * @wq: workqueue to use
473 * @dwork: delayable work to queue
474 * @delay: number of jiffies to wait before queueing
475 *
476 * Returns 0 if @work was already on a queue, non-zero otherwise.
477 */
478 int queue_delayed_work(struct workqueue_struct *wq,
479 struct delayed_work *dwork, unsigned long delay)
480 {
481 if (delay == 0)
482 return queue_work(wq, &dwork->work);
483
484 return queue_delayed_work_on(-1, wq, dwork, delay);
485 }
486 EXPORT_SYMBOL_GPL(queue_delayed_work);
487
488 /**
489 * queue_delayed_work_on - queue work on specific CPU after delay
490 * @cpu: CPU number to execute work on
491 * @wq: workqueue to use
492 * @dwork: work to queue
493 * @delay: number of jiffies to wait before queueing
494 *
495 * Returns 0 if @work was already on a queue, non-zero otherwise.
496 */
497 int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
498 struct delayed_work *dwork, unsigned long delay)
499 {
500 int ret = 0;
501 struct timer_list *timer = &dwork->timer;
502 struct work_struct *work = &dwork->work;
503
504 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
505 BUG_ON(timer_pending(timer));
506 BUG_ON(!list_empty(&work->entry));
507
508 timer_stats_timer_set_start_info(&dwork->timer);
509
510 /* This stores cwq for the moment, for the timer_fn */
511 set_wq_data(work, target_cwq(raw_smp_processor_id(), wq), 0);
512 timer->expires = jiffies + delay;
513 timer->data = (unsigned long)dwork;
514 timer->function = delayed_work_timer_fn;
515
516 if (unlikely(cpu >= 0))
517 add_timer_on(timer, cpu);
518 else
519 add_timer(timer);
520 ret = 1;
521 }
522 return ret;
523 }
524 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
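
/*
 * Usage sketch (illustrative, not part of the original file): queueing
 * delayed work.  "example_wq", "example_dwork" and example_timeout_fn are
 * hypothetical.
 *
 *	static void example_timeout_fn(struct work_struct *work);
 *	static DECLARE_DELAYED_WORK(example_dwork, example_timeout_fn);
 *
 *	queue_delayed_work(example_wq, &example_dwork,
 *			   msecs_to_jiffies(500));
 *
 * runs example_timeout_fn() on example_wq roughly 500ms later, while
 *
 *	queue_delayed_work_on(2, example_wq, &example_dwork, HZ);
 *
 * arms the timer on CPU 2 and, once it fires there, queues the work on
 * that cpu's cwq (see delayed_work_timer_fn() above).
 */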
525
526 /**
527 * worker_enter_idle - enter idle state
528 * @worker: worker which is entering idle state
529 *
530 * @worker is entering idle state. Update stats and idle timer if
531 * necessary.
532 *
533 * LOCKING:
534 * spin_lock_irq(gcwq->lock).
535 */
536 static void worker_enter_idle(struct worker *worker)
537 {
538 struct global_cwq *gcwq = worker->gcwq;
539
540 BUG_ON(worker->flags & WORKER_IDLE);
541 BUG_ON(!list_empty(&worker->entry) &&
542 (worker->hentry.next || worker->hentry.pprev));
543
544 worker->flags |= WORKER_IDLE;
545 gcwq->nr_idle++;
546
547 /* idle_list is LIFO */
548 list_add(&worker->entry, &gcwq->idle_list);
549 }
550
551 /**
552 * worker_leave_idle - leave idle state
553 * @worker: worker which is leaving idle state
554 *
555 * @worker is leaving idle state. Update stats.
556 *
557 * LOCKING:
558 * spin_lock_irq(gcwq->lock).
559 */
560 static void worker_leave_idle(struct worker *worker)
561 {
562 struct global_cwq *gcwq = worker->gcwq;
563
564 BUG_ON(!(worker->flags & WORKER_IDLE));
565 worker->flags &= ~WORKER_IDLE;
566 gcwq->nr_idle--;
567 list_del_init(&worker->entry);
568 }
569
570 static struct worker *alloc_worker(void)
571 {
572 struct worker *worker;
573
574 worker = kzalloc(sizeof(*worker), GFP_KERNEL);
575 if (worker) {
576 INIT_LIST_HEAD(&worker->entry);
577 INIT_LIST_HEAD(&worker->scheduled);
578 }
579 return worker;
580 }
581
582 /**
583 * create_worker - create a new workqueue worker
584 * @cwq: cwq the new worker will belong to
585 * @bind: whether to bind the new worker to @cwq's cpu or not
586 *
587 * Create a new worker which is bound to @cwq. The returned worker
588 * can be started by calling start_worker() or destroyed using
589 * destroy_worker().
590 *
591 * CONTEXT:
592 * Might sleep. Does GFP_KERNEL allocations.
593 *
594 * RETURNS:
595 * Pointer to the newly created worker.
596 */
597 static struct worker *create_worker(struct cpu_workqueue_struct *cwq, bool bind)
598 {
599 struct global_cwq *gcwq = cwq->gcwq;
600 int id = -1;
601 struct worker *worker = NULL;
602
603 spin_lock_irq(&gcwq->lock);
604 while (ida_get_new(&gcwq->worker_ida, &id)) {
605 spin_unlock_irq(&gcwq->lock);
606 if (!ida_pre_get(&gcwq->worker_ida, GFP_KERNEL))
607 goto fail;
608 spin_lock_irq(&gcwq->lock);
609 }
610 spin_unlock_irq(&gcwq->lock);
611
612 worker = alloc_worker();
613 if (!worker)
614 goto fail;
615
616 worker->gcwq = gcwq;
617 worker->cwq = cwq;
618 worker->id = id;
619
620 worker->task = kthread_create(worker_thread, worker, "kworker/%u:%d",
621 gcwq->cpu, id);
622 if (IS_ERR(worker->task))
623 goto fail;
624
625 if (bind)
626 kthread_bind(worker->task, gcwq->cpu);
627
628 return worker;
629 fail:
630 if (id >= 0) {
631 spin_lock_irq(&gcwq->lock);
632 ida_remove(&gcwq->worker_ida, id);
633 spin_unlock_irq(&gcwq->lock);
634 }
635 kfree(worker);
636 return NULL;
637 }
638
639 /**
640 * start_worker - start a newly created worker
641 * @worker: worker to start
642 *
643 * Make the gcwq aware of @worker and start it.
644 *
645 * CONTEXT:
646 * spin_lock_irq(gcwq->lock).
647 */
648 static void start_worker(struct worker *worker)
649 {
650 worker->flags |= WORKER_STARTED;
651 worker->gcwq->nr_workers++;
652 worker_enter_idle(worker);
653 wake_up_process(worker->task);
654 }
655
656 /**
657 * destroy_worker - destroy a workqueue worker
658 * @worker: worker to be destroyed
659 *
660 * Destroy @worker and adjust @gcwq stats accordingly.
661 *
662 * CONTEXT:
663 * spin_lock_irq(gcwq->lock) which is released and regrabbed.
664 */
665 static void destroy_worker(struct worker *worker)
666 {
667 struct global_cwq *gcwq = worker->gcwq;
668 int id = worker->id;
669
670 /* sanity check frenzy */
671 BUG_ON(worker->current_work);
672 BUG_ON(!list_empty(&worker->scheduled));
673
674 if (worker->flags & WORKER_STARTED)
675 gcwq->nr_workers--;
676 if (worker->flags & WORKER_IDLE)
677 gcwq->nr_idle--;
678
679 list_del_init(&worker->entry);
680 worker->flags |= WORKER_DIE;
681
682 spin_unlock_irq(&gcwq->lock);
683
684 kthread_stop(worker->task);
685 kfree(worker);
686
687 spin_lock_irq(&gcwq->lock);
688 ida_remove(&gcwq->worker_ida, id);
689 }
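
/*
 * Lifecycle sketch (illustrative, not part of the original file): roughly
 * how the three helpers above are meant to be paired, following their
 * CONTEXT annotations.  "cwq" and "gcwq" are assumed to be a valid
 * cpu_workqueue_struct and its associated gcwq.
 *
 *	struct worker *worker;
 *
 *	worker = create_worker(cwq, true);	(may sleep, GFP_KERNEL)
 *	if (worker) {
 *		spin_lock_irq(&gcwq->lock);
 *		start_worker(worker);		(gcwq starts tracking it)
 *		spin_unlock_irq(&gcwq->lock);
 *	}
 *
 *	...
 *
 *	spin_lock_irq(&gcwq->lock);
 *	destroy_worker(worker);			(drops and regrabs gcwq->lock)
 *	spin_unlock_irq(&gcwq->lock);
 *
 * See __create_workqueue_key() and destroy_workqueue() below for the real
 * call sites.
 */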
690
691 /**
692 * move_linked_works - move linked works to a list
693 * @work: start of series of works to be scheduled
694 * @head: target list to append @work to
695 * @nextp: out parameter for nested worklist walking
696 *
697 * Schedule linked works starting from @work to @head. Work series to
698 * be scheduled starts at @work and includes any consecutive work with
699 * WORK_STRUCT_LINKED set in its predecessor.
700 *
701 * If @nextp is not NULL, it's updated to point to the next work of
702 * the last scheduled work. This allows move_linked_works() to be
703 * nested inside outer list_for_each_entry_safe().
704 *
705 * CONTEXT:
706 * spin_lock_irq(gcwq->lock).
707 */
708 static void move_linked_works(struct work_struct *work, struct list_head *head,
709 struct work_struct **nextp)
710 {
711 struct work_struct *n;
712
713 /*
714 * A linked worklist always ends before the end of the list, so
715 * NULL can be used as the list head.
716 */
717 list_for_each_entry_safe_from(work, n, NULL, entry) {
718 list_move_tail(&work->entry, head);
719 if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
720 break;
721 }
722
723 /*
724 * If we're already inside safe list traversal and have moved
725 * multiple works to the scheduled queue, the next position
726 * needs to be updated.
727 */
728 if (nextp)
729 *nextp = n;
730 }
731
732 static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
733 {
734 struct work_struct *work = list_first_entry(&cwq->delayed_works,
735 struct work_struct, entry);
736
737 move_linked_works(work, &cwq->worklist, NULL);
738 cwq->nr_active++;
739 }
740
741 /**
742 * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
743 * @cwq: cwq of interest
744 * @color: color of work which left the queue
745 *
746 * A work has either completed or been removed from the pending queue;
747 * decrement nr_in_flight of its cwq and handle workqueue flushing.
748 *
749 * CONTEXT:
750 * spin_lock_irq(gcwq->lock).
751 */
752 static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
753 {
754 /* ignore uncolored works */
755 if (color == WORK_NO_COLOR)
756 return;
757
758 cwq->nr_in_flight[color]--;
759 cwq->nr_active--;
760
761 /* one down, submit a delayed one */
762 if (!list_empty(&cwq->delayed_works) &&
763 cwq->nr_active < cwq->max_active)
764 cwq_activate_first_delayed(cwq);
765
766 /* is flush in progress and are we at the flushing tip? */
767 if (likely(cwq->flush_color != color))
768 return;
769
770 /* are there still in-flight works? */
771 if (cwq->nr_in_flight[color])
772 return;
773
774 /* this cwq is done, clear flush_color */
775 cwq->flush_color = -1;
776
777 /*
778 * If this was the last cwq, wake up the first flusher. It
779 * will handle the rest.
780 */
781 if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush))
782 complete(&cwq->wq->first_flusher->done);
783 }
784
785 /**
786 * process_one_work - process single work
787 * @worker: self
788 * @work: work to process
789 *
790 * Process @work. This function contains all the logic necessary to
791 * process a single work including synchronization against and
792 * interaction with other workers on the same cpu, queueing and
793 * flushing. As long as the context requirement is met, any worker can
794 * call this function to process a work.
795 *
796 * CONTEXT:
797 * spin_lock_irq(gcwq->lock) which is released and regrabbed.
798 */
799 static void process_one_work(struct worker *worker, struct work_struct *work)
800 {
801 struct cpu_workqueue_struct *cwq = worker->cwq;
802 struct global_cwq *gcwq = cwq->gcwq;
803 struct hlist_head *bwh = busy_worker_head(gcwq, work);
804 work_func_t f = work->func;
805 int work_color;
806 #ifdef CONFIG_LOCKDEP
807 /*
808 * It is permissible to free the struct work_struct from
809 * inside the function that is called from it, this we need to
810 * take into account for lockdep too. To avoid bogus "held
811 * lock freed" warnings as well as problems when looking into
812 * work->lockdep_map, make a copy and use that here.
813 */
814 struct lockdep_map lockdep_map = work->lockdep_map;
815 #endif
816 /* claim and process */
817 debug_work_deactivate(work);
818 hlist_add_head(&worker->hentry, bwh);
819 worker->current_work = work;
820 work_color = get_work_color(work);
821 list_del_init(&work->entry);
822
823 spin_unlock_irq(&gcwq->lock);
824
825 BUG_ON(get_wq_data(work) != cwq);
826 work_clear_pending(work);
827 lock_map_acquire(&cwq->wq->lockdep_map);
828 lock_map_acquire(&lockdep_map);
829 f(work);
830 lock_map_release(&lockdep_map);
831 lock_map_release(&cwq->wq->lockdep_map);
832
833 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
834 printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
835 "%s/0x%08x/%d\n",
836 current->comm, preempt_count(), task_pid_nr(current));
837 printk(KERN_ERR " last function: ");
838 print_symbol("%s\n", (unsigned long)f);
839 debug_show_held_locks(current);
840 dump_stack();
841 }
842
843 spin_lock_irq(&gcwq->lock);
844
845 /* we're done with it, release */
846 hlist_del_init(&worker->hentry);
847 worker->current_work = NULL;
848 cwq_dec_nr_in_flight(cwq, work_color);
849 }
850
851 /**
852 * process_scheduled_works - process scheduled works
853 * @worker: self
854 *
855 * Process all scheduled works. Please note that the scheduled list
856 * may change while processing a work, so this function repeatedly
857 * fetches a work from the top and executes it.
858 *
859 * CONTEXT:
860 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
861 * multiple times.
862 */
863 static void process_scheduled_works(struct worker *worker)
864 {
865 while (!list_empty(&worker->scheduled)) {
866 struct work_struct *work = list_first_entry(&worker->scheduled,
867 struct work_struct, entry);
868 process_one_work(worker, work);
869 }
870 }
871
872 /**
873 * worker_thread - the worker thread function
874 * @__worker: self
875 *
876 * The cwq worker thread function.
877 */
878 static int worker_thread(void *__worker)
879 {
880 struct worker *worker = __worker;
881 struct global_cwq *gcwq = worker->gcwq;
882 struct cpu_workqueue_struct *cwq = worker->cwq;
883
884 woke_up:
885 if (unlikely(!cpumask_equal(&worker->task->cpus_allowed,
886 get_cpu_mask(gcwq->cpu))))
887 set_cpus_allowed_ptr(worker->task, get_cpu_mask(gcwq->cpu));
888
889 spin_lock_irq(&gcwq->lock);
890
891 /* DIE can be set only while we're idle; checking here is enough */
892 if (worker->flags & WORKER_DIE) {
893 spin_unlock_irq(&gcwq->lock);
894 return 0;
895 }
896
897 worker_leave_idle(worker);
898
899 /*
900 * ->scheduled list can only be filled while a worker is
901 * preparing to process a work or actually processing it.
902 * Make sure nobody diddled with it while I was sleeping.
903 */
904 BUG_ON(!list_empty(&worker->scheduled));
905
906 while (!list_empty(&cwq->worklist)) {
907 struct work_struct *work =
908 list_first_entry(&cwq->worklist,
909 struct work_struct, entry);
910
911 if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
912 /* optimization path, not strictly necessary */
913 process_one_work(worker, work);
914 if (unlikely(!list_empty(&worker->scheduled)))
915 process_scheduled_works(worker);
916 } else {
917 move_linked_works(work, &worker->scheduled, NULL);
918 process_scheduled_works(worker);
919 }
920 }
921
922 /*
923 * gcwq->lock is held and there's no work to process, sleep.
924 * Workers are woken up only while holding gcwq->lock, so
925 * setting the current state before releasing gcwq->lock is
926 * enough to prevent losing any event.
927 */
928 worker_enter_idle(worker);
929 __set_current_state(TASK_INTERRUPTIBLE);
930 spin_unlock_irq(&gcwq->lock);
931 schedule();
932 goto woke_up;
933 }
934
935 struct wq_barrier {
936 struct work_struct work;
937 struct completion done;
938 };
939
940 static void wq_barrier_func(struct work_struct *work)
941 {
942 struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
943 complete(&barr->done);
944 }
945
946 /**
947 * insert_wq_barrier - insert a barrier work
948 * @cwq: cwq to insert barrier into
949 * @barr: wq_barrier to insert
950 * @target: target work to attach @barr to
951 * @worker: worker currently executing @target, NULL if @target is not executing
952 *
953 * @barr is linked to @target such that @barr is completed only after
954 * @target finishes execution. Please note that the ordering
955 * guarantee is observed only with respect to @target and on the local
956 * cpu.
957 *
958 * Currently, a queued barrier can't be canceled. This is because
959 * try_to_grab_pending() can't determine whether the work to be
960 * grabbed is at the head of the queue and thus can't clear LINKED
961 * flag of the previous work while there must be a valid next work
962 * after a work with LINKED flag set.
963 *
964 * Note that when @worker is non-NULL, @target may be modified
965 * underneath us, so we can't reliably determine cwq from @target.
966 *
967 * CONTEXT:
968 * spin_lock_irq(gcwq->lock).
969 */
970 static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
971 struct wq_barrier *barr,
972 struct work_struct *target, struct worker *worker)
973 {
974 struct list_head *head;
975 unsigned int linked = 0;
976
977 /*
978 * debugobject calls are safe here even with gcwq->lock locked
979 * as we know for sure that this will not trigger any of the
980 * checks and call back into the fixup functions where we
981 * might deadlock.
982 */
983 INIT_WORK_ON_STACK(&barr->work, wq_barrier_func);
984 __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
985 init_completion(&barr->done);
986
987 /*
988 * If @target is currently being executed, schedule the
989 * barrier to the worker; otherwise, put it after @target.
990 */
991 if (worker)
992 head = worker->scheduled.next;
993 else {
994 unsigned long *bits = work_data_bits(target);
995
996 head = target->entry.next;
997 /* there can already be other linked works, inherit and set */
998 linked = *bits & WORK_STRUCT_LINKED;
999 __set_bit(WORK_STRUCT_LINKED_BIT, bits);
1000 }
1001
1002 debug_work_activate(&barr->work);
1003 insert_work(cwq, &barr->work, head,
1004 work_color_to_flags(WORK_NO_COLOR) | linked);
1005 }
1006
1007 /**
1008 * flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing
1009 * @wq: workqueue being flushed
1010 * @flush_color: new flush color, < 0 for no-op
1011 * @work_color: new work color, < 0 for no-op
1012 *
1013 * Prepare cwqs for workqueue flushing.
1014 *
1015 * If @flush_color is non-negative, flush_color on all cwqs should be
1016 * -1. If no cwq has in-flight works at the specified color, all
1017 * cwq->flush_color's stay at -1 and %false is returned. If any cwq
1018 * has in-flight works, its cwq->flush_color is set to
1019 * @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq
1020 * wakeup logic is armed and %true is returned.
1021 *
1022 * The caller should have initialized @wq->first_flusher prior to
1023 * calling this function with non-negative @flush_color. If
1024 * @flush_color is negative, no flush color update is done and %false
1025 * is returned.
1026 *
1027 * If @work_color is non-negative, all cwqs should have the same
1028 * work_color which is previous to @work_color and all will be
1029 * advanced to @work_color.
1030 *
1031 * CONTEXT:
1032 * mutex_lock(wq->flush_mutex).
1033 *
1034 * RETURNS:
1035 * %true if @flush_color >= 0 and there's something to flush. %false
1036 * otherwise.
1037 */
1038 static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq,
1039 int flush_color, int work_color)
1040 {
1041 bool wait = false;
1042 unsigned int cpu;
1043
1044 if (flush_color >= 0) {
1045 BUG_ON(atomic_read(&wq->nr_cwqs_to_flush));
1046 atomic_set(&wq->nr_cwqs_to_flush, 1);
1047 }
1048
1049 for_each_possible_cpu(cpu) {
1050 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
1051 struct global_cwq *gcwq = cwq->gcwq;
1052
1053 spin_lock_irq(&gcwq->lock);
1054
1055 if (flush_color >= 0) {
1056 BUG_ON(cwq->flush_color != -1);
1057
1058 if (cwq->nr_in_flight[flush_color]) {
1059 cwq->flush_color = flush_color;
1060 atomic_inc(&wq->nr_cwqs_to_flush);
1061 wait = true;
1062 }
1063 }
1064
1065 if (work_color >= 0) {
1066 BUG_ON(work_color != work_next_color(cwq->work_color));
1067 cwq->work_color = work_color;
1068 }
1069
1070 spin_unlock_irq(&gcwq->lock);
1071 }
1072
1073 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush))
1074 complete(&wq->first_flusher->done);
1075
1076 return wait;
1077 }
1078
1079 /**
1080 * flush_workqueue - ensure that any scheduled work has run to completion.
1081 * @wq: workqueue to flush
1082 *
1083 * Forces execution of the workqueue and blocks until its completion.
1084 * This is typically used in driver shutdown handlers.
1085 *
1086 * We sleep until all works which were queued on entry have been handled,
1087 * but we are not livelocked by new incoming ones.
1088 */
1089 void flush_workqueue(struct workqueue_struct *wq)
1090 {
1091 struct wq_flusher this_flusher = {
1092 .list = LIST_HEAD_INIT(this_flusher.list),
1093 .flush_color = -1,
1094 .done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done),
1095 };
1096 int next_color;
1097
1098 lock_map_acquire(&wq->lockdep_map);
1099 lock_map_release(&wq->lockdep_map);
1100
1101 mutex_lock(&wq->flush_mutex);
1102
1103 /*
1104 * Start-to-wait phase
1105 */
1106 next_color = work_next_color(wq->work_color);
1107
1108 if (next_color != wq->flush_color) {
1109 /*
1110 * Color space is not full. The current work_color
1111 * becomes our flush_color and work_color is advanced
1112 * by one.
1113 */
1114 BUG_ON(!list_empty(&wq->flusher_overflow));
1115 this_flusher.flush_color = wq->work_color;
1116 wq->work_color = next_color;
1117
1118 if (!wq->first_flusher) {
1119 /* no flush in progress, become the first flusher */
1120 BUG_ON(wq->flush_color != this_flusher.flush_color);
1121
1122 wq->first_flusher = &this_flusher;
1123
1124 if (!flush_workqueue_prep_cwqs(wq, wq->flush_color,
1125 wq->work_color)) {
1126 /* nothing to flush, done */
1127 wq->flush_color = next_color;
1128 wq->first_flusher = NULL;
1129 goto out_unlock;
1130 }
1131 } else {
1132 /* wait in queue */
1133 BUG_ON(wq->flush_color == this_flusher.flush_color);
1134 list_add_tail(&this_flusher.list, &wq->flusher_queue);
1135 flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
1136 }
1137 } else {
1138 /*
1139 * Oops, color space is full; wait on the overflow queue.
1140 * The next flush completion will assign us a
1141 * flush_color and move us to the flusher_queue.
1142 */
1143 list_add_tail(&this_flusher.list, &wq->flusher_overflow);
1144 }
1145
1146 mutex_unlock(&wq->flush_mutex);
1147
1148 wait_for_completion(&this_flusher.done);
1149
1150 /*
1151 * Wake-up-and-cascade phase
1152 *
1153 * First flushers are responsible for cascading flushes and
1154 * handling overflow. Non-first flushers can simply return.
1155 */
1156 if (wq->first_flusher != &this_flusher)
1157 return;
1158
1159 mutex_lock(&wq->flush_mutex);
1160
1161 wq->first_flusher = NULL;
1162
1163 BUG_ON(!list_empty(&this_flusher.list));
1164 BUG_ON(wq->flush_color != this_flusher.flush_color);
1165
1166 while (true) {
1167 struct wq_flusher *next, *tmp;
1168
1169 /* complete all the flushers sharing the current flush color */
1170 list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
1171 if (next->flush_color != wq->flush_color)
1172 break;
1173 list_del_init(&next->list);
1174 complete(&next->done);
1175 }
1176
1177 BUG_ON(!list_empty(&wq->flusher_overflow) &&
1178 wq->flush_color != work_next_color(wq->work_color));
1179
1180 /* this flush_color is finished, advance by one */
1181 wq->flush_color = work_next_color(wq->flush_color);
1182
1183 /* one color has been freed, handle overflow queue */
1184 if (!list_empty(&wq->flusher_overflow)) {
1185 /*
1186 * Assign the same color to all overflowed
1187 * flushers, advance work_color and append to
1188 * flusher_queue. This is the start-to-wait
1189 * phase for these overflowed flushers.
1190 */
1191 list_for_each_entry(tmp, &wq->flusher_overflow, list)
1192 tmp->flush_color = wq->work_color;
1193
1194 wq->work_color = work_next_color(wq->work_color);
1195
1196 list_splice_tail_init(&wq->flusher_overflow,
1197 &wq->flusher_queue);
1198 flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
1199 }
1200
1201 if (list_empty(&wq->flusher_queue)) {
1202 BUG_ON(wq->flush_color != wq->work_color);
1203 break;
1204 }
1205
1206 /*
1207 * Need to flush more colors. Make the next flusher
1208 * the new first flusher and arm cwqs.
1209 */
1210 BUG_ON(wq->flush_color == wq->work_color);
1211 BUG_ON(wq->flush_color != next->flush_color);
1212
1213 list_del_init(&next->list);
1214 wq->first_flusher = next;
1215
1216 if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1))
1217 break;
1218
1219 /*
1220 * Meh... this color is already done, clear first
1221 * flusher and repeat cascading.
1222 */
1223 wq->first_flusher = NULL;
1224 }
1225
1226 out_unlock:
1227 mutex_unlock(&wq->flush_mutex);
1228 }
1229 EXPORT_SYMBOL_GPL(flush_workqueue);
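
/*
 * Usage sketch (illustrative, not part of the original file): a driver
 * shutdown path typically stops the sources of new work first, then
 * flushes its private workqueue so nothing still references state that is
 * about to be torn down.  "example_dev" and its members are hypothetical.
 *
 *	static void example_shutdown(struct example_dev *dev)
 *	{
 *		disable_irq(dev->irq);
 *		flush_workqueue(dev->wq);
 *		...
 *	}
 *
 * flush_workqueue() may sleep and must not be called from a work item
 * running on the same workqueue; lockdep catches that through the
 * lock_map_acquire()/lock_map_release() pair at the top of the function.
 */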
1230
1231 /**
1232 * flush_work - block until a work_struct's callback has terminated
1233 * @work: the work which is to be flushed
1234 *
1235 * Returns false if @work has already terminated.
1236 *
1237 * It is expected that, prior to calling flush_work(), the caller has
1238 * arranged for the work to not be requeued, otherwise it doesn't make
1239 * sense to use this function.
1240 */
1241 int flush_work(struct work_struct *work)
1242 {
1243 struct worker *worker = NULL;
1244 struct cpu_workqueue_struct *cwq;
1245 struct global_cwq *gcwq;
1246 struct wq_barrier barr;
1247
1248 might_sleep();
1249 cwq = get_wq_data(work);
1250 if (!cwq)
1251 return 0;
1252 gcwq = cwq->gcwq;
1253
1254 lock_map_acquire(&cwq->wq->lockdep_map);
1255 lock_map_release(&cwq->wq->lockdep_map);
1256
1257 spin_lock_irq(&gcwq->lock);
1258 if (!list_empty(&work->entry)) {
1259 /*
1260 * See the comment near try_to_grab_pending()->smp_rmb().
1261 * If it was re-queued under us we are not going to wait.
1262 */
1263 smp_rmb();
1264 if (unlikely(cwq != get_wq_data(work)))
1265 goto already_gone;
1266 } else {
1267 if (cwq->worker && cwq->worker->current_work == work)
1268 worker = cwq->worker;
1269 if (!worker)
1270 goto already_gone;
1271 }
1272
1273 insert_wq_barrier(cwq, &barr, work, worker);
1274 spin_unlock_irq(&gcwq->lock);
1275 wait_for_completion(&barr.done);
1276 destroy_work_on_stack(&barr.work);
1277 return 1;
1278 already_gone:
1279 spin_unlock_irq(&gcwq->lock);
1280 return 0;
1281 }
1282 EXPORT_SYMBOL_GPL(flush_work);
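
/*
 * Usage sketch (illustrative, not part of the original file): unlike
 * flush_workqueue(), flush_work() waits for one specific item.  The caller
 * must already have stopped the work from being requeued; the
 * "shutting_down" flag below is a hypothetical way of doing that.
 *
 *	dev->shutting_down = true;	checked by the work fn before rearming
 *	flush_work(&dev->poll_work);
 *
 * When flush_work() returns 1, the invocation that was queued or running
 * at call time has finished; 0 means there was nothing to wait for.
 */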
1283
1284 /*
1285 * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
1286 * so this work can't be re-armed in any way.
1287 */
1288 static int try_to_grab_pending(struct work_struct *work)
1289 {
1290 struct global_cwq *gcwq;
1291 struct cpu_workqueue_struct *cwq;
1292 int ret = -1;
1293
1294 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
1295 return 0;
1296
1297 /*
1298 * The queueing is in progress, or it is already queued. Try to
1299 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
1300 */
1301
1302 cwq = get_wq_data(work);
1303 if (!cwq)
1304 return ret;
1305 gcwq = cwq->gcwq;
1306
1307 spin_lock_irq(&gcwq->lock);
1308 if (!list_empty(&work->entry)) {
1309 /*
1310 * This work is queued, but perhaps we locked the wrong cwq.
1311 * In that case we must see the new value after rmb(), see
1312 * insert_work()->wmb().
1313 */
1314 smp_rmb();
1315 if (cwq == get_wq_data(work)) {
1316 debug_work_deactivate(work);
1317 list_del_init(&work->entry);
1318 cwq_dec_nr_in_flight(cwq, get_work_color(work));
1319 ret = 1;
1320 }
1321 }
1322 spin_unlock_irq(&gcwq->lock);
1323
1324 return ret;
1325 }
1326
1327 static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
1328 struct work_struct *work)
1329 {
1330 struct global_cwq *gcwq = cwq->gcwq;
1331 struct wq_barrier barr;
1332 struct worker *worker;
1333
1334 spin_lock_irq(&gcwq->lock);
1335
1336 worker = NULL;
1337 if (unlikely(cwq->worker && cwq->worker->current_work == work)) {
1338 worker = cwq->worker;
1339 insert_wq_barrier(cwq, &barr, work, worker);
1340 }
1341
1342 spin_unlock_irq(&gcwq->lock);
1343
1344 if (unlikely(worker)) {
1345 wait_for_completion(&barr.done);
1346 destroy_work_on_stack(&barr.work);
1347 }
1348 }
1349
1350 static void wait_on_work(struct work_struct *work)
1351 {
1352 struct cpu_workqueue_struct *cwq;
1353 struct workqueue_struct *wq;
1354 int cpu;
1355
1356 might_sleep();
1357
1358 lock_map_acquire(&work->lockdep_map);
1359 lock_map_release(&work->lockdep_map);
1360
1361 cwq = get_wq_data(work);
1362 if (!cwq)
1363 return;
1364
1365 wq = cwq->wq;
1366
1367 for_each_possible_cpu(cpu)
1368 wait_on_cpu_work(get_cwq(cpu, wq), work);
1369 }
1370
1371 static int __cancel_work_timer(struct work_struct *work,
1372 struct timer_list* timer)
1373 {
1374 int ret;
1375
1376 do {
1377 ret = (timer && likely(del_timer(timer)));
1378 if (!ret)
1379 ret = try_to_grab_pending(work);
1380 wait_on_work(work);
1381 } while (unlikely(ret < 0));
1382
1383 clear_wq_data(work);
1384 return ret;
1385 }
1386
1387 /**
1388 * cancel_work_sync - block until a work_struct's callback has terminated
1389 * @work: the work which is to be flushed
1390 *
1391 * Returns true if @work was pending.
1392 *
1393 * cancel_work_sync() will cancel the work if it is queued. If the work's
1394 * callback appears to be running, cancel_work_sync() will block until it
1395 * has completed.
1396 *
1397 * It is possible to use this function if the work re-queues itself. It can
1398 * cancel the work even if it migrates to another workqueue, however in that
1399 * case it only guarantees that work->func() has completed on the last queued
1400 * workqueue.
1401 *
1402 * cancel_work_sync(&delayed_work->work) should be used only if ->timer is not
1403 * pending, otherwise it goes into a busy-wait loop until the timer expires.
1404 *
1405 * The caller must ensure that workqueue_struct on which this work was last
1406 * queued can't be destroyed before this function returns.
1407 */
1408 int cancel_work_sync(struct work_struct *work)
1409 {
1410 return __cancel_work_timer(work, NULL);
1411 }
1412 EXPORT_SYMBOL_GPL(cancel_work_sync);
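
/*
 * Usage sketch (illustrative, not part of the original file): teardown
 * paths usually prefer cancel_work_sync() over flushing because it also
 * removes a not-yet-started item from the queue.  "example_dev" and
 * example_remove() are hypothetical.
 *
 *	static void example_remove(struct example_dev *dev)
 *	{
 *		cancel_work_sync(&dev->reset_work);
 *		kfree(dev);
 *	}
 *
 * After cancel_work_sync() returns, the work function is neither running
 * nor queued, so freeing the containing object is safe provided nothing
 * can requeue the work afterwards.
 */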
1413
1414 /**
1415 * cancel_delayed_work_sync - reliably kill off a delayed work.
1416 * @dwork: the delayed work struct
1417 *
1418 * Returns true if @dwork was pending.
1419 *
1420 * It is possible to use this function if @dwork rearms itself via queue_work()
1421 * or queue_delayed_work(). See also the comment for cancel_work_sync().
1422 */
1423 int cancel_delayed_work_sync(struct delayed_work *dwork)
1424 {
1425 return __cancel_work_timer(&dwork->work, &dwork->timer);
1426 }
1427 EXPORT_SYMBOL(cancel_delayed_work_sync);
1428
1429 static struct workqueue_struct *keventd_wq __read_mostly;
1430
1431 /**
1432 * schedule_work - put work task in global workqueue
1433 * @work: job to be done
1434 *
1435 * Returns zero if @work was already on the kernel-global workqueue and
1436 * non-zero otherwise.
1437 *
1438 * This puts a job in the kernel-global workqueue if it was not already
1439 * queued and leaves it in the same position on the kernel-global
1440 * workqueue otherwise.
1441 */
1442 int schedule_work(struct work_struct *work)
1443 {
1444 return queue_work(keventd_wq, work);
1445 }
1446 EXPORT_SYMBOL(schedule_work);
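
/*
 * Usage sketch (illustrative, not part of the original file): the classic
 * "defer from interrupt context" pattern using the kernel-global queue.
 * All "example_*" identifiers are hypothetical.
 *
 *	static void example_bh_fn(struct work_struct *work)
 *	{
 *		... sleepable processing goes here ...
 *	}
 *	static DECLARE_WORK(example_bh_work, example_bh_fn);
 *
 *	static irqreturn_t example_irq(int irq, void *data)
 *	{
 *		schedule_work(&example_bh_work);
 *		return IRQ_HANDLED;
 *	}
 *
 * schedule_work() itself may be called from atomic context; only the work
 * function runs in process context on keventd.
 */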
1447
1448 /**
1449 * schedule_work_on - put work task on a specific cpu
1450 * @cpu: cpu to put the work task on
1451 * @work: job to be done
1452 *
1453 * This puts a job on a specific cpu.
1454 */
1455 int schedule_work_on(int cpu, struct work_struct *work)
1456 {
1457 return queue_work_on(cpu, keventd_wq, work);
1458 }
1459 EXPORT_SYMBOL(schedule_work_on);
1460
1461 /**
1462 * schedule_delayed_work - put work task in global workqueue after delay
1463 * @dwork: job to be done
1464 * @delay: number of jiffies to wait or 0 for immediate execution
1465 *
1466 * After waiting for a given time this puts a job in the kernel-global
1467 * workqueue.
1468 */
1469 int schedule_delayed_work(struct delayed_work *dwork,
1470 unsigned long delay)
1471 {
1472 return queue_delayed_work(keventd_wq, dwork, delay);
1473 }
1474 EXPORT_SYMBOL(schedule_delayed_work);
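
/*
 * Usage sketch (illustrative, not part of the original file): a simple
 * self-rearming poller on the kernel-global queue; cancel_delayed_work_sync()
 * above is what eventually stops it.  "example_*" names are made up.
 *
 *	static void example_poll_fn(struct work_struct *work);
 *	static DECLARE_DELAYED_WORK(example_poll, example_poll_fn);
 *
 *	static void example_poll_fn(struct work_struct *work)
 *	{
 *		... do one round of polling, then rearm ...
 *		schedule_delayed_work(&example_poll, HZ);
 *	}
 *
 *	start:  schedule_delayed_work(&example_poll, HZ);
 *	stop:   cancel_delayed_work_sync(&example_poll);
 */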
1475
1476 /**
1477 * flush_delayed_work - block until a delayed_work's callback has terminated
1478 * @dwork: the delayed work which is to be flushed
1479 *
1480 * Any timeout is cancelled, and any pending work is run immediately.
1481 */
1482 void flush_delayed_work(struct delayed_work *dwork)
1483 {
1484 if (del_timer_sync(&dwork->timer)) {
1485 __queue_work(get_cpu(), get_wq_data(&dwork->work)->wq,
1486 &dwork->work);
1487 put_cpu();
1488 }
1489 flush_work(&dwork->work);
1490 }
1491 EXPORT_SYMBOL(flush_delayed_work);
1492
1493 /**
1494 * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
1495 * @cpu: cpu to use
1496 * @dwork: job to be done
1497 * @delay: number of jiffies to wait
1498 *
1499 * After waiting for a given time this puts a job in the kernel-global
1500 * workqueue on the specified CPU.
1501 */
1502 int schedule_delayed_work_on(int cpu,
1503 struct delayed_work *dwork, unsigned long delay)
1504 {
1505 return queue_delayed_work_on(cpu, keventd_wq, dwork, delay);
1506 }
1507 EXPORT_SYMBOL(schedule_delayed_work_on);
1508
1509 /**
1510 * schedule_on_each_cpu - call a function on each online CPU from keventd
1511 * @func: the function to call
1512 *
1513 * Returns zero on success.
1514 * Returns -ve errno on failure.
1515 *
1516 * schedule_on_each_cpu() is very slow.
1517 */
1518 int schedule_on_each_cpu(work_func_t func)
1519 {
1520 int cpu;
1521 int orig = -1;
1522 struct work_struct *works;
1523
1524 works = alloc_percpu(struct work_struct);
1525 if (!works)
1526 return -ENOMEM;
1527
1528 get_online_cpus();
1529
1530 /*
1531 * When running in keventd, don't schedule a work item on
1532 * itself; the function can be called directly because the
1533 * workqueue is already bound to this cpu. This is also faster.
1534 */
1535 if (current_is_keventd())
1536 orig = raw_smp_processor_id();
1537
1538 for_each_online_cpu(cpu) {
1539 struct work_struct *work = per_cpu_ptr(works, cpu);
1540
1541 INIT_WORK(work, func);
1542 if (cpu != orig)
1543 schedule_work_on(cpu, work);
1544 }
1545 if (orig >= 0)
1546 func(per_cpu_ptr(works, orig));
1547
1548 for_each_online_cpu(cpu)
1549 flush_work(per_cpu_ptr(works, cpu));
1550
1551 put_online_cpus();
1552 free_percpu(works);
1553 return 0;
1554 }
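
/*
 * Usage sketch (illustrative, not part of the original file):
 * example_drain_fn() is hypothetical.
 *
 *	static void example_drain_fn(struct work_struct *unused)
 *	{
 *		... drain this cpu's private caches, counters, etc ...
 *	}
 *
 *	ret = schedule_on_each_cpu(example_drain_fn);
 *
 * blocks until example_drain_fn() has run on every online cpu, calling it
 * directly on the local cpu when invoked from keventd itself, as the code
 * above shows.
 */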
1555
1556 /**
1557 * flush_scheduled_work - ensure that any scheduled work has run to completion.
1558 *
1559 * Forces execution of the kernel-global workqueue and blocks until its
1560 * completion.
1561 *
1562 * Think twice before calling this function! It's very easy to get into
1563 * trouble if you don't take great care. Either of the following situations
1564 * will lead to deadlock:
1565 *
1566 * One of the work items currently on the workqueue needs to acquire
1567 * a lock held by your code or its caller.
1568 *
1569 * Your code is running in the context of a work routine.
1570 *
1571 * They will be detected by lockdep when they occur, but the first might not
1572 * occur very often. It depends on what work items are on the workqueue and
1573 * what locks they need, which you have no control over.
1574 *
1575 * In most situations flushing the entire workqueue is overkill; you merely
1576 * need to know that a particular work item isn't queued and isn't running.
1577 * In such cases you should use cancel_delayed_work_sync() or
1578 * cancel_work_sync() instead.
1579 */
1580 void flush_scheduled_work(void)
1581 {
1582 flush_workqueue(keventd_wq);
1583 }
1584 EXPORT_SYMBOL(flush_scheduled_work);
1585
1586 /**
1587 * execute_in_process_context - reliably execute the routine with user context
1588 * @fn: the function to execute
1589 * @ew: guaranteed storage for the execute work structure (must
1590 * be available when the work executes)
1591 *
1592 * Executes the function immediately if process context is available,
1593 * otherwise schedules the function for delayed execution.
1594 *
1595 * Returns: 0 - function was executed
1596 * 1 - function was scheduled for execution
1597 */
1598 int execute_in_process_context(work_func_t fn, struct execute_work *ew)
1599 {
1600 if (!in_interrupt()) {
1601 fn(&ew->work);
1602 return 0;
1603 }
1604
1605 INIT_WORK(&ew->work, fn);
1606 schedule_work(&ew->work);
1607
1608 return 1;
1609 }
1610 EXPORT_SYMBOL_GPL(execute_in_process_context);
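
/*
 * Usage sketch (illustrative, not part of the original file): a release
 * path that may be entered from either process or interrupt context.
 * "example_obj", its execute_work member "ew" and example_release() are
 * hypothetical.
 *
 *	static void example_release(struct work_struct *work)
 *	{
 *		struct example_obj *obj =
 *			container_of(work, struct example_obj, ew.work);
 *		kfree(obj);
 *	}
 *
 *	execute_in_process_context(example_release, &obj->ew);
 *
 * runs example_release() immediately when possible and defers it to the
 * global workqueue otherwise.  Here the callback itself frees the object,
 * which is the usual pattern since the caller cannot know whether
 * execution was immediate or deferred.
 */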
1611
1612 int keventd_up(void)
1613 {
1614 return keventd_wq != NULL;
1615 }
1616
1617 int current_is_keventd(void)
1618 {
1619 struct cpu_workqueue_struct *cwq;
1620 int cpu = raw_smp_processor_id(); /* preempt-safe: keventd is per-cpu */
1621 int ret = 0;
1622
1623 BUG_ON(!keventd_wq);
1624
1625 cwq = get_cwq(cpu, keventd_wq);
1626 if (current == cwq->worker->task)
1627 ret = 1;
1628
1629 return ret;
1630
1631 }
1632
1633 static struct cpu_workqueue_struct *alloc_cwqs(void)
1634 {
1635 /*
1636 * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
1637 * Make sure that the alignment isn't lower than that of
1638 * unsigned long long.
1639 */
1640 const size_t size = sizeof(struct cpu_workqueue_struct);
1641 const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
1642 __alignof__(unsigned long long));
1643 struct cpu_workqueue_struct *cwqs;
1644 #ifndef CONFIG_SMP
1645 void *ptr;
1646
1647 /*
1648 * On UP, percpu allocator doesn't honor alignment parameter
1649 * and simply uses arch-dependent default. Allocate enough
1650 * room to align cwq and put an extra pointer at the end
1651 * pointing back to the originally allocated pointer which
1652 * will be used for free.
1653 *
1654 * FIXME: This really belongs to UP percpu code. Update UP
1655 * percpu code to honor alignment and remove this ugliness.
1656 */
1657 ptr = __alloc_percpu(size + align + sizeof(void *), 1);
1658 cwqs = PTR_ALIGN(ptr, align);
1659 *(void **)per_cpu_ptr(cwqs + 1, 0) = ptr;
1660 #else
1661 /* On SMP, percpu allocator can do it itself */
1662 cwqs = __alloc_percpu(size, align);
1663 #endif
1664 /* just in case, make sure it's actually aligned */
1665 BUG_ON(!IS_ALIGNED((unsigned long)cwqs, align));
1666 return cwqs;
1667 }
1668
1669 static void free_cwqs(struct cpu_workqueue_struct *cwqs)
1670 {
1671 #ifndef CONFIG_SMP
1672 /* on UP, the pointer to free is stored right after the cwq */
1673 if (cwqs)
1674 free_percpu(*(void **)per_cpu_ptr(cwqs + 1, 0));
1675 #else
1676 free_percpu(cwqs);
1677 #endif
1678 }
1679
1680 struct workqueue_struct *__create_workqueue_key(const char *name,
1681 unsigned int flags,
1682 int max_active,
1683 struct lock_class_key *key,
1684 const char *lock_name)
1685 {
1686 bool singlethread = flags & WQ_SINGLE_THREAD;
1687 struct workqueue_struct *wq;
1688 bool failed = false;
1689 unsigned int cpu;
1690
1691 max_active = clamp_val(max_active, 1, INT_MAX);
1692
1693 wq = kzalloc(sizeof(*wq), GFP_KERNEL);
1694 if (!wq)
1695 goto err;
1696
1697 wq->cpu_wq = alloc_cwqs();
1698 if (!wq->cpu_wq)
1699 goto err;
1700
1701 wq->flags = flags;
1702 wq->saved_max_active = max_active;
1703 mutex_init(&wq->flush_mutex);
1704 atomic_set(&wq->nr_cwqs_to_flush, 0);
1705 INIT_LIST_HEAD(&wq->flusher_queue);
1706 INIT_LIST_HEAD(&wq->flusher_overflow);
1707 wq->name = name;
1708 lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
1709 INIT_LIST_HEAD(&wq->list);
1710
1711 cpu_maps_update_begin();
1712 /*
1713 * We must initialize cwqs for each possible cpu even if we
1714 * are going to call destroy_workqueue() finally. Otherwise
1715 * cpu_up() can hit the uninitialized cwq once we drop the
1716 * lock.
1717 */
1718 for_each_possible_cpu(cpu) {
1719 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
1720 struct global_cwq *gcwq = get_gcwq(cpu);
1721
1722 BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK);
1723 cwq->gcwq = gcwq;
1724 cwq->wq = wq;
1725 cwq->flush_color = -1;
1726 cwq->max_active = max_active;
1727 INIT_LIST_HEAD(&cwq->worklist);
1728 INIT_LIST_HEAD(&cwq->delayed_works);
1729
1730 if (failed)
1731 continue;
1732 cwq->worker = create_worker(cwq,
1733 cpu_online(cpu) && !singlethread);
1734 if (cwq->worker)
1735 start_worker(cwq->worker);
1736 else
1737 failed = true;
1738 }
1739
1740 /*
1741 * workqueue_lock protects global freeze state and workqueues
1742 * list. Grab it, set max_active accordingly and add the new
1743 * workqueue to workqueues list.
1744 */
1745 spin_lock(&workqueue_lock);
1746
1747 if (workqueue_freezing && wq->flags & WQ_FREEZEABLE)
1748 for_each_possible_cpu(cpu)
1749 get_cwq(cpu, wq)->max_active = 0;
1750
1751 list_add(&wq->list, &workqueues);
1752
1753 spin_unlock(&workqueue_lock);
1754
1755 cpu_maps_update_done();
1756
1757 if (failed) {
1758 destroy_workqueue(wq);
1759 wq = NULL;
1760 }
1761 return wq;
1762 err:
1763 if (wq) {
1764 free_cwqs(wq->cpu_wq);
1765 kfree(wq);
1766 }
1767 return NULL;
1768 }
1769 EXPORT_SYMBOL_GPL(__create_workqueue_key);
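
/*
 * Usage sketch (illustrative, not part of the original file): callers do
 * not use __create_workqueue_key() directly; they go through the wrapper
 * macros in linux/workqueue.h, e.g. create_workqueue() as used by
 * init_workqueues() below, which fill in the flags and lockdep key.
 *
 *	struct workqueue_struct *wq;
 *
 *	wq = create_workqueue("example");
 *	if (!wq)
 *		return -ENOMEM;
 *	...
 *	destroy_workqueue(wq);
 *
 * Single-threaded and freezeable variants select WQ_SINGLE_THREAD and
 * WQ_FREEZEABLE through their own wrappers in the same header.
 */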
1770
1771 /**
1772 * destroy_workqueue - safely terminate a workqueue
1773 * @wq: target workqueue
1774 *
1775 * Safely destroy a workqueue. All work currently pending will be done first.
1776 */
1777 void destroy_workqueue(struct workqueue_struct *wq)
1778 {
1779 unsigned int cpu;
1780
1781 flush_workqueue(wq);
1782
1783 /*
1784 * wq list is used to freeze wq, remove from list after
1785 * flushing is complete in case freeze races us.
1786 */
1787 cpu_maps_update_begin();
1788 spin_lock(&workqueue_lock);
1789 list_del(&wq->list);
1790 spin_unlock(&workqueue_lock);
1791 cpu_maps_update_done();
1792
1793 for_each_possible_cpu(cpu) {
1794 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
1795 int i;
1796
1797 if (cwq->worker) {
1798 spin_lock_irq(&cwq->gcwq->lock);
1799 destroy_worker(cwq->worker);
1800 cwq->worker = NULL;
1801 spin_unlock_irq(&cwq->gcwq->lock);
1802 }
1803
1804 for (i = 0; i < WORK_NR_COLORS; i++)
1805 BUG_ON(cwq->nr_in_flight[i]);
1806 BUG_ON(cwq->nr_active);
1807 BUG_ON(!list_empty(&cwq->delayed_works));
1808 }
1809
1810 free_cwqs(wq->cpu_wq);
1811 kfree(wq);
1812 }
1813 EXPORT_SYMBOL_GPL(destroy_workqueue);
1814
1815 static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
1816 unsigned long action,
1817 void *hcpu)
1818 {
1819 unsigned int cpu = (unsigned long)hcpu;
1820 struct cpu_workqueue_struct *cwq;
1821 struct workqueue_struct *wq;
1822
1823 action &= ~CPU_TASKS_FROZEN;
1824
1825 list_for_each_entry(wq, &workqueues, list) {
1826 if (wq->flags & WQ_SINGLE_THREAD)
1827 continue;
1828
1829 cwq = get_cwq(cpu, wq);
1830
1831 switch (action) {
1832 case CPU_POST_DEAD:
1833 flush_workqueue(wq);
1834 break;
1835 }
1836 }
1837
1838 return notifier_from_errno(0);
1839 }
1840
1841 #ifdef CONFIG_SMP
1842
1843 struct work_for_cpu {
1844 struct completion completion;
1845 long (*fn)(void *);
1846 void *arg;
1847 long ret;
1848 };
1849
1850 static int do_work_for_cpu(void *_wfc)
1851 {
1852 struct work_for_cpu *wfc = _wfc;
1853 wfc->ret = wfc->fn(wfc->arg);
1854 complete(&wfc->completion);
1855 return 0;
1856 }
1857
1858 /**
1859 * work_on_cpu - run a function in user context on a particular cpu
1860 * @cpu: the cpu to run on
1861 * @fn: the function to run
1862 * @arg: the function arg
1863 *
1864 * This will return the value @fn returns.
1865 * It is up to the caller to ensure that the cpu doesn't go offline.
1866 * The caller must not hold any locks which would prevent @fn from completing.
1867 */
1868 long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
1869 {
1870 struct task_struct *sub_thread;
1871 struct work_for_cpu wfc = {
1872 .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
1873 .fn = fn,
1874 .arg = arg,
1875 };
1876
1877 sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
1878 if (IS_ERR(sub_thread))
1879 return PTR_ERR(sub_thread);
1880 kthread_bind(sub_thread, cpu);
1881 wake_up_process(sub_thread);
1882 wait_for_completion(&wfc.completion);
1883 return wfc.ret;
1884 }
1885 EXPORT_SYMBOL_GPL(work_on_cpu);
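
/*
 * Usage sketch (illustrative, not part of the original file): running a
 * sleepable function on a particular cpu and collecting its return value.
 * example_on_cpu_fn() is hypothetical.
 *
 *	static long example_on_cpu_fn(void *arg)
 *	{
 *		... must execute on the target cpu ...
 *		return 0;
 *	}
 *
 *	get_online_cpus();
 *	ret = work_on_cpu(2, example_on_cpu_fn, NULL);
 *	put_online_cpus();
 *
 * get_online_cpus()/put_online_cpus() are one way to satisfy the "caller
 * must ensure the cpu doesn't go offline" requirement documented above.
 */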
1886 #endif /* CONFIG_SMP */
1887
1888 #ifdef CONFIG_FREEZER
1889
1890 /**
1891 * freeze_workqueues_begin - begin freezing workqueues
1892 *
1893 * Start freezing workqueues. After this function returns, all
1894 * freezeable workqueues will queue new works to their delayed_works
1895 * list instead of the cwq worklist.
1896 *
1897 * CONTEXT:
1898 * Grabs and releases workqueue_lock and gcwq->lock's.
1899 */
1900 void freeze_workqueues_begin(void)
1901 {
1902 struct workqueue_struct *wq;
1903 unsigned int cpu;
1904
1905 spin_lock(&workqueue_lock);
1906
1907 BUG_ON(workqueue_freezing);
1908 workqueue_freezing = true;
1909
1910 for_each_possible_cpu(cpu) {
1911 struct global_cwq *gcwq = get_gcwq(cpu);
1912
1913 spin_lock_irq(&gcwq->lock);
1914
1915 list_for_each_entry(wq, &workqueues, list) {
1916 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
1917
1918 if (wq->flags & WQ_FREEZEABLE)
1919 cwq->max_active = 0;
1920 }
1921
1922 spin_unlock_irq(&gcwq->lock);
1923 }
1924
1925 spin_unlock(&workqueue_lock);
1926 }
1927
1928 /**
1929 * freeze_workqueues_busy - are freezeable workqueues still busy?
1930 *
1931 * Check whether freezing is complete. This function must be called
1932 * between freeze_workqueues_begin() and thaw_workqueues().
1933 *
1934 * CONTEXT:
1935 * Grabs and releases workqueue_lock.
1936 *
1937 * RETURNS:
1938 * %true if some freezeable workqueues are still busy. %false if
1939 * freezing is complete.
1940 */
1941 bool freeze_workqueues_busy(void)
1942 {
1943 struct workqueue_struct *wq;
1944 unsigned int cpu;
1945 bool busy = false;
1946
1947 spin_lock(&workqueue_lock);
1948
1949 BUG_ON(!workqueue_freezing);
1950
1951 for_each_possible_cpu(cpu) {
1952 /*
1953 * nr_active is monotonically decreasing. It's safe
1954 * to peek without lock.
1955 */
1956 list_for_each_entry(wq, &workqueues, list) {
1957 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
1958
1959 if (!(wq->flags & WQ_FREEZEABLE))
1960 continue;
1961
1962 BUG_ON(cwq->nr_active < 0);
1963 if (cwq->nr_active) {
1964 busy = true;
1965 goto out_unlock;
1966 }
1967 }
1968 }
1969 out_unlock:
1970 spin_unlock(&workqueue_lock);
1971 return busy;
1972 }
1973
1974 /**
1975 * thaw_workqueues - thaw workqueues
1976 *
1977 * Thaw workqueues. Normal queueing is restored and all collected
1978 * frozen works are transferred to their respective cwq worklists.
1979 *
1980 * CONTEXT:
1981 * Grabs and releases workqueue_lock and gcwq->lock's.
1982 */
1983 void thaw_workqueues(void)
1984 {
1985 struct workqueue_struct *wq;
1986 unsigned int cpu;
1987
1988 spin_lock(&workqueue_lock);
1989
1990 if (!workqueue_freezing)
1991 goto out_unlock;
1992
1993 for_each_possible_cpu(cpu) {
1994 struct global_cwq *gcwq = get_gcwq(cpu);
1995
1996 spin_lock_irq(&gcwq->lock);
1997
1998 list_for_each_entry(wq, &workqueues, list) {
1999 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
2000
2001 if (!(wq->flags & WQ_FREEZEABLE))
2002 continue;
2003
2004 /* restore max_active and repopulate worklist */
2005 cwq->max_active = wq->saved_max_active;
2006
2007 while (!list_empty(&cwq->delayed_works) &&
2008 cwq->nr_active < cwq->max_active)
2009 cwq_activate_first_delayed(cwq);
2010
2011 wake_up_process(cwq->worker->task);
2012 }
2013
2014 spin_unlock_irq(&gcwq->lock);
2015 }
2016
2017 workqueue_freezing = false;
2018 out_unlock:
2019 spin_unlock(&workqueue_lock);
2020 }
2021 #endif /* CONFIG_FREEZER */
2022
2023 void __init init_workqueues(void)
2024 {
2025 unsigned int cpu;
2026 int i;
2027
2028 singlethread_cpu = cpumask_first(cpu_possible_mask);
2029 hotcpu_notifier(workqueue_cpu_callback, 0);
2030
2031 /* initialize gcwqs */
2032 for_each_possible_cpu(cpu) {
2033 struct global_cwq *gcwq = get_gcwq(cpu);
2034
2035 spin_lock_init(&gcwq->lock);
2036 gcwq->cpu = cpu;
2037
2038 INIT_LIST_HEAD(&gcwq->idle_list);
2039 for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
2040 INIT_HLIST_HEAD(&gcwq->busy_hash[i]);
2041
2042 ida_init(&gcwq->worker_ida);
2043 }
2044
2045 keventd_wq = create_workqueue("events");
2046 BUG_ON(!keventd_wq);
2047 }