Btrfs: Allow worker threads to exit when idle
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / fs / btrfs / async-thread.c
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19 #include <linux/kthread.h>
20 #include <linux/list.h>
21 #include <linux/spinlock.h>
22 #include <linux/freezer.h>
23 #include "async-thread.h"
24
/*
 * Bit numbers inside btrfs_work->flags.  They are only ever touched
 * through the atomic bitops (set_bit, test_bit, clear_bit, ...).
 */

/* set when an item is queued, cleared when worker_loop dequeues it */
#define WORK_QUEUED_BIT		0
/* set after the item's func has run on an ordered pool */
#define WORK_DONE_BIT		1
/* claimed (test_and_set) right before ordered_func is called */
#define WORK_ORDER_DONE_BIT	2
/* item is routed to the prio_pending / prio_order_list lists */
#define WORK_HIGH_PRIO_BIT	3
29
30 /*
31 * container for the kthread task pointer and the list of pending work
32 * One of these is allocated per thread.
33 */
34 struct btrfs_worker_thread {
35 /* pool we belong to */
36 struct btrfs_workers *workers;
37
38 /* list of struct btrfs_work that are waiting for service */
39 struct list_head pending;
40 struct list_head prio_pending;
41
42 /* list of worker threads from struct btrfs_workers */
43 struct list_head worker_list;
44
45 /* kthread */
46 struct task_struct *task;
47
48 /* number of things on the pending list */
49 atomic_t num_pending;
50
51 /* reference counter for this struct */
52 atomic_t refs;
53
54 unsigned long sequence;
55
56 /* protects the pending list. */
57 spinlock_t lock;
58
59 /* set to non-zero when this thread is already awake and kicking */
60 int working;
61
62 /* are we currently idle */
63 int idle;
64 };
65
66 /*
67 * helper function to move a thread onto the idle list after it
68 * has finished some requests.
69 */
70 static void check_idle_worker(struct btrfs_worker_thread *worker)
71 {
72 if (!worker->idle && atomic_read(&worker->num_pending) <
73 worker->workers->idle_thresh / 2) {
74 unsigned long flags;
75 spin_lock_irqsave(&worker->workers->lock, flags);
76 worker->idle = 1;
77 list_move(&worker->worker_list, &worker->workers->idle_list);
78 spin_unlock_irqrestore(&worker->workers->lock, flags);
79 }
80 }
81
82 /*
83 * helper function to move a thread off the idle list after new
84 * pending work is added.
85 */
86 static void check_busy_worker(struct btrfs_worker_thread *worker)
87 {
88 if (worker->idle && atomic_read(&worker->num_pending) >=
89 worker->workers->idle_thresh) {
90 unsigned long flags;
91 spin_lock_irqsave(&worker->workers->lock, flags);
92 worker->idle = 0;
93 list_move_tail(&worker->worker_list,
94 &worker->workers->worker_list);
95 spin_unlock_irqrestore(&worker->workers->lock, flags);
96 }
97 }
98
99 static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
100 {
101 struct btrfs_workers *workers = worker->workers;
102 unsigned long flags;
103
104 rmb();
105 if (!workers->atomic_start_pending)
106 return;
107
108 spin_lock_irqsave(&workers->lock, flags);
109 if (!workers->atomic_start_pending)
110 goto out;
111
112 workers->atomic_start_pending = 0;
113 if (workers->num_workers >= workers->max_workers)
114 goto out;
115
116 spin_unlock_irqrestore(&workers->lock, flags);
117 btrfs_start_workers(workers, 1);
118 return;
119
120 out:
121 spin_unlock_irqrestore(&workers->lock, flags);
122 }
123
124 static noinline int run_ordered_completions(struct btrfs_workers *workers,
125 struct btrfs_work *work)
126 {
127 unsigned long flags;
128
129 if (!workers->ordered)
130 return 0;
131
132 set_bit(WORK_DONE_BIT, &work->flags);
133
134 spin_lock_irqsave(&workers->lock, flags);
135
136 while (1) {
137 if (!list_empty(&workers->prio_order_list)) {
138 work = list_entry(workers->prio_order_list.next,
139 struct btrfs_work, order_list);
140 } else if (!list_empty(&workers->order_list)) {
141 work = list_entry(workers->order_list.next,
142 struct btrfs_work, order_list);
143 } else {
144 break;
145 }
146 if (!test_bit(WORK_DONE_BIT, &work->flags))
147 break;
148
149 /* we are going to call the ordered done function, but
150 * we leave the work item on the list as a barrier so
151 * that later work items that are done don't have their
152 * functions called before this one returns
153 */
154 if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
155 break;
156
157 spin_unlock_irqrestore(&workers->lock, flags);
158
159 work->ordered_func(work);
160
161 /* now take the lock again and call the freeing code */
162 spin_lock_irqsave(&workers->lock, flags);
163 list_del(&work->order_list);
164 work->ordered_free(work);
165 }
166
167 spin_unlock_irqrestore(&workers->lock, flags);
168 return 0;
169 }
170
171 static void put_worker(struct btrfs_worker_thread *worker)
172 {
173 if (atomic_dec_and_test(&worker->refs))
174 kfree(worker);
175 }
176
177 static int try_worker_shutdown(struct btrfs_worker_thread *worker)
178 {
179 int freeit = 0;
180
181 spin_lock_irq(&worker->lock);
182 spin_lock_irq(&worker->workers->lock);
183 if (worker->workers->num_workers > 1 &&
184 worker->idle &&
185 !worker->working &&
186 !list_empty(&worker->worker_list) &&
187 list_empty(&worker->prio_pending) &&
188 list_empty(&worker->pending)) {
189 freeit = 1;
190 list_del_init(&worker->worker_list);
191 worker->workers->num_workers--;
192 }
193 spin_unlock_irq(&worker->workers->lock);
194 spin_unlock_irq(&worker->lock);
195
196 if (freeit)
197 put_worker(worker);
198 return freeit;
199 }
200
201 /*
202 * main loop for servicing work items
203 */
204 static int worker_loop(void *arg)
205 {
206 struct btrfs_worker_thread *worker = arg;
207 struct list_head *cur;
208 struct btrfs_work *work;
209 do {
210 spin_lock_irq(&worker->lock);
211 again_locked:
212 while (1) {
213 if (!list_empty(&worker->prio_pending))
214 cur = worker->prio_pending.next;
215 else if (!list_empty(&worker->pending))
216 cur = worker->pending.next;
217 else
218 break;
219
220 work = list_entry(cur, struct btrfs_work, list);
221 list_del(&work->list);
222 clear_bit(WORK_QUEUED_BIT, &work->flags);
223
224 work->worker = worker;
225 spin_unlock_irq(&worker->lock);
226
227 work->func(work);
228
229 atomic_dec(&worker->num_pending);
230 /*
231 * unless this is an ordered work queue,
232 * 'work' was probably freed by func above.
233 */
234 run_ordered_completions(worker->workers, work);
235
236 check_pending_worker_creates(worker);
237
238 spin_lock_irq(&worker->lock);
239 check_idle_worker(worker);
240 }
241 if (freezing(current)) {
242 worker->working = 0;
243 spin_unlock_irq(&worker->lock);
244 refrigerator();
245 } else {
246 spin_unlock_irq(&worker->lock);
247 if (!kthread_should_stop()) {
248 cpu_relax();
249 /*
250 * we've dropped the lock, did someone else
251 * jump_in?
252 */
253 smp_mb();
254 if (!list_empty(&worker->pending) ||
255 !list_empty(&worker->prio_pending))
256 continue;
257
258 /*
259 * this short schedule allows more work to
260 * come in without the queue functions
261 * needing to go through wake_up_process()
262 *
263 * worker->working is still 1, so nobody
264 * is going to try and wake us up
265 */
266 schedule_timeout(1);
267 smp_mb();
268 if (!list_empty(&worker->pending) ||
269 !list_empty(&worker->prio_pending))
270 continue;
271
272 if (kthread_should_stop())
273 break;
274
275 /* still no more work?, sleep for real */
276 spin_lock_irq(&worker->lock);
277 set_current_state(TASK_INTERRUPTIBLE);
278 if (!list_empty(&worker->pending) ||
279 !list_empty(&worker->prio_pending))
280 goto again_locked;
281
282 /*
283 * this makes sure we get a wakeup when someone
284 * adds something new to the queue
285 */
286 worker->working = 0;
287 spin_unlock_irq(&worker->lock);
288
289 if (!kthread_should_stop()) {
290 schedule_timeout(HZ * 120);
291 if (!worker->working &&
292 try_worker_shutdown(worker)) {
293 return 0;
294 }
295 }
296 }
297 __set_current_state(TASK_RUNNING);
298 }
299 } while (!kthread_should_stop());
300 return 0;
301 }
302
303 /*
304 * this will wait for all the worker threads to shutdown
305 */
306 int btrfs_stop_workers(struct btrfs_workers *workers)
307 {
308 struct list_head *cur;
309 struct btrfs_worker_thread *worker;
310 int can_stop;
311
312 spin_lock_irq(&workers->lock);
313 list_splice_init(&workers->idle_list, &workers->worker_list);
314 while (!list_empty(&workers->worker_list)) {
315 cur = workers->worker_list.next;
316 worker = list_entry(cur, struct btrfs_worker_thread,
317 worker_list);
318
319 atomic_inc(&worker->refs);
320 workers->num_workers -= 1;
321 if (!list_empty(&worker->worker_list)) {
322 list_del_init(&worker->worker_list);
323 put_worker(worker);
324 can_stop = 1;
325 } else
326 can_stop = 0;
327 spin_unlock_irq(&workers->lock);
328 if (can_stop)
329 kthread_stop(worker->task);
330 spin_lock_irq(&workers->lock);
331 put_worker(worker);
332 }
333 spin_unlock_irq(&workers->lock);
334 return 0;
335 }
336
337 /*
338 * simple init on struct btrfs_workers
339 */
340 void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
341 {
342 workers->num_workers = 0;
343 INIT_LIST_HEAD(&workers->worker_list);
344 INIT_LIST_HEAD(&workers->idle_list);
345 INIT_LIST_HEAD(&workers->order_list);
346 INIT_LIST_HEAD(&workers->prio_order_list);
347 spin_lock_init(&workers->lock);
348 workers->max_workers = max;
349 workers->idle_thresh = 32;
350 workers->name = name;
351 workers->ordered = 0;
352 workers->atomic_start_pending = 0;
353 workers->atomic_worker_start = 0;
354 }
355
356 /*
357 * starts new worker threads. This does not enforce the max worker
358 * count in case you need to temporarily go past it.
359 */
360 int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
361 {
362 struct btrfs_worker_thread *worker;
363 int ret = 0;
364 int i;
365
366 for (i = 0; i < num_workers; i++) {
367 worker = kzalloc(sizeof(*worker), GFP_NOFS);
368 if (!worker) {
369 ret = -ENOMEM;
370 goto fail;
371 }
372
373 INIT_LIST_HEAD(&worker->pending);
374 INIT_LIST_HEAD(&worker->prio_pending);
375 INIT_LIST_HEAD(&worker->worker_list);
376 spin_lock_init(&worker->lock);
377 atomic_set(&worker->num_pending, 0);
378 atomic_set(&worker->refs, 1);
379 worker->workers = workers;
380 worker->task = kthread_run(worker_loop, worker,
381 "btrfs-%s-%d", workers->name,
382 workers->num_workers + i);
383 if (IS_ERR(worker->task)) {
384 ret = PTR_ERR(worker->task);
385 kfree(worker);
386 goto fail;
387 }
388 spin_lock_irq(&workers->lock);
389 list_add_tail(&worker->worker_list, &workers->idle_list);
390 worker->idle = 1;
391 workers->num_workers++;
392 spin_unlock_irq(&workers->lock);
393 }
394 return 0;
395 fail:
396 btrfs_stop_workers(workers);
397 return ret;
398 }
399
400 /*
401 * run through the list and find a worker thread that doesn't have a lot
402 * to do right now. This can return null if we aren't yet at the thread
403 * count limit and all of the threads are busy.
404 */
405 static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
406 {
407 struct btrfs_worker_thread *worker;
408 struct list_head *next;
409 int enforce_min = workers->num_workers < workers->max_workers;
410
411 /*
412 * if we find an idle thread, don't move it to the end of the
413 * idle list. This improves the chance that the next submission
414 * will reuse the same thread, and maybe catch it while it is still
415 * working
416 */
417 if (!list_empty(&workers->idle_list)) {
418 next = workers->idle_list.next;
419 worker = list_entry(next, struct btrfs_worker_thread,
420 worker_list);
421 return worker;
422 }
423 if (enforce_min || list_empty(&workers->worker_list))
424 return NULL;
425
426 /*
427 * if we pick a busy task, move the task to the end of the list.
428 * hopefully this will keep things somewhat evenly balanced.
429 * Do the move in batches based on the sequence number. This groups
430 * requests submitted at roughly the same time onto the same worker.
431 */
432 next = workers->worker_list.next;
433 worker = list_entry(next, struct btrfs_worker_thread, worker_list);
434 atomic_inc(&worker->num_pending);
435 worker->sequence++;
436
437 if (worker->sequence % workers->idle_thresh == 0)
438 list_move_tail(next, &workers->worker_list);
439 return worker;
440 }
441
442 /*
443 * selects a worker thread to take the next job. This will either find
444 * an idle worker, start a new worker up to the max count, or just return
445 * one of the existing busy workers.
446 */
447 static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
448 {
449 struct btrfs_worker_thread *worker;
450 unsigned long flags;
451 struct list_head *fallback;
452
453 again:
454 spin_lock_irqsave(&workers->lock, flags);
455 worker = next_worker(workers);
456 spin_unlock_irqrestore(&workers->lock, flags);
457
458 if (!worker) {
459 spin_lock_irqsave(&workers->lock, flags);
460 if (workers->num_workers >= workers->max_workers) {
461 goto fallback;
462 } else if (workers->atomic_worker_start) {
463 workers->atomic_start_pending = 1;
464 goto fallback;
465 } else {
466 spin_unlock_irqrestore(&workers->lock, flags);
467 /* we're below the limit, start another worker */
468 btrfs_start_workers(workers, 1);
469 goto again;
470 }
471 }
472 return worker;
473
474 fallback:
475 fallback = NULL;
476 /*
477 * we have failed to find any workers, just
478 * return the first one we can find.
479 */
480 if (!list_empty(&workers->worker_list))
481 fallback = workers->worker_list.next;
482 if (!list_empty(&workers->idle_list))
483 fallback = workers->idle_list.next;
484 BUG_ON(!fallback);
485 worker = list_entry(fallback,
486 struct btrfs_worker_thread, worker_list);
487 spin_unlock_irqrestore(&workers->lock, flags);
488 return worker;
489 }
490
491 /*
492 * btrfs_requeue_work just puts the work item back on the tail of the list
493 * it was taken from. It is intended for use with long running work functions
494 * that make some progress and want to give the cpu up for others.
495 */
496 int btrfs_requeue_work(struct btrfs_work *work)
497 {
498 struct btrfs_worker_thread *worker = work->worker;
499 unsigned long flags;
500 int wake = 0;
501
502 if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
503 goto out;
504
505 spin_lock_irqsave(&worker->lock, flags);
506 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
507 list_add_tail(&work->list, &worker->prio_pending);
508 else
509 list_add_tail(&work->list, &worker->pending);
510 atomic_inc(&worker->num_pending);
511
512 /* by definition we're busy, take ourselves off the idle
513 * list
514 */
515 if (worker->idle) {
516 spin_lock(&worker->workers->lock);
517 worker->idle = 0;
518 list_move_tail(&worker->worker_list,
519 &worker->workers->worker_list);
520 spin_unlock(&worker->workers->lock);
521 }
522 if (!worker->working) {
523 wake = 1;
524 worker->working = 1;
525 }
526
527 if (wake)
528 wake_up_process(worker->task);
529 spin_unlock_irqrestore(&worker->lock, flags);
530 out:
531
532 return 0;
533 }
534
535 void btrfs_set_work_high_prio(struct btrfs_work *work)
536 {
537 set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
538 }
539
540 /*
541 * places a struct btrfs_work into the pending queue of one of the kthreads
542 */
543 int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
544 {
545 struct btrfs_worker_thread *worker;
546 unsigned long flags;
547 int wake = 0;
548
549 /* don't requeue something already on a list */
550 if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
551 goto out;
552
553 worker = find_worker(workers);
554 if (workers->ordered) {
555 spin_lock_irqsave(&workers->lock, flags);
556 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
557 list_add_tail(&work->order_list,
558 &workers->prio_order_list);
559 } else {
560 list_add_tail(&work->order_list, &workers->order_list);
561 }
562 spin_unlock_irqrestore(&workers->lock, flags);
563 } else {
564 INIT_LIST_HEAD(&work->order_list);
565 }
566
567 spin_lock_irqsave(&worker->lock, flags);
568
569 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
570 list_add_tail(&work->list, &worker->prio_pending);
571 else
572 list_add_tail(&work->list, &worker->pending);
573 atomic_inc(&worker->num_pending);
574 check_busy_worker(worker);
575
576 /*
577 * avoid calling into wake_up_process if this thread has already
578 * been kicked
579 */
580 if (!worker->working)
581 wake = 1;
582 worker->working = 1;
583
584 if (wake)
585 wake_up_process(worker->task);
586 spin_unlock_irqrestore(&worker->lock, flags);
587
588 out:
589 return 0;
590 }