[GitHub/mt8127/android_kernel_alcatel_ttab.git] / fs / btrfs / async-thread.c

/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/freezer.h>
#include "async-thread.h"

#define WORK_QUEUED_BIT 0
#define WORK_DONE_BIT 1
#define WORK_ORDER_DONE_BIT 2
#define WORK_HIGH_PRIO_BIT 3

/*
 * Per-thread state for one btrfs worker: the kthread task pointer and the
 * lists of pending work queued to that thread.
 * One of these is allocated per thread.
 */
struct btrfs_worker_thread {
	/* pool we belong to */
	struct btrfs_workers *workers;

	/*
	 * lists of struct btrfs_work that are waiting for service; items
	 * flagged high priority go on prio_pending, the rest on pending
	 */
	struct list_head pending;
	struct list_head prio_pending;

	/* links this thread into a worker list of struct btrfs_workers */
	struct list_head worker_list;

	/* kthread */
	struct task_struct *task;

	/*
	 * number of work items handed to this thread that have not finished
	 * running yet: bumped when the thread is picked for an item and
	 * dropped after the item's func returns
	 */
	atomic_t num_pending;

	/* reference counter for this struct; the last put frees it */
	atomic_t refs;

	/*
	 * bumped each time this thread is picked while busy; used to rotate
	 * the busy list in batches
	 */
	unsigned long sequence;

	/* protects the pending and prio_pending lists. */
	spinlock_t lock;

	/* set to non-zero when this thread is already awake and kicking */
	int working;

	/* are we currently idle */
	int idle;
};

static int __btrfs_start_workers(struct btrfs_workers *workers);

/*
 * btrfs_start_workers uses kthread_run, which can block waiting for memory
 * for a very long time.  It will actually throttle on page writeback,
 * and so it may not make progress until after our btrfs worker threads
 * process all of the pending work structs in their queue
 *
 * This means we can't use btrfs_start_workers from inside a btrfs worker
 * thread that is used as part of cleaning dirty memory, which pretty much
 * involves all of the worker threads.
 *
 * Instead we have a helper queue that never has more than one thread,
 * where we schedule thread start operations.  This worker_start struct
 * is used to contain the work and hold a pointer to the queue that needs
 * another worker.
 */
struct worker_start {
	/* work item queued on the helper queue */
	struct btrfs_work work;
	/* pool that should receive the new worker thread */
	struct btrfs_workers *queue;
};

/*
 * Work function for a worker_start request: starts one more thread for the
 * pool recorded in the request and then frees the request.
 */
static void start_new_worker_func(struct btrfs_work *work)
{
	struct worker_start *req = container_of(work, struct worker_start, work);
	struct btrfs_workers *target = req->queue;

	__btrfs_start_workers(target);
	kfree(req);
}

/*
 * helper function to move a thread onto the idle list after it
 * has finished some requests.
 */
static void check_idle_worker(struct btrfs_worker_thread *worker)
{
	if (!worker->idle && atomic_read(&worker->num_pending) <
	    worker->workers->idle_thresh / 2) {
		unsigned long flags;
		spin_lock_irqsave(&worker->workers->lock, flags);
		worker->idle = 1;

		/* the list may be empty if the worker is just starting */
		if (!list_empty(&worker->worker_list)) {
			list_move(&worker->worker_list,
				 &worker->workers->idle_list);
		}
		spin_unlock_irqrestore(&worker->workers->lock, flags);
	}
}

/*
 * helper function to move a thread off the idle list after new
 * pending work is added.
 */
static void check_busy_worker(struct btrfs_worker_thread *worker)
{
	if (worker->idle && atomic_read(&worker->num_pending) >=
	    worker->workers->idle_thresh) {
		unsigned long flags;
		spin_lock_irqsave(&worker->workers->lock, flags);
		worker->idle = 0;

		if (!list_empty(&worker->worker_list)) {
			list_move_tail(&worker->worker_list,
				      &worker->workers->worker_list);
		}
		spin_unlock_irqrestore(&worker->workers->lock, flags);
	}
}

/*
 * If some queuer asked for another thread (atomic_start_pending), hand a
 * worker_start request to the helper queue so the thread gets created from
 * there.  The request is allocated before the pool lock is taken and is
 * thrown away again if the flag was cleared in the meantime or the pool is
 * already at its thread limit.
 */
static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
{
	struct btrfs_workers *pool = worker->workers;
	struct worker_start *req;
	unsigned long flags;

	rmb();
	if (!pool->atomic_start_pending)
		return;

	req = kzalloc(sizeof(*req), GFP_NOFS);
	if (!req)
		return;

	req->queue = pool;
	req->work.func = start_new_worker_func;

	spin_lock_irqsave(&pool->lock, flags);
	if (pool->atomic_start_pending) {
		/* the request is consumed whether or not a thread is started */
		pool->atomic_start_pending = 0;

		if (pool->num_workers + pool->num_workers_starting <
		    pool->max_workers) {
			pool->num_workers_starting += 1;
			spin_unlock_irqrestore(&pool->lock, flags);
			btrfs_queue_worker(pool->atomic_worker_start,
					   &req->work);
			return;
		}
	}

	/* nothing to start after all: discard the unused request */
	kfree(req);
	spin_unlock_irqrestore(&pool->lock, flags);
}

/*
 * Called after a work item's func has run.  Does nothing unless the pool is
 * ordered.  Otherwise marks @work done and then calls ordered_func and
 * ordered_free for the items at the front of the order lists, stopping at
 * the first item that is not done yet or whose ordered completion has
 * already been claimed.  The front of prio_order_list is always looked at
 * before order_list.
 */
static noinline void run_ordered_completions(struct btrfs_workers *workers,
					    struct btrfs_work *work)
{
	if (!workers->ordered)
		return;

	set_bit(WORK_DONE_BIT, &work->flags);

	spin_lock(&workers->order_lock);

	while (1) {
		/* from here on 'work' is the list front, not the argument */
		if (!list_empty(&workers->prio_order_list)) {
			work = list_entry(workers->prio_order_list.next,
					  struct btrfs_work, order_list);
		} else if (!list_empty(&workers->order_list)) {
			work = list_entry(workers->order_list.next,
					  struct btrfs_work, order_list);
		} else {
			break;
		}
		/* the front item has not finished running: nothing to do yet */
		if (!test_bit(WORK_DONE_BIT, &work->flags))
			break;

		/* we are going to call the ordered done function, but
		 * we leave the work item on the list as a barrier so
		 * that later work items that are done don't have their
		 * functions called before this one returns
		 */
		if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
			break;

		spin_unlock(&workers->order_lock);

		work->ordered_func(work);

		/* now take the lock again and drop our item from the list */
		spin_lock(&workers->order_lock);
		list_del(&work->order_list);
		spin_unlock(&workers->order_lock);

		/*
		 * we don't want to call the ordered free functions
		 * with the lock held though
		 */
		work->ordered_free(work);
		spin_lock(&workers->order_lock);
	}

	spin_unlock(&workers->order_lock);
}

/* Drop one reference on @worker; dropping the last one frees the struct. */
static void put_worker(struct btrfs_worker_thread *worker)
{
	if (!atomic_dec_and_test(&worker->refs))
		return;

	kfree(worker);
}

static int try_worker_shutdown(struct btrfs_worker_thread *worker)
{
	int freeit = 0;

	spin_lock_irq(&worker->lock);
	spin_lock(&worker->workers->lock);
	if (worker->workers->num_workers > 1 &&
	    worker->idle &&
	    !worker->working &&
	    !list_empty(&worker->worker_list) &&
	    list_empty(&worker->prio_pending) &&
	    list_empty(&worker->pending) &&
	    atomic_read(&worker->num_pending) == 0) {
		freeit = 1;
		list_del_init(&worker->worker_list);
		worker->workers->num_workers--;
	}
	spin_unlock(&worker->workers->lock);
	spin_unlock_irq(&worker->lock);

	if (freeit)
		put_worker(worker);
	return freeit;
}

/*
 * Pick the next work item for a worker thread to run.
 *
 * @prio_head and @head are the thread's private lists, filled by splicing
 * from worker->prio_pending and worker->pending under worker->lock.
 *
 * Order of preference:
 *  1. the front of the private high-priority list, taken without locking;
 *  2. if new high-priority work has been queued on the worker, refill both
 *     private lists first so that it is served before normal work;
 *  3. the front of the private normal list;
 *  4. otherwise refill from the worker's queues and look again.
 *
 * Returns the chosen item, still linked on its list, or NULL when there is
 * nothing to run.
 */
static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
					struct list_head *prio_head,
					struct list_head *head)
{
	struct btrfs_work *work = NULL;
	struct list_head *cur = NULL;

	/*
	 * Already-spliced high-priority work always wins.  Returning here
	 * keeps a non-empty normal list from overriding this choice.
	 */
	if (!list_empty(prio_head)) {
		cur = prio_head->next;
		goto out;
	}

	/* lock-free peek at the shared list; pairs with the queuers */
	smp_mb();
	if (!list_empty(&worker->prio_pending))
		goto refill;

	if (!list_empty(head)) {
		cur = head->next;
		goto out;
	}

refill:
	spin_lock_irq(&worker->lock);
	list_splice_tail_init(&worker->prio_pending, prio_head);
	list_splice_tail_init(&worker->pending, head);

	if (!list_empty(prio_head))
		cur = prio_head->next;
	else if (!list_empty(head))
		cur = head->next;
	spin_unlock_irq(&worker->lock);

	if (!cur)
		goto out_fail;

out:
	work = list_entry(cur, struct btrfs_work, list);

out_fail:
	return work;
}

/*
 * main loop for servicing work items
 *
 * @arg is the struct btrfs_worker_thread this kthread services.  The thread
 * drains its queues and then winds down in stages: a lock-free recheck of
 * the queues, a schedule_timeout(1) with 'working' still set, and finally a
 * sleep of up to 120 seconds with 'working' cleared so that the queue
 * functions wake it.  If that sleep ends and 'working' is still clear, the
 * thread tries to retire itself via try_worker_shutdown().
 *
 * Always returns 0.
 */
static int worker_loop(void *arg)
{
	struct btrfs_worker_thread *worker = arg;
	/* private lists, refilled from the worker's queues by get_next_work */
	struct list_head head;
	struct list_head prio_head;
	struct btrfs_work *work;

	INIT_LIST_HEAD(&head);
	INIT_LIST_HEAD(&prio_head);

	set_freezable();

	do {
again:
		while (1) {


			work = get_next_work(worker, &prio_head, &head);
			if (!work)
				break;

			list_del(&work->list);
			clear_bit(WORK_QUEUED_BIT, &work->flags);

			/* lets btrfs_requeue_work() find its way back to us */
			work->worker = worker;

			work->func(work);

			atomic_dec(&worker->num_pending);
			/*
			 * unless this is an ordered work queue,
			 * 'work' was probably freed by func above.
			 */
			run_ordered_completions(worker->workers, work);

			check_pending_worker_creates(worker);
			cond_resched();
		}

		spin_lock_irq(&worker->lock);
		check_idle_worker(worker);

		if (freezing(current)) {
			/* clear 'working' so queuers wake us after the freeze */
			worker->working = 0;
			spin_unlock_irq(&worker->lock);
			try_to_freeze();
		} else {
			spin_unlock_irq(&worker->lock);
			if (!kthread_freezable_should_stop(NULL)) {
				cpu_relax();
				/*
				 * we've dropped the lock, did someone else
				 * jump_in?
				 */
				smp_mb();
				if (!list_empty(&worker->pending) ||
				    !list_empty(&worker->prio_pending))
					continue;

				/*
				 * this short schedule allows more work to
				 * come in without the queue functions
				 * needing to go through wake_up_process()
				 *
				 * worker->working is still 1, so nobody
				 * is going to try and wake us up
				 */
				schedule_timeout(1);
				smp_mb();
				if (!list_empty(&worker->pending) ||
				    !list_empty(&worker->prio_pending))
					continue;

				if (kthread_freezable_should_stop(NULL))
					break;

				/* still no more work?, sleep for real */
				spin_lock_irq(&worker->lock);
				set_current_state(TASK_INTERRUPTIBLE);
				/* final check under the lock the queuers take */
				if (!list_empty(&worker->pending) ||
				    !list_empty(&worker->prio_pending)) {
					spin_unlock_irq(&worker->lock);
					set_current_state(TASK_RUNNING);
					goto again;
				}

				/*
				 * this makes sure we get a wakeup when someone
				 * adds something new to the queue
				 */
				worker->working = 0;
				spin_unlock_irq(&worker->lock);

				if (!kthread_freezable_should_stop(NULL)) {
					schedule_timeout(HZ * 120);
					/*
					 * the queue functions set 'working'
					 * when they add work; if it is still
					 * clear nothing was queued while we
					 * slept, so try to retire this thread
					 */
					if (!worker->working &&
					    try_worker_shutdown(worker)) {
						return 0;
					}
				}
			}
			__set_current_state(TASK_RUNNING);
		}
	} while (!kthread_freezable_should_stop(NULL));
	return 0;
}

/*
 * this will wait for all the worker threads to shutdown
 *
 * Every worker still linked into the pool is unlinked and stopped with
 * kthread_stop().  The pool lock is dropped around each kthread_stop() call
 * and retaken afterwards.
 */
void btrfs_stop_workers(struct btrfs_workers *workers)
{
	struct list_head *cur;
	struct btrfs_worker_thread *worker;
	int can_stop;

	spin_lock_irq(&workers->lock);
	/* fold the idle workers into worker_list so one loop covers both */
	list_splice_init(&workers->idle_list, &workers->worker_list);
	while (!list_empty(&workers->worker_list)) {
		cur = workers->worker_list.next;
		worker = list_entry(cur, struct btrfs_worker_thread,
				    worker_list);

		/* extra reference, held until the put at the bottom of the loop */
		atomic_inc(&worker->refs);
		workers->num_workers -= 1;
		if (!list_empty(&worker->worker_list)) {
			/*
			 * unlink the worker and drop one reference, as
			 * try_worker_shutdown() does when it unlinks one
			 */
			list_del_init(&worker->worker_list);
			put_worker(worker);
			can_stop = 1;
		} else
			can_stop = 0;
		spin_unlock_irq(&workers->lock);
		if (can_stop)
			kthread_stop(worker->task);
		spin_lock_irq(&workers->lock);
		put_worker(worker);
	}
	spin_unlock_irq(&workers->lock);
}

/*
 * Set up an empty worker pool.
 *
 * @workers:      pool to initialise
 * @name:         stored in the pool and used when naming its threads
 * @max:          stored as the pool's max_workers
 * @async_helper: pool used to start new threads on behalf of this one;
 *                stored as atomic_worker_start
 *
 * No threads are started here.  The pool begins unordered with an idle
 * threshold of 32.
 */
void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
			struct btrfs_workers *async_helper)
{
	/* configuration */
	workers->name = name;
	workers->max_workers = max;
	workers->idle_thresh = 32;
	workers->ordered = 0;
	workers->atomic_worker_start = async_helper;

	/* bookkeeping */
	workers->num_workers = 0;
	workers->num_workers_starting = 0;
	workers->atomic_start_pending = 0;

	/* locks */
	spin_lock_init(&workers->lock);
	spin_lock_init(&workers->order_lock);

	/* lists */
	INIT_LIST_HEAD(&workers->worker_list);
	INIT_LIST_HEAD(&workers->idle_list);
	INIT_LIST_HEAD(&workers->order_list);
	INIT_LIST_HEAD(&workers->prio_order_list);
}

/*
 * Create one more worker thread for @workers and put it on the idle list.
 * The max worker count is not enforced here, in case a caller needs to go
 * past it temporarily.
 *
 * The caller must already have counted this thread in
 * num_workers_starting; that count is dropped again here on success and on
 * failure.
 *
 * Returns 0 on success, -ENOMEM if the worker struct cannot be allocated,
 * or the error from kthread_run().
 */
static int __btrfs_start_workers(struct btrfs_workers *workers)
{
	struct btrfs_worker_thread *new_worker;
	int err;

	new_worker = kzalloc(sizeof(*new_worker), GFP_NOFS);
	if (!new_worker) {
		err = -ENOMEM;
		goto drop_starting;
	}

	INIT_LIST_HEAD(&new_worker->pending);
	INIT_LIST_HEAD(&new_worker->prio_pending);
	INIT_LIST_HEAD(&new_worker->worker_list);
	spin_lock_init(&new_worker->lock);

	atomic_set(&new_worker->num_pending, 0);
	atomic_set(&new_worker->refs, 1);
	new_worker->workers = workers;
	new_worker->task = kthread_run(worker_loop, new_worker,
				       "btrfs-%s-%d", workers->name,
				       workers->num_workers + 1);
	if (IS_ERR(new_worker->task)) {
		err = PTR_ERR(new_worker->task);
		goto free_worker;
	}

	spin_lock_irq(&workers->lock);
	list_add_tail(&new_worker->worker_list, &workers->idle_list);
	new_worker->idle = 1;
	workers->num_workers++;
	workers->num_workers_starting--;
	WARN_ON(workers->num_workers_starting < 0);
	spin_unlock_irq(&workers->lock);

	return 0;

free_worker:
	kfree(new_worker);
drop_starting:
	spin_lock_irq(&workers->lock);
	workers->num_workers_starting--;
	spin_unlock_irq(&workers->lock);
	return err;
}

/*
 * Start one worker thread for @workers.  The thread is counted as
 * "starting" under the pool lock before it is created.
 *
 * Returns whatever __btrfs_start_workers() returns.
 */
int btrfs_start_workers(struct btrfs_workers *workers)
{
	int ret;

	spin_lock_irq(&workers->lock);
	workers->num_workers_starting += 1;
	spin_unlock_irq(&workers->lock);

	ret = __btrfs_start_workers(workers);
	return ret;
}

/*
 * Pick a worker thread that does not have much to do right now.
 *
 * An idle worker is preferred and is left at the front of the idle list,
 * which improves the chance that the next submission reuses the same thread
 * and maybe catches it while it is still working.
 *
 * Returns NULL when no worker is idle and either the pool is still below
 * its thread limit (counting threads being started) or it has no busy
 * workers.  Otherwise returns the busy worker at the front of the list.
 */
static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
{
	struct btrfs_worker_thread *busy;
	int below_limit;

	if (!list_empty(&workers->idle_list))
		return list_first_entry(&workers->idle_list,
					struct btrfs_worker_thread,
					worker_list);

	below_limit = (workers->num_workers + workers->num_workers_starting) <
		      workers->max_workers;
	if (below_limit || list_empty(&workers->worker_list))
		return NULL;

	/*
	 * A busy worker is rotated to the end of the list to keep things
	 * somewhat evenly balanced, but only once every idle_thresh picks,
	 * so that requests submitted at roughly the same time land on the
	 * same worker.
	 */
	busy = list_first_entry(&workers->worker_list,
				struct btrfs_worker_thread, worker_list);
	busy->sequence++;

	if (busy->sequence % workers->idle_thresh == 0)
		list_move_tail(&busy->worker_list, &workers->worker_list);
	return busy;
}

/*
 * Choose the worker thread that takes the next job: an idle worker if there
 * is one, a freshly started worker while the pool is below its limit, or
 * else any existing worker.
 *
 * When the pool has a helper queue (atomic_worker_start), no thread is
 * started from here; atomic_start_pending is set instead and an existing
 * worker is used.
 *
 * The chosen worker's num_pending has been incremented on return.
 */
static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
{
	struct btrfs_worker_thread *chosen;
	struct list_head *entry;
	unsigned long flags;
	int err;

	spin_lock_irqsave(&workers->lock, flags);
	for (;;) {
		chosen = next_worker(workers);
		if (chosen)
			goto found;

		/* already at the limit: make do with what exists */
		if (workers->num_workers + workers->num_workers_starting >=
		    workers->max_workers)
			break;

		/* ask for a new thread to be started from the helper queue */
		if (workers->atomic_worker_start) {
			workers->atomic_start_pending = 1;
			break;
		}

		/* we're below the limit, start another worker */
		workers->num_workers_starting++;
		spin_unlock_irqrestore(&workers->lock, flags);
		err = __btrfs_start_workers(workers);
		spin_lock_irqsave(&workers->lock, flags);
		if (err)
			break;
	}

	/*
	 * we have failed to find any workers, just return the first one we
	 * can find; an idle one wins over a busy one.
	 */
	entry = NULL;
	if (!list_empty(&workers->worker_list))
		entry = workers->worker_list.next;
	if (!list_empty(&workers->idle_list))
		entry = workers->idle_list.next;
	BUG_ON(!entry);
	chosen = list_entry(entry, struct btrfs_worker_thread, worker_list);

found:
	/*
	 * this makes sure the worker doesn't exit before it is placed
	 * onto a busy/idle list
	 */
	atomic_inc(&chosen->num_pending);
	spin_unlock_irqrestore(&workers->lock, flags);
	return chosen;
}

/*
 * Put a work item back on the tail of the queue of the worker recorded in
 * work->worker.  It is intended for long running work functions that make
 * some progress and want to give the cpu up for others.
 *
 * Does nothing if the item is already marked queued.
 */
void btrfs_requeue_work(struct btrfs_work *work)
{
	struct btrfs_worker_thread *worker = work->worker;
	struct list_head *queue;
	unsigned long flags;

	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
		return;

	spin_lock_irqsave(&worker->lock, flags);
	queue = test_bit(WORK_HIGH_PRIO_BIT, &work->flags) ?
		&worker->prio_pending : &worker->pending;
	list_add_tail(&work->list, queue);
	atomic_inc(&worker->num_pending);

	/* by definition we're busy, take ourselves off the idle list */
	if (worker->idle) {
		spin_lock(&worker->workers->lock);
		worker->idle = 0;
		list_move_tail(&worker->worker_list,
			       &worker->workers->worker_list);
		spin_unlock(&worker->workers->lock);
	}

	/* only wake the thread if it has not been kicked already */
	if (!worker->working) {
		worker->working = 1;
		wake_up_process(worker->task);
	}
	spin_unlock_irqrestore(&worker->lock, flags);
}

/*
 * Flag a work item as high priority.  The queue functions in this file put
 * flagged items on the prio lists instead of the normal ones, so the flag
 * only takes effect for queueing done after this call.
 */
void btrfs_set_work_high_prio(struct btrfs_work *work)
{
	set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
}

/*
 * Place a struct btrfs_work into the pending queue of one of the pool's
 * kthreads and wake that thread if it is not already working.
 *
 * Does nothing if the item is already marked queued.  For ordered pools the
 * item is also appended to the matching order list; the order lock is taken
 * with plain spin_lock(), so ordered queues must not be used from an
 * interrupt handler.
 */
void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
{
	struct btrfs_worker_thread *worker;
	struct list_head *queue;
	unsigned long flags;
	int need_wake;

	/* don't requeue something already on a list */
	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
		return;

	worker = find_worker(workers);

	if (!workers->ordered) {
		INIT_LIST_HEAD(&work->order_list);
	} else {
		/*
		 * you're not allowed to do ordered queues from an
		 * interrupt handler
		 */
		spin_lock(&workers->order_lock);
		if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
			list_add_tail(&work->order_list,
				      &workers->prio_order_list);
		else
			list_add_tail(&work->order_list, &workers->order_list);
		spin_unlock(&workers->order_lock);
	}

	spin_lock_irqsave(&worker->lock, flags);

	queue = test_bit(WORK_HIGH_PRIO_BIT, &work->flags) ?
		&worker->prio_pending : &worker->pending;
	list_add_tail(&work->list, queue);
	check_busy_worker(worker);

	/*
	 * avoid calling into wake_up_process if this thread has already
	 * been kicked
	 */
	need_wake = !worker->working;
	worker->working = 1;

	if (need_wake)
		wake_up_process(worker->task);
	spin_unlock_irqrestore(&worker->lock, flags);
}
Commit	Line	Data
	1	/*
	2	* Copyright (C) 2007 Oracle. All rights reserved.
	3	*
	4	* This program is free software; you can redistribute it and/or
	5	* modify it under the terms of the GNU General Public
	6	* License v2 as published by the Free Software Foundation.
	7	*
	8	* This program is distributed in the hope that it will be useful,
	9	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	11	* General Public License for more details.
	12	*
	13	* You should have received a copy of the GNU General Public
	14	* License along with this program; if not, write to the
	15	* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
	16	* Boston, MA 021110-1307, USA.
	17	*/
	18
	19	#include <linux/kthread.h>
	20	#include <linux/slab.h>
	21	#include <linux/list.h>
	22	#include <linux/spinlock.h>
	23	#include <linux/freezer.h>
	24	#include "async-thread.h"
	25
	26	#define WORK_QUEUED_BIT 0
	27	#define WORK_DONE_BIT 1
	28	#define WORK_ORDER_DONE_BIT 2
	29	#define WORK_HIGH_PRIO_BIT 3
	30
	31	/*
	32	* container for the kthread task pointer and the list of pending work
	33	* One of these is allocated per thread.
	34	*/
	35	struct btrfs_worker_thread {
	36	/* pool we belong to */
	37	struct btrfs_workers *workers;
	38
	39	/* list of struct btrfs_work that are waiting for service */
	40	struct list_head pending;
	41	struct list_head prio_pending;
	42
	43	/* list of worker threads from struct btrfs_workers */
	44	struct list_head worker_list;
	45
	46	/* kthread */
	47	struct task_struct *task;
	48
	49	/* number of things on the pending list */
	50	atomic_t num_pending;
	51
	52	/* reference counter for this struct */
	53	atomic_t refs;
	54
	55	unsigned long sequence;
	56
	57	/* protects the pending list. */
	58	spinlock_t lock;
	59
	60	/* set to non-zero when this thread is already awake and kicking */
	61	int working;
	62
	63	/* are we currently idle */
	64	int idle;
	65	};
	66
	67	static int __btrfs_start_workers(struct btrfs_workers *workers);
	68
	69	/*
	70	* btrfs_start_workers uses kthread_run, which can block waiting for memory
	71	* for a very long time. It will actually throttle on page writeback,
	72	* and so it may not make progress until after our btrfs worker threads
	73	* process all of the pending work structs in their queue
	74	*
	75	* This means we can't use btrfs_start_workers from inside a btrfs worker
	76	* thread that is used as part of cleaning dirty memory, which pretty much
	77	* involves all of the worker threads.
	78	*
	79	* Instead we have a helper queue who never has more than one thread
	80	* where we scheduler thread start operations. This worker_start struct
	81	* is used to contain the work and hold a pointer to the queue that needs
	82	* another worker.
	83	*/
	84	struct worker_start {
	85	struct btrfs_work work;
	86	struct btrfs_workers *queue;
	87	};
	88
	89	static void start_new_worker_func(struct btrfs_work *work)
	90	{
	91	struct worker_start *start;
	92	start = container_of(work, struct worker_start, work);
	93	__btrfs_start_workers(start->queue);
	94	kfree(start);
	95	}
	96
	97	/*
	98	* helper function to move a thread onto the idle list after it
	99	* has finished some requests.
	100	*/
	101	static void check_idle_worker(struct btrfs_worker_thread *worker)
	102	{
	103	if (!worker->idle && atomic_read(&worker->num_pending) <
	104	worker->workers->idle_thresh / 2) {
	105	unsigned long flags;
	106	spin_lock_irqsave(&worker->workers->lock, flags);
	107	worker->idle = 1;
	108
	109	/* the list may be empty if the worker is just starting */
	110	if (!list_empty(&worker->worker_list)) {
	111	list_move(&worker->worker_list,
	112	&worker->workers->idle_list);
	113	}
	114	spin_unlock_irqrestore(&worker->workers->lock, flags);
	115	}
	116	}
	117
	118	/*
	119	* helper function to move a thread off the idle list after new
	120	* pending work is added.
	121	*/
	122	static void check_busy_worker(struct btrfs_worker_thread *worker)
	123	{
	124	if (worker->idle && atomic_read(&worker->num_pending) >=
	125	worker->workers->idle_thresh) {
	126	unsigned long flags;
	127	spin_lock_irqsave(&worker->workers->lock, flags);
	128	worker->idle = 0;
	129
	130	if (!list_empty(&worker->worker_list)) {
	131	list_move_tail(&worker->worker_list,
	132	&worker->workers->worker_list);
	133	}
	134	spin_unlock_irqrestore(&worker->workers->lock, flags);
	135	}
	136	}
	137
	138	static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
	139	{
	140	struct btrfs_workers *workers = worker->workers;
	141	struct worker_start *start;
	142	unsigned long flags;
	143
	144	rmb();
	145	if (!workers->atomic_start_pending)
	146	return;
	147
	148	start = kzalloc(sizeof(*start), GFP_NOFS);
	149	if (!start)
	150	return;
	151
	152	start->work.func = start_new_worker_func;
	153	start->queue = workers;
	154
	155	spin_lock_irqsave(&workers->lock, flags);
	156	if (!workers->atomic_start_pending)
	157	goto out;
	158
	159	workers->atomic_start_pending = 0;
	160	if (workers->num_workers + workers->num_workers_starting >=
	161	workers->max_workers)
	162	goto out;
	163
	164	workers->num_workers_starting += 1;
	165	spin_unlock_irqrestore(&workers->lock, flags);
	166	btrfs_queue_worker(workers->atomic_worker_start, &start->work);
	167	return;
	168
	169	out:
	170	kfree(start);
	171	spin_unlock_irqrestore(&workers->lock, flags);
	172	}
	173
	174	static noinline void run_ordered_completions(struct btrfs_workers *workers,
	175	struct btrfs_work *work)
	176	{
	177	if (!workers->ordered)
	178	return;
	179
	180	set_bit(WORK_DONE_BIT, &work->flags);
	181
	182	spin_lock(&workers->order_lock);
	183
	184	while (1) {
	185	if (!list_empty(&workers->prio_order_list)) {
	186	work = list_entry(workers->prio_order_list.next,
	187	struct btrfs_work, order_list);
	188	} else if (!list_empty(&workers->order_list)) {
	189	work = list_entry(workers->order_list.next,
	190	struct btrfs_work, order_list);
	191	} else {
	192	break;
	193	}
	194	if (!test_bit(WORK_DONE_BIT, &work->flags))
	195	break;
	196
	197	/* we are going to call the ordered done function, but
	198	* we leave the work item on the list as a barrier so
	199	* that later work items that are done don't have their
	200	* functions called before this one returns
	201	*/
	202	if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
	203	break;
	204
	205	spin_unlock(&workers->order_lock);
	206
	207	work->ordered_func(work);
	208
	209	/* now take the lock again and drop our item from the list */
	210	spin_lock(&workers->order_lock);
	211	list_del(&work->order_list);
	212	spin_unlock(&workers->order_lock);
	213
	214	/*
	215	* we don't want to call the ordered free functions
	216	* with the lock held though
	217	*/
	218	work->ordered_free(work);
	219	spin_lock(&workers->order_lock);
	220	}
	221
	222	spin_unlock(&workers->order_lock);
	223	}
	224
	225	static void put_worker(struct btrfs_worker_thread *worker)
	226	{
	227	if (atomic_dec_and_test(&worker->refs))
	228	kfree(worker);
	229	}
	230
	231	static int try_worker_shutdown(struct btrfs_worker_thread *worker)
	232	{
	233	int freeit = 0;
	234
	235	spin_lock_irq(&worker->lock);
	236	spin_lock(&worker->workers->lock);
	237	if (worker->workers->num_workers > 1 &&
	238	worker->idle &&
	239	!worker->working &&
	240	!list_empty(&worker->worker_list) &&
	241	list_empty(&worker->prio_pending) &&
	242	list_empty(&worker->pending) &&
	243	atomic_read(&worker->num_pending) == 0) {
	244	freeit = 1;
	245	list_del_init(&worker->worker_list);
	246	worker->workers->num_workers--;
	247	}
	248	spin_unlock(&worker->workers->lock);
	249	spin_unlock_irq(&worker->lock);
	250
	251	if (freeit)
	252	put_worker(worker);
	253	return freeit;
	254	}
	255
	256	static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
	257	struct list_head *prio_head,
	258	struct list_head *head)
	259	{
	260	struct btrfs_work *work = NULL;
	261	struct list_head *cur = NULL;
	262
	263	if(!list_empty(prio_head))
	264	cur = prio_head->next;
	265
	266	smp_mb();
	267	if (!list_empty(&worker->prio_pending))
	268	goto refill;
	269
	270	if (!list_empty(head))
	271	cur = head->next;
	272
	273	if (cur)
	274	goto out;
	275
	276	refill:
	277	spin_lock_irq(&worker->lock);
	278	list_splice_tail_init(&worker->prio_pending, prio_head);
	279	list_splice_tail_init(&worker->pending, head);
	280
	281	if (!list_empty(prio_head))
	282	cur = prio_head->next;
	283	else if (!list_empty(head))
	284	cur = head->next;
	285	spin_unlock_irq(&worker->lock);
	286
	287	if (!cur)
	288	goto out_fail;
	289
	290	out:
	291	work = list_entry(cur, struct btrfs_work, list);
	292
	293	out_fail:
	294	return work;
	295	}
	296
	297	/*
	298	* main loop for servicing work items
	299	*/
	300	static int worker_loop(void *arg)
	301	{
	302	struct btrfs_worker_thread *worker = arg;
	303	struct list_head head;
	304	struct list_head prio_head;
	305	struct btrfs_work *work;
	306
	307	INIT_LIST_HEAD(&head);
	308	INIT_LIST_HEAD(&prio_head);
	309
	310	set_freezable();
	311
	312	do {
	313	again:
	314	while (1) {
	315
	316
	317	work = get_next_work(worker, &prio_head, &head);
	318	if (!work)
	319	break;
	320
	321	list_del(&work->list);
	322	clear_bit(WORK_QUEUED_BIT, &work->flags);
	323
	324	work->worker = worker;
	325
	326	work->func(work);
	327
	328	atomic_dec(&worker->num_pending);
	329	/*
	330	* unless this is an ordered work queue,
	331	* 'work' was probably freed by func above.
	332	*/
	333	run_ordered_completions(worker->workers, work);
	334
	335	check_pending_worker_creates(worker);
	336	cond_resched();
	337	}
	338
	339	spin_lock_irq(&worker->lock);
	340	check_idle_worker(worker);
	341
	342	if (freezing(current)) {
	343	worker->working = 0;
	344	spin_unlock_irq(&worker->lock);
	345	try_to_freeze();
	346	} else {
	347	spin_unlock_irq(&worker->lock);
	348	if (!kthread_freezable_should_stop(NULL)) {
	349	cpu_relax();
	350	/*
	351	* we've dropped the lock, did someone else
	352	* jump_in?
	353	*/
	354	smp_mb();
	355	if (!list_empty(&worker->pending) ||
	356	!list_empty(&worker->prio_pending))
	357	continue;
	358
	359	/*
	360	* this short schedule allows more work to
	361	* come in without the queue functions
	362	* needing to go through wake_up_process()
	363	*
	364	* worker->working is still 1, so nobody
	365	* is going to try and wake us up
	366	*/
	367	schedule_timeout(1);
	368	smp_mb();
	369	if (!list_empty(&worker->pending) ||
	370	!list_empty(&worker->prio_pending))
	371	continue;
	372
	373	if (kthread_freezable_should_stop(NULL))
	374	break;
	375
	376	/* still no more work?, sleep for real */
	377	spin_lock_irq(&worker->lock);
	378	set_current_state(TASK_INTERRUPTIBLE);
	379	if (!list_empty(&worker->pending) ||
	380	!list_empty(&worker->prio_pending)) {
	381	spin_unlock_irq(&worker->lock);
	382	set_current_state(TASK_RUNNING);
	383	goto again;
	384	}
	385
	386	/*
	387	* this makes sure we get a wakeup when someone
	388	* adds something new to the queue
	389	*/
	390	worker->working = 0;
	391	spin_unlock_irq(&worker->lock);
	392
	393	if (!kthread_freezable_should_stop(NULL)) {
	394	schedule_timeout(HZ * 120);
	395	if (!worker->working &&
	396	try_worker_shutdown(worker)) {
	397	return 0;
	398	}
	399	}
	400	}
	401	__set_current_state(TASK_RUNNING);
	402	}
	403	} while (!kthread_freezable_should_stop(NULL));
	404	return 0;
	405	}
	406
	407	/*
	408	* this will wait for all the worker threads to shutdown
	409	*/
	410	void btrfs_stop_workers(struct btrfs_workers *workers)
	411	{
	412	struct list_head *cur;
	413	struct btrfs_worker_thread *worker;
	414	int can_stop;
	415
	416	spin_lock_irq(&workers->lock);
	417	list_splice_init(&workers->idle_list, &workers->worker_list);
	418	while (!list_empty(&workers->worker_list)) {
	419	cur = workers->worker_list.next;
	420	worker = list_entry(cur, struct btrfs_worker_thread,
	421	worker_list);
	422
	423	atomic_inc(&worker->refs);
	424	workers->num_workers -= 1;
	425	if (!list_empty(&worker->worker_list)) {
	426	list_del_init(&worker->worker_list);
	427	put_worker(worker);
	428	can_stop = 1;
	429	} else
	430	can_stop = 0;
	431	spin_unlock_irq(&workers->lock);
	432	if (can_stop)
	433	kthread_stop(worker->task);
	434	spin_lock_irq(&workers->lock);
	435	put_worker(worker);
	436	}
	437	spin_unlock_irq(&workers->lock);
	438	}
	439
	440	/*
	441	* simple init on struct btrfs_workers
	442	*/
	443	void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
	444	struct btrfs_workers *async_helper)
	445	{
	446	workers->num_workers = 0;
	447	workers->num_workers_starting = 0;
	448	INIT_LIST_HEAD(&workers->worker_list);
	449	INIT_LIST_HEAD(&workers->idle_list);
	450	INIT_LIST_HEAD(&workers->order_list);
	451	INIT_LIST_HEAD(&workers->prio_order_list);
	452	spin_lock_init(&workers->lock);
	453	spin_lock_init(&workers->order_lock);
	454	workers->max_workers = max;
	455	workers->idle_thresh = 32;
	456	workers->name = name;
	457	workers->ordered = 0;
	458	workers->atomic_start_pending = 0;
	459	workers->atomic_worker_start = async_helper;
	460	}
	461
	462	/*
	463	* starts new worker threads. This does not enforce the max worker
	464	* count in case you need to temporarily go past it.
	465	*/
	466	static int __btrfs_start_workers(struct btrfs_workers *workers)
	467	{
	468	struct btrfs_worker_thread *worker;
	469	int ret = 0;
	470
	471	worker = kzalloc(sizeof(*worker), GFP_NOFS);
	472	if (!worker) {
	473	ret = -ENOMEM;
	474	goto fail;
	475	}
	476
	477	INIT_LIST_HEAD(&worker->pending);
	478	INIT_LIST_HEAD(&worker->prio_pending);
	479	INIT_LIST_HEAD(&worker->worker_list);
	480	spin_lock_init(&worker->lock);
	481
	482	atomic_set(&worker->num_pending, 0);
	483	atomic_set(&worker->refs, 1);
	484	worker->workers = workers;
	485	worker->task = kthread_run(worker_loop, worker,
	486	"btrfs-%s-%d", workers->name,
	487	workers->num_workers + 1);
	488	if (IS_ERR(worker->task)) {
	489	ret = PTR_ERR(worker->task);
	490	kfree(worker);
	491	goto fail;
	492	}
	493	spin_lock_irq(&workers->lock);
	494	list_add_tail(&worker->worker_list, &workers->idle_list);
	495	worker->idle = 1;
	496	workers->num_workers++;
	497	workers->num_workers_starting--;
	498	WARN_ON(workers->num_workers_starting < 0);
	499	spin_unlock_irq(&workers->lock);
	500
	501	return 0;
	502	fail:
	503	spin_lock_irq(&workers->lock);
	504	workers->num_workers_starting--;
	505	spin_unlock_irq(&workers->lock);
	506	return ret;
	507	}
	508
	509	int btrfs_start_workers(struct btrfs_workers *workers)
	510	{
	511	spin_lock_irq(&workers->lock);
	512	workers->num_workers_starting++;
	513	spin_unlock_irq(&workers->lock);
	514	return __btrfs_start_workers(workers);
	515	}
	516
	517	/*
	518	* run through the list and find a worker thread that doesn't have a lot
	519	* to do right now. This can return null if we aren't yet at the thread
	520	* count limit and all of the threads are busy.
	521	*/
	522	static struct btrfs_worker_thread next_worker(struct btrfs_workers workers)
	523	{
	524	struct btrfs_worker_thread *worker;
	525	struct list_head *next;
	526	int enforce_min;
	527
	528	enforce_min = (workers->num_workers + workers->num_workers_starting) <
	529	workers->max_workers;
	530
	531	/*
	532	* if we find an idle thread, don't move it to the end of the
	533	* idle list. This improves the chance that the next submission
	534	* will reuse the same thread, and maybe catch it while it is still
	535	* working
	536	*/
	537	if (!list_empty(&workers->idle_list)) {
	538	next = workers->idle_list.next;
	539	worker = list_entry(next, struct btrfs_worker_thread,
	540	worker_list);
	541	return worker;
	542	}
	543	if (enforce_min \|\| list_empty(&workers->worker_list))
	544	return NULL;
	545
	546	/*
	547	* if we pick a busy task, move the task to the end of the list.
	548	* hopefully this will keep things somewhat evenly balanced.
	549	* Do the move in batches based on the sequence number. This groups
	550	* requests submitted at roughly the same time onto the same worker.
	551	*/
	552	next = workers->worker_list.next;
	553	worker = list_entry(next, struct btrfs_worker_thread, worker_list);
	554	worker->sequence++;
	555
	556	if (worker->sequence % workers->idle_thresh == 0)
	557	list_move_tail(next, &workers->worker_list);
	558	return worker;
	559	}
	560
	561	/*
	562	* selects a worker thread to take the next job. This will either find
	563	* an idle worker, start a new worker up to the max count, or just return
	564	* one of the existing busy workers.
	565	*/
	566	static struct btrfs_worker_thread find_worker(struct btrfs_workers workers)
	567	{
	568	struct btrfs_worker_thread *worker;
	569	unsigned long flags;
	570	struct list_head *fallback;
	571	int ret;
	572
	573	spin_lock_irqsave(&workers->lock, flags);
	574	again:
	575	worker = next_worker(workers);
	576
	577	if (!worker) {
	578	if (workers->num_workers + workers->num_workers_starting >=
	579	workers->max_workers) {
	580	goto fallback;
	581	} else if (workers->atomic_worker_start) {
	582	workers->atomic_start_pending = 1;
	583	goto fallback;
	584	} else {
	585	workers->num_workers_starting++;
	586	spin_unlock_irqrestore(&workers->lock, flags);
	587	/* we're below the limit, start another worker */
	588	ret = __btrfs_start_workers(workers);
	589	spin_lock_irqsave(&workers->lock, flags);
	590	if (ret)
	591	goto fallback;
	592	goto again;
	593	}
	594	}
	595	goto found;
	596
	597	fallback:
	598	fallback = NULL;
	599	/*
	600	* we have failed to find any workers, just
	601	* return the first one we can find.
	602	*/
	603	if (!list_empty(&workers->worker_list))
	604	fallback = workers->worker_list.next;
	605	if (!list_empty(&workers->idle_list))
	606	fallback = workers->idle_list.next;
	607	BUG_ON(!fallback);
	608	worker = list_entry(fallback,
	609	struct btrfs_worker_thread, worker_list);
	610	found:
	611	/*
	612	* this makes sure the worker doesn't exit before it is placed
	613	* onto a busy/idle list
	614	*/
	615	atomic_inc(&worker->num_pending);
	616	spin_unlock_irqrestore(&workers->lock, flags);
	617	return worker;
	618	}
	619
	620	/*
	621	* btrfs_requeue_work just puts the work item back on the tail of the list
	622	* it was taken from. It is intended for use with long running work functions
	623	* that make some progress and want to give the cpu up for others.
	624	*/
	625	void btrfs_requeue_work(struct btrfs_work *work)
	626	{
	627	struct btrfs_worker_thread *worker = work->worker;
	628	unsigned long flags;
	629	int wake = 0;
	630
	631	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
	632	return;
	633
	634	spin_lock_irqsave(&worker->lock, flags);
	635	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
	636	list_add_tail(&work->list, &worker->prio_pending);
	637	else
	638	list_add_tail(&work->list, &worker->pending);
	639	atomic_inc(&worker->num_pending);
	640
	641	/* by definition we're busy, take ourselves off the idle
	642	* list
	643	*/
	644	if (worker->idle) {
	645	spin_lock(&worker->workers->lock);
	646	worker->idle = 0;
	647	list_move_tail(&worker->worker_list,
	648	&worker->workers->worker_list);
	649	spin_unlock(&worker->workers->lock);
	650	}
	651	if (!worker->working) {
	652	wake = 1;
	653	worker->working = 1;
	654	}
	655
	656	if (wake)
	657	wake_up_process(worker->task);
	658	spin_unlock_irqrestore(&worker->lock, flags);
	659	}
	660
	661	void btrfs_set_work_high_prio(struct btrfs_work *work)
	662	{
	663	set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
	664	}
	665
	666	/*
	667	* places a struct btrfs_work into the pending queue of one of the kthreads
	668	*/
	669	void btrfs_queue_worker(struct btrfs_workers workers, struct btrfs_work work)
	670	{
	671	struct btrfs_worker_thread *worker;
	672	unsigned long flags;
	673	int wake = 0;
	674
	675	/* don't requeue something already on a list */
	676	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
	677	return;
	678
	679	worker = find_worker(workers);
	680	if (workers->ordered) {
	681	/*
	682	* you're not allowed to do ordered queues from an
	683	* interrupt handler
	684	*/
	685	spin_lock(&workers->order_lock);
	686	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
	687	list_add_tail(&work->order_list,
	688	&workers->prio_order_list);
	689	} else {
	690	list_add_tail(&work->order_list, &workers->order_list);
	691	}
	692	spin_unlock(&workers->order_lock);
	693	} else {
	694	INIT_LIST_HEAD(&work->order_list);
	695	}
	696
	697	spin_lock_irqsave(&worker->lock, flags);
	698
	699	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
	700	list_add_tail(&work->list, &worker->prio_pending);
	701	else
	702	list_add_tail(&work->list, &worker->pending);
	703	check_busy_worker(worker);
	704
	705	/*
	706	* avoid calling into wake_up_process if this thread has already
	707	* been kicked
	708	*/
	709	if (!worker->working)
	710	wake = 1;
	711	worker->working = 1;
	712
	713	if (wake)
	714	wake_up_process(worker->task);
	715	spin_unlock_irqrestore(&worker->lock, flags);
	716	}