#include <linux/sched/signal.h>
#include <trace/events/block.h>
+#include <linux/list_sort.h>
#include "md.h"
#include "raid5.h"
return 1;
}
-static void flush_deferred_bios(struct r5conf *conf)
+static void dispatch_bio_list(struct bio_list *tmp)
{
-	struct bio_list tmp;
	struct bio *bio;
-	if (!conf->batch_bio_dispatch || !conf->group_cnt)
+	while ((bio = bio_list_pop(tmp)))
+		generic_make_request(bio);
+}
+
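+/* list_sort() comparator: order pending stripe entries by start sector */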
+static int cmp_stripe(void *priv, struct list_head *a, struct list_head *b)
+{
+	const struct r5pending_data *da = list_entry(a,
+				struct r5pending_data, sibling);
+	const struct r5pending_data *db = list_entry(b,
+				struct r5pending_data, sibling);
+	if (da->sector > db->sector)
+		return 1;
+	if (da->sector < db->sector)
+		return -1;
+	return 0;
+}
+
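+/*
+ * Move up to @target pending stripe entries, visited in sorted-sector order
+ * starting where the previous flush left off, back onto the free list and
+ * collect their queued bios into @list; the caller submits them after
+ * dropping pending_bios_lock.
+ */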
+static void dispatch_defer_bios(struct r5conf *conf, int target,
+				struct bio_list *list)
+{
+	struct r5pending_data *data;
+	struct list_head *first, *next = NULL;
+	int cnt = 0;
+
+	if (conf->pending_data_cnt == 0)
+		return;
+
+	list_sort(NULL, &conf->pending_list, cmp_stripe);
+
+	first = conf->pending_list.next;
+
+	/* temporarily move the head */
+	if (conf->next_pending_data)
+		list_move_tail(&conf->pending_list,
+				&conf->next_pending_data->sibling);
+
+	while (!list_empty(&conf->pending_list)) {
+		data = list_first_entry(&conf->pending_list,
+			struct r5pending_data, sibling);
+		if (&data->sibling == first)
+			first = data->sibling.next;
+		next = data->sibling.next;
+
+		bio_list_merge(list, &data->bios);
+		list_move(&data->sibling, &conf->free_list);
+		cnt++;
+		if (cnt >= target)
+			break;
+	}
+	conf->pending_data_cnt -= cnt;
+	BUG_ON(conf->pending_data_cnt < 0 || cnt < target);
+
+	if (next != &conf->pending_list)
+		conf->next_pending_data = list_entry(next,
+				struct r5pending_data, sibling);
+	else
+		conf->next_pending_data = NULL;
+	/* list isn't empty */
+	if (first != &conf->pending_list)
+		list_move_tail(&conf->pending_list, first);
+}
+
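+/* drain and submit every currently deferred bio in one sorted pass */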
+static void flush_deferred_bios(struct r5conf *conf)
+{
+	struct bio_list tmp = BIO_EMPTY_LIST;
+
+	if (conf->pending_data_cnt == 0)
		return;
-	bio_list_init(&tmp);
	spin_lock(&conf->pending_bios_lock);
-	bio_list_merge(&tmp, &conf->pending_bios);
-	bio_list_init(&conf->pending_bios);
+	dispatch_defer_bios(conf, conf->pending_data_cnt, &tmp);
+	BUG_ON(conf->pending_data_cnt != 0);
	spin_unlock(&conf->pending_bios_lock);
-	while ((bio = bio_list_pop(&tmp)))
-		generic_make_request(bio);
+	dispatch_bio_list(&tmp);
}
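+/*
+ * Queue one stripe's write bios for deferred, sector-sorted submission;
+ * once PENDING_IO_MAX stripes are pending, dispatch a batch immediately.
+ */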
-static void defer_bio_issue(struct r5conf *conf, struct bio *bio)
+static void defer_issue_bios(struct r5conf *conf, sector_t sector,
+			     struct bio_list *bios)
{
-	/*
-	 * change group_cnt will drain all bios, so this is safe
-	 *
-	 * A read generally means a read-modify-write, which usually means a
-	 * randwrite, so we don't delay it
-	 */
-	if (!conf->batch_bio_dispatch || !conf->group_cnt ||
-	    bio_op(bio) == REQ_OP_READ) {
-		generic_make_request(bio);
-		return;
-	}
+	struct bio_list tmp = BIO_EMPTY_LIST;
+	struct r5pending_data *ent;
+
	spin_lock(&conf->pending_bios_lock);
-	bio_list_add(&conf->pending_bios, bio);
+	ent = list_first_entry(&conf->free_list, struct r5pending_data,
+				sibling);
+	list_move_tail(&ent->sibling, &conf->pending_list);
+	ent->sector = sector;
+	bio_list_init(&ent->bios);
+	bio_list_merge(&ent->bios, bios);
+	conf->pending_data_cnt++;
+	if (conf->pending_data_cnt >= PENDING_IO_MAX)
+		dispatch_defer_bios(conf, PENDING_IO_ONE_FLUSH, &tmp);
+
	spin_unlock(&conf->pending_bios_lock);
-	md_wakeup_thread(conf->mddev->thread);
+
+	dispatch_bio_list(&tmp);
}
static void
struct r5conf *conf = sh->raid_conf;
int i, disks = sh->disks;
struct stripe_head *head_sh = sh;
+ struct bio_list pending_bios = BIO_EMPTY_LIST;
+ bool should_defer;
might_sleep();
}
}
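+ /* defer only write bios, and only when batched dispatch via worker groups is enabled */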
+ should_defer = conf->batch_bio_dispatch && conf->group_cnt;
+
for (i = disks; i--; ) {
int op, op_flags = 0;
int replace_only = 0;
			trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
					      bi, disk_devt(conf->mddev->gendisk),
					      sh->dev[i].sector);
-			defer_bio_issue(conf, bi);
+			if (should_defer && op_is_write(op))
+				bio_list_add(&pending_bios, bi);
+			else
+				generic_make_request(bi);
}
if (rrdev) {
if (s->syncing || s->expanding || s->expanded
			trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
					      rbi, disk_devt(conf->mddev->gendisk),
					      sh->dev[i].sector);
-			defer_bio_issue(conf, rbi);
+			if (should_defer && op_is_write(op))
+				bio_list_add(&pending_bios, rbi);
+			else
+				generic_make_request(rbi);
}
if (!rdev && !rrdev) {
if (op_is_write(op))
if (sh != head_sh)
goto again;
}
+
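+	/* hand any collected writes to the defer list, keyed by the head stripe's sector */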
+	if (should_defer && !bio_list_empty(&pending_bios))
+		defer_issue_bios(conf, head_sh->sector, &pending_bios);
}
static struct dma_async_tx_descriptor *
put_page(conf->disks[i].extra_page);
kfree(conf->disks);
kfree(conf->stripe_hashtbl);
+ kfree(conf->pending_data);
kfree(conf);
}
conf = kzalloc(sizeof(struct r5conf), GFP_KERNEL);
if (conf == NULL)
goto abort;
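+	/* preallocate the pool of pending-stripe entries used for deferred write batching */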
+	INIT_LIST_HEAD(&conf->free_list);
+	INIT_LIST_HEAD(&conf->pending_list);
+	conf->pending_data = kzalloc(sizeof(struct r5pending_data) *
+		PENDING_IO_MAX, GFP_KERNEL);
+	if (!conf->pending_data)
+		goto abort;
+	for (i = 0; i < PENDING_IO_MAX; i++)
+		list_add(&conf->pending_data[i].sibling, &conf->free_list);
/* Don't enable multi-threading by default*/
if (!alloc_thread_groups(conf, 0, &group_cnt, &worker_cnt_per_group,
&new_group)) {
atomic_set(&conf->active_stripes, 0);
atomic_set(&conf->preread_active_stripes, 0);
atomic_set(&conf->active_aligned_reads, 0);
- bio_list_init(&conf->pending_bios);
spin_lock_init(&conf->pending_bios_lock);
conf->batch_bio_dispatch = true;
rdev_for_each(rdev, mddev) {
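
Not shown in this excerpt is the raid5.h side of the patch. Judging from the fields and constants the code above relies on, it would look roughly like the sketch below; the two pool-size values are illustrative assumptions, not taken from these hunks.

	/* sketch only: companion raid5.h additions implied by the code above */
	#define PENDING_IO_MAX 512		/* assumed size of the pending-data pool */
	#define PENDING_IO_ONE_FLUSH 128	/* assumed batch size for one forced flush */

	struct r5pending_data {
		struct list_head sibling;	/* links free_list / pending_list */
		sector_t sector;		/* stripe sector, used as the sort key */
		struct bio_list bios;		/* deferred write bios for this stripe */
	};

	/* added to struct r5conf, protected by pending_bios_lock */
	struct list_head	free_list;
	struct list_head	pending_list;
	struct r5pending_data	*pending_data;
	struct r5pending_data	*next_pending_data;
	int			pending_data_cnt;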