When drivers or the core calls this function, they usually
dereference the request shortly there after. Prefetch the first
cache line.
Profiling IO workloads shows that this is the most common cache
miss on the block side of things.
Signed-off-by: Jens Axboe <axboe@fb.com>
#include <linux/sched/sysctl.h>
#include <linux/delay.h>
#include <linux/crash_dump.h>
+#include <linux/prefetch.h>
#include <trace/events/block.h>
struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
{
- if (tag < tags->nr_tags)
+ if (tag < tags->nr_tags) {
+ prefetch(tags->rqs[tag]);
return tags->rqs[tag];
+ }
return NULL;
}