From: Haibo Chen Date: Tue, 9 Dec 2014 09:04:05 +0000 (+0800) Subject: mmc: sdhci: use pipeline mmc requests to improve performance X-Git-Url: https://git.stricted.de/?a=commitdiff_plain;h=348487cb28e6;p=GitHub%2FLineageOS%2Fandroid_kernel_motorola_exynos9610.git mmc: sdhci: use pipeline mmc requests to improve performance This patch is based on the patches by Per Forlin, Tony Lin and Ryan QIAN. This patch complete the API 'post_req' and 'pre_req' in sdhci host side, Test Env: 1. i.MX6Q-SABREAUTO board, CPU @ 996MHz, use ADMA in uSDHC controller. 2. Test command: $ echo 1 > /proc/sys/vm/drop_caches write to sd card: $ dd if=/dev/zero of=/dev/mmcblk0 bs=1M count=2000 conv=fsync read the sd card: $ dd if=/dev/mmcblk0 of=/dev/null bs=1M count=2000 3. TOSHIBA 16GB SD3.0 card, running at 4 bit, SDR104 @ 198MHZ Performance with and without this patch: ------------------------------------------------- | | read speed | write speed | |------------------------------------------------ | with this patch | ~76.7 MB/s | ~23.3 MB/s | |------------------------------------------------ |without this patch | ~60.5 MB/s | ~22.5 MB/s | ------------------------------------------------- 4. SanDisk 8GB SD3.0 card, running at 4 bit, DDR50 @ 50MHZ Performance with and without this patch: ------------------------------------------------- | | read speed | write speed | |------------------------------------------------ | with this patch | ~40.5 MB/s | ~15.6 MB/s | |------------------------------------------------ |without this patch | ~36.1 MB/s | ~14.1 MB/s | ------------------------------------------------- 5. Kingston 8GB SD2.0 card, running at 4 bit, High-speed @ 50MHZ Performance with and without this patch: ------------------------------------------------- | | read speed | write speed | |------------------------------------------------ | with this patch | ~22.7 MB/s | ~8.2 MB/s | |------------------------------------------------ |without this patch | ~21.3 MB/s | ~8.0 MB/s | ------------------------------------------------- 6. About eMMC, Sandisk 8GB eMMC on i.MX6DL-sabresd board, CPU @ 792MHZ, eMMC running at 8 bit, DDR52 @ 52MHZ. Performance with and without this patch: ------------------------------------------------- | | read speed | write speed | |------------------------------------------------ | with this patch | ~37.3 MB/s | ~10.5 MB/s | |------------------------------------------------ |without this patch | ~33.4 MB/s | ~10.5 MB/s | ------------------------------------------------- Signed-off-by: Haibo Chen Signed-off-by: Ulf Hansson --- diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 2558d705c92e..1aab8a120d02 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -53,6 +53,9 @@ static void sdhci_finish_command(struct sdhci_host *); static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode); static void sdhci_tuning_timer(unsigned long data); static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable); +static int sdhci_pre_dma_transfer(struct sdhci_host *host, + struct mmc_data *data, + struct sdhci_host_next *next); #ifdef CONFIG_PM static int sdhci_runtime_pm_get(struct sdhci_host *host); @@ -505,9 +508,8 @@ static int sdhci_adma_table_pre(struct sdhci_host *host, goto fail; BUG_ON(host->align_addr & host->align_mask); - host->sg_count = dma_map_sg(mmc_dev(host->mmc), - data->sg, data->sg_len, direction); - if (host->sg_count == 0) + host->sg_count = sdhci_pre_dma_transfer(host, data, NULL); + if (host->sg_count < 0) goto unmap_align; desc = host->adma_table; @@ -645,8 +647,9 @@ static void sdhci_adma_table_post(struct sdhci_host *host, } } - dma_unmap_sg(mmc_dev(host->mmc), data->sg, - data->sg_len, direction); + if (!data->host_cookie) + dma_unmap_sg(mmc_dev(host->mmc), data->sg, + data->sg_len, direction); } static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd) @@ -842,11 +845,7 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd) } else { int sg_cnt; - sg_cnt = dma_map_sg(mmc_dev(host->mmc), - data->sg, data->sg_len, - (data->flags & MMC_DATA_READ) ? - DMA_FROM_DEVICE : - DMA_TO_DEVICE); + sg_cnt = sdhci_pre_dma_transfer(host, data, NULL); if (sg_cnt == 0) { /* * This only happens when someone fed @@ -959,8 +958,10 @@ static void sdhci_finish_data(struct sdhci_host *host) if (host->flags & SDHCI_USE_ADMA) sdhci_adma_table_post(host, data); else { - dma_unmap_sg(mmc_dev(host->mmc), data->sg, - data->sg_len, (data->flags & MMC_DATA_READ) ? + if (!data->host_cookie) + dma_unmap_sg(mmc_dev(host->mmc), + data->sg, data->sg_len, + (data->flags & MMC_DATA_READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } } @@ -2125,6 +2126,77 @@ static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable) } } +static void sdhci_post_req(struct mmc_host *mmc, struct mmc_request *mrq, + int err) +{ + struct sdhci_host *host = mmc_priv(mmc); + struct mmc_data *data = mrq->data; + + if (host->flags & SDHCI_REQ_USE_DMA) { + if (data->host_cookie) + dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + data->flags & MMC_DATA_WRITE ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); + mrq->data->host_cookie = 0; + } +} + +static int sdhci_pre_dma_transfer(struct sdhci_host *host, + struct mmc_data *data, + struct sdhci_host_next *next) +{ + int sg_count; + + if (!next && data->host_cookie && + data->host_cookie != host->next_data.cookie) { + pr_debug(DRIVER_NAME "[%s] invalid cookie: %d, next-cookie %d\n", + __func__, data->host_cookie, host->next_data.cookie); + data->host_cookie = 0; + } + + /* Check if next job is already prepared */ + if (next || + (!next && data->host_cookie != host->next_data.cookie)) { + sg_count = dma_map_sg(mmc_dev(host->mmc), data->sg, + data->sg_len, + data->flags & MMC_DATA_WRITE ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); + + } else { + sg_count = host->next_data.sg_count; + host->next_data.sg_count = 0; + } + + + if (sg_count == 0) + return -EINVAL; + + if (next) { + next->sg_count = sg_count; + data->host_cookie = ++next->cookie < 0 ? 1 : next->cookie; + } else + host->sg_count = sg_count; + + return sg_count; +} + +static void sdhci_pre_req(struct mmc_host *mmc, struct mmc_request *mrq, + bool is_first_req) +{ + struct sdhci_host *host = mmc_priv(mmc); + + if (mrq->data->host_cookie) { + mrq->data->host_cookie = 0; + return; + } + + if (host->flags & SDHCI_REQ_USE_DMA) + if (sdhci_pre_dma_transfer(host, + mrq->data, + &host->next_data) < 0) + mrq->data->host_cookie = 0; +} + static void sdhci_card_event(struct mmc_host *mmc) { struct sdhci_host *host = mmc_priv(mmc); @@ -2158,6 +2230,8 @@ static void sdhci_card_event(struct mmc_host *mmc) static const struct mmc_host_ops sdhci_ops = { .request = sdhci_request, + .post_req = sdhci_post_req, + .pre_req = sdhci_pre_req, .set_ios = sdhci_set_ios, .get_cd = sdhci_get_cd, .get_ro = sdhci_get_ro, @@ -3015,6 +3089,7 @@ int sdhci_add_host(struct sdhci_host *host) host->max_clk = host->ops->get_max_clock(host); } + host->next_data.cookie = 1; /* * In case of Host Controller v3.00, find out whether clock * multiplier is supported. diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index f767a0de611f..cb8b94ff6a26 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -17,6 +17,11 @@ #include #include +struct sdhci_host_next { + unsigned int sg_count; + s32 cookie; +}; + struct sdhci_host { /* Data set by hardware interface driver */ const char *hw_name; /* Hardware bus name */ @@ -203,6 +208,7 @@ struct sdhci_host { #define SDHCI_TUNING_MODE_1 0 struct timer_list tuning_timer; /* Timer for tuning */ + struct sdhci_host_next next_data; unsigned long private[0] ____cacheline_aligned; }; #endif /* LINUX_MMC_SDHCI_H */