From 29871f4dcd8573acbaa31daaf028ab488eb2059a Mon Sep 17 00:00:00 2001
From: "hyesoo.yu"
Date: Fri, 16 Jun 2017 16:44:51 +0900
Subject: [PATCH] [COMMON] g2d: set the qos and device frequency

The driver calculates the frequency it needs for performance from
the frame information supplied by the user.

Change-Id: I99b8c2a98efac2a6bab07de19b3d04dd29cd2a74
Signed-off-by: hyesoo.yu
---
 drivers/gpu/exynos/g2d/Makefile   |   2 +-
 drivers/gpu/exynos/g2d/g2d.h      |  35 ++++
 drivers/gpu/exynos/g2d/g2d_drv.c  |  79 +++++++++
 drivers/gpu/exynos/g2d/g2d_perf.c | 286 ++++++++++++++++++++++++++++++
 drivers/gpu/exynos/g2d/g2d_perf.h |  40 +++++
 drivers/gpu/exynos/g2d/g2d_uapi.h |  52 ++++++
 6 files changed, 493 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/exynos/g2d/g2d_perf.c
 create mode 100644 drivers/gpu/exynos/g2d/g2d_perf.h

diff --git a/drivers/gpu/exynos/g2d/Makefile b/drivers/gpu/exynos/g2d/Makefile
index 1db499152d20..090080059b6c 100644
--- a/drivers/gpu/exynos/g2d/Makefile
+++ b/drivers/gpu/exynos/g2d/Makefile
@@ -1,3 +1,3 @@
 obj-$(CONFIG_EXYNOS_GRAPHICS_G2D) += g2d_drv.o g2d_task.o g2d_regs.o
 obj-$(CONFIG_EXYNOS_GRAPHICS_G2D) += g2d_uapi_process.o g2d_command.o
-obj-$(CONFIG_EXYNOS_GRAPHICS_G2D) += g2d_fence.o g2d_debug.o
+obj-$(CONFIG_EXYNOS_GRAPHICS_G2D) += g2d_fence.o g2d_debug.o g2d_perf.o
diff --git a/drivers/gpu/exynos/g2d/g2d.h b/drivers/gpu/exynos/g2d/g2d.h
index 22fac19314ae..9adae81d3d96 100644
--- a/drivers/gpu/exynos/g2d/g2d.h
+++ b/drivers/gpu/exynos/g2d/g2d.h
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 
 struct g2d_task; /* defined in g2d_task.h */
 
@@ -38,6 +39,27 @@ enum g2d_priority {
 #define G2D_DEVICE_STATE_SUSPEND	1
 #define G2D_DEVICE_STATE_IOVMM_DISABLED	2
 
+enum g2d_hw_ppc {
+	G2D_PPC_DEFAULT,	/* 0..7: yuv2p << 2 | rot << 1 | scale */
+	G2D_PPC_SCALE,
+	G2D_PPC_ROTATE,
+	G2D_PPC_SCALE_ROTATE,
+	G2D_PPC_YUV2P,
+	G2D_PPC_YUV2P_SCALE,
+	G2D_PPC_YUV2P_ROTATE,
+	G2D_PPC_YUV2P_SCALE_ROTATE,
+	G2D_PPC_COLORFILL,
+	G2D_PPC_DST_DEFAULT,
+	G2D_PPC_DST_YUV2P,
+	G2D_PPC_DST_ROT,
+	G2D_PPC_END,
+};
+
+struct g2d_dvfs_table {
+	u32 lv;		/* value requested through pm_qos for this level */
+	u32 freq;	/* threshold the estimated clock is compared to */
+};
+
 struct g2d_device {
 	unsigned long		state;
 
@@ -67,12 +89,25 @@ struct g2d_device {
 	struct dentry *debug_logs;
 
 	atomic_t	prior_stats[G2D_PRIORITY_END];
+
+	struct mutex			lock_qos;	/* protects qos_contexts */
+	struct list_head		qos_contexts;
+	u32 hw_ppc[G2D_PPC_END];	/* indexed by enum g2d_hw_ppc */
+
+	struct g2d_dvfs_table *dvfs_table;
+	u32 dvfs_table_cnt;	/* number of entries in dvfs_table */
 };
 
 struct g2d_context {
 	struct g2d_device	*g2d_dev;
 	struct shared_buffer_info *hwfc_info;
 	u32 priority;
+
+	struct pm_qos_request req;
+
+	struct list_head qos_node;	/* link in qos_contexts */
+	u64 r_bw;
+	u64 w_bw;
 };
 
 int g2d_device_run(struct g2d_device *g2d_dev, struct g2d_task *task);
diff --git a/drivers/gpu/exynos/g2d/g2d_drv.c b/drivers/gpu/exynos/g2d/g2d_drv.c
index 18cdb5b9581f..e1da54da2e97 100644
--- a/drivers/gpu/exynos/g2d/g2d_drv.c
+++ b/drivers/gpu/exynos/g2d/g2d_drv.c
@@ -30,6 +30,7 @@
 #include "g2d_task.h"
 #include "g2d_uapi_process.h"
 #include "g2d_debug.h"
+#include "g2d_perf.h"
 
 #define MODULE_NAME "exynos-g2d"
 
@@ -250,6 +251,8 @@ static int g2d_open(struct inode *inode, struct file *filp)
 	g2d_ctx->priority = G2D_DEFAULT_PRIORITY;
 	atomic_inc(&g2d_dev->prior_stats[g2d_ctx->priority]);
 
+	INIT_LIST_HEAD(&g2d_ctx->qos_node);
+
 	return 0;
 }
 
@@ -268,6 +271,8 @@ static int g2d_release(struct inode *inode, struct file *filp)
 		kfree(g2d_ctx->hwfc_info);
 	}
 
+	g2d_put_performance(g2d_ctx);
+
 	kfree(g2d_ctx);
 
 	return 0;
@@ -383,6 +388,20 @@ static long g2d_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
 		break;
 	}
+	case G2D_IOC_PERFORMANCE:
+	{
+		struct g2d_performance_data data;
+
+		if (copy_from_user(&data, (void __user *)arg, sizeof(data))) {
+			dev_err(g2d_dev->dev,
+				"%s: Failed to read perf data\n", __func__);
+			ret = -EFAULT;
+			break;
+		}
+		g2d_set_performance(ctx, &data);
+
+		break;
+	}
 	}
 
 	return ret;
@@ -424,6 +443,59 @@ static int g2d_notifier_event(struct notifier_block *this,
 	return NOTIFY_OK;
 }
 
+static const unsigned int g2d_default_ppc[G2D_PPC_END] =
+	{3500, 3200, 3500, 3000,
+	3500, 3100, 3000, 2800,
+	3800, 3500, 2800, 2500};
+
+static const struct g2d_dvfs_table g2d_default_dvfs_table[] = {
+	{534000, 711000},
+	{400000, 534000},
+	{336000, 400000},
+	{267000, 356000},
+	{178000, 200000},
+	{107000, 134000},
+};
+
+/*
+ * Fill hw_ppc[] and the DVFS table from the device tree. Either property
+ * falls back to the built-in defaults when it is missing or malformed.
+ */
+static int g2d_parse_dt(struct g2d_device *g2d_dev)
+{
+	struct device *dev = g2d_dev->dev;
+	int i, len;
+
+	if (of_property_read_u32_array(dev->of_node, "hw_ppc", g2d_dev->hw_ppc,
+				       ARRAY_SIZE(g2d_dev->hw_ppc))) {
+		dev_err(dev, "Failed to parse device tree for hw ppc\n");
+		for (i = 0; i < G2D_PPC_END; i++)
+			g2d_dev->hw_ppc[i] = g2d_default_ppc[i];
+	}
+
+	len = of_property_count_u32_elems(dev->of_node, "g2d_dvfs_table");
+	g2d_dev->dvfs_table_cnt = (len < 2) ?
+			ARRAY_SIZE(g2d_default_dvfs_table) : len / 2;
+
+	g2d_dev->dvfs_table = devm_kcalloc(dev, g2d_dev->dvfs_table_cnt,
+					   sizeof(*g2d_dev->dvfs_table),
+					   GFP_KERNEL);
+	if (!g2d_dev->dvfs_table)
+		return -ENOMEM;
+
+	if (len < 2) {
+		memcpy(g2d_dev->dvfs_table, g2d_default_dvfs_table,
+		       sizeof(g2d_default_dvfs_table));
+	} else {
+		/* an odd trailing cell has no table slot: never read it */
+		of_property_read_u32_array(dev->of_node, "g2d_dvfs_table",
+				(unsigned int *)g2d_dev->dvfs_table,
+				g2d_dev->dvfs_table_cnt * 2);
+	}
+
+	return 0;
+}
+
 static int g2d_probe(struct platform_device *pdev)
 {
 	struct g2d_device *g2d_dev;
@@ -465,6 +537,10 @@ static int g2d_probe(struct platform_device *pdev)
 
 	iovmm_set_fault_handler(&pdev->dev, g2d_iommu_fault_handler, g2d_dev);
 
+	ret = g2d_parse_dt(g2d_dev);
+	if (ret < 0)
+		return ret;
+
 	ret = iovmm_activate(&pdev->dev);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Failed to activate iommu\n");
@@ -495,6 +571,9 @@ static int g2d_probe(struct platform_device *pdev)
 	INIT_LIST_HEAD(&g2d_dev->tasks_free_hwfc);
 	INIT_LIST_HEAD(&g2d_dev->tasks_prepared);
 	INIT_LIST_HEAD(&g2d_dev->tasks_active);
+	INIT_LIST_HEAD(&g2d_dev->qos_contexts);
+
+	mutex_init(&g2d_dev->lock_qos);
 
 	ret = g2d_create_tasks(g2d_dev);
 	if (ret < 0) {
diff --git a/drivers/gpu/exynos/g2d/g2d_perf.c b/drivers/gpu/exynos/g2d/g2d_perf.c
new file
mode 100644
index 000000000000..0f9183d7a39d
--- /dev/null
+++ b/drivers/gpu/exynos/g2d/g2d_perf.c
@@ -0,0 +1,328 @@
+/*
+ * linux/drivers/gpu/exynos/g2d/g2d_perf.c
+ *
+ * Copyright (C) 2017 Samsung Electronics Co., Ltd.
+ *
+ * Contact: Hyesoo Yu
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include "g2d.h"
+#include "g2d_perf.h"
+#include "g2d_task.h"
+#include "g2d_uapi.h"
+
+#ifdef CONFIG_PM_DEVFREQ
+static void g2d_pm_qos_update_devfreq(struct pm_qos_request *req, u32 freq)
+{
+	/*
+	 * FIXME: PM_QOS_DEVICE_THROUGHPUT is not in the upstream kernel.
+	 * if (!pm_qos_request_active(req))
+	 *	pm_qos_add_request(req, PM_QOS_DEVICE_THROUGHPUT, 0);
+	 */
+
+	pm_qos_update_request(req, freq);
+}
+
+static void g2d_pm_qos_remove_devfreq(struct pm_qos_request *req)
+{
+	if (pm_qos_request_active(req))
+		pm_qos_remove_request(req);
+}
+#else
+#define g2d_pm_qos_update_devfreq(req, freq) do { } while (0)
+#define g2d_pm_qos_remove_devfreq(req) do { } while (0)
+#endif
+
+/* Return true while at least one task on g2d_dev->tasks is not idle. */
+static bool g2d_still_need_perf(struct g2d_device *g2d_dev)
+{
+	struct g2d_task *task;
+	unsigned long flags;
+
+	spin_lock_irqsave(&g2d_dev->lock_task, flags);
+	for (task = g2d_dev->tasks; task != NULL; task = task->next) {
+		if (!is_task_state_idle(task)) {
+			spin_unlock_irqrestore(&g2d_dev->lock_task, flags);
+			return true;
+		}
+	}
+	spin_unlock_irqrestore(&g2d_dev->lock_task, flags);
+
+	return false;
+}
+
+/*
+ * Estimate the cycles needed by the frames in @data and request a device
+ * frequency for them. The caller must have range-checked num_frame and
+ * num_layers: they index frame[], layer[] and the on-stack ppc[].
+ */
+static void g2d_set_device_frequency(struct g2d_context *g2d_ctx,
+					  struct g2d_performance_data *data)
+{
+	struct g2d_device *g2d_dev = g2d_ctx->g2d_dev;
+	struct g2d_performance_frame_data *frame;
+	struct g2d_performance_layer_data *layer, *pair;
+	unsigned int cycle, cycle_src, cycle_dst, ip_clock;
+	unsigned int rot_size, no_rot_size;
+	unsigned int dst_ppc, ppc[G2D_MAX_IMAGES];
+	int i, j;
+	char sc, yuv2p, rot, rot_skip, gap;
+
+	cycle = 0;
+	gap = false;
+
+	for (i = 0; i < data->num_frame; i++) {
+		frame = &data->frame[i];
+
+		rot_size = 0;
+		no_rot_size = 0;
+		cycle_src = 0;
+
+		/*
+		 * The rotate variable means that the rotated layers and
+		 * non-rotated layers are mixed.
+		 * If all layers are rotated or are non rotated, that is
+		 * excluded.
+		 */
+		rot = 0;
+		for (j = 0; j < frame->num_layers; j++) {
+			if (is_perf_layer_rotate(&frame->layer[j]))
+				rot++;
+		}
+		rot_skip = (rot == frame->num_layers) ? 1 : 0;
+
+		for (j = 0; j < frame->num_layers; j++) {
+			layer = &frame->layer[j];
+
+			yuv2p = is_perf_layer_yuv2p(layer) ? 1 : 0;
+			sc = is_perf_layer_scaling(layer) ? 1 : 0;
+			rot = !rot_skip && is_perf_layer_rotate(layer) ? 1 : 0;
+
+			ppc[j] =
+				g2d_dev->hw_ppc[(yuv2p << 2) | (rot << 1) | sc];
+
+			cycle_src += layer->pixelcount / ppc[j];
+
+			/*
+			 * check rotated size for cycle_dst. rotated size is
+			 * bigger than non-rotated size, g2d write direction
+			 * is vertical, and it affects performance.
+			 */
+			if (is_perf_layer_rotate(layer))
+				rot_size += layer->pixelcount;
+			else
+				no_rot_size += layer->pixelcount;
+
+			/*
+			 * The rotated layer affects the pair layer,
+			 * so we add the cycle using gap_ppc between pair
+			 * N layer and N+1 layer. The gap ppc is calculated
+			 * on odd layer and gap_pixelcount is pair layer's
+			 * nested region from 2 layers that means
+			 * the smaller region.
+			 */
+			if (rot && (yuv2p || sc))
+				gap = true;
+
+			if (gap && (j & 0x1)) {
+				unsigned int gap_pixelcount, gap_ppc;
+
+				pair = &frame->layer[j - 1];
+				gap = false;
+
+				gap_ppc = (ppc[j] > ppc[j - 1]) ?
+					(ppc[j] - ppc[j - 1]) :
+					(ppc[j - 1] - ppc[j]);
+				if (!gap_ppc)
+					continue;
+
+				gap_ppc = (ppc[j] * ppc[j - 1]) / gap_ppc;
+
+				gap_pixelcount = min(layer->pixelcount, pair->pixelcount);
+
+				cycle_src += gap_pixelcount / gap_ppc;
+			}
+		}
+
+		rot = (rot_size > no_rot_size) ? 1 : 0;
+		if (!rot && is_perf_frame_yuv2p(frame))
+			dst_ppc = g2d_dev->hw_ppc[G2D_PPC_DST_YUV2P];
+		else if (!rot)
+			dst_ppc = g2d_dev->hw_ppc[G2D_PPC_DST_DEFAULT];
+		else
+			dst_ppc = g2d_dev->hw_ppc[G2D_PPC_DST_ROT];
+
+		cycle_dst = frame->target_pixelcount / dst_ppc;
+
+		cycle += max(cycle_src, cycle_dst);
+
+		if (is_perf_frame_colorfill(frame))
+			cycle += frame->target_pixelcount /
+				g2d_dev->hw_ppc[G2D_PPC_COLORFILL];
+	}
+
+	/* ip_clock(Mhz) = cycles / time_in_ms * 1000 */
+	ip_clock = (cycle / 8) * 1000;
+
+	for (i = 0; i < g2d_dev->dvfs_table_cnt; i++) {
+		if (ip_clock > g2d_dev->dvfs_table[i].freq) {
+			ip_clock = (i == 0) ?
+					g2d_dev->dvfs_table[i].lv :
+					g2d_dev->dvfs_table[i - 1].lv;
+			break;
+		}
+	}
+
+	if (!ip_clock && !g2d_still_need_perf(g2d_dev))
+		g2d_pm_qos_remove_devfreq(&g2d_ctx->req);
+	else if (ip_clock)
+		g2d_pm_qos_update_devfreq(&g2d_ctx->req, ip_clock);
+}
+
+/*
+ * Record the bandwidth requested by @g2d_ctx. qos_contexts is kept sorted
+ * so that its first entry is the context with the largest total bandwidth.
+ */
+static void g2d_set_qos_frequency(struct g2d_context *g2d_ctx,
+					  struct g2d_performance_data *data)
+{
+	struct g2d_device *g2d_dev = g2d_ctx->g2d_dev;
+	struct g2d_performance_frame_data *frame;
+	u64 cur_rbw, rbw;
+	u64 cur_wbw, wbw;
+	int i;
+
+	cur_rbw = 0;
+	cur_wbw = 0;
+	rbw = 0;
+	wbw = 0;
+
+	for (i = 0; i < data->num_frame; i++) {
+		frame = &data->frame[i];
+
+		rbw += frame->bandwidth_read;
+		wbw += frame->bandwidth_write;
+	}
+
+	if (list_empty(&g2d_ctx->qos_node) && !rbw && !wbw)
+		return;
+
+	if (!rbw && !wbw && g2d_still_need_perf(g2d_dev))
+		return;
+
+	mutex_lock(&g2d_dev->lock_qos);
+
+	if (!list_empty(&g2d_dev->qos_contexts)) {
+		struct g2d_context *ctx_qos;
+
+		ctx_qos = list_first_entry(&g2d_dev->qos_contexts,
+					   struct g2d_context, qos_node);
+		cur_rbw = ctx_qos->r_bw;
+		cur_wbw = ctx_qos->w_bw;
+	}
+
+	/* this works although ctx is not attached to qos_contexts */
+	list_del_init(&g2d_ctx->qos_node);
+
+	g2d_ctx->r_bw = rbw;
+	g2d_ctx->w_bw = wbw;
+
+	if (rbw || wbw) {
+		struct list_head *node;
+
+		for (node = g2d_dev->qos_contexts.prev;
+				node != &g2d_dev->qos_contexts;
+						node = node->prev) {
+			struct g2d_context *curctx = list_entry(node,
+					struct g2d_context, qos_node);
+			if ((curctx->r_bw + curctx->w_bw) > (rbw + wbw))
+				break;
+		}
+		/*
+		 * node always points to the head node or the smallest bw node
+		 * among the larger bw nodes than qosnode
+		 */
+		list_add(&g2d_ctx->qos_node, node);
+	}
+
+	if (!list_empty(&g2d_dev->qos_contexts)) {
+		struct g2d_context *ctx_qos;
+
+		ctx_qos = list_first_entry(&g2d_dev->qos_contexts,
+				      struct g2d_context, qos_node);
+		/* bandwidth request is changed */
+		rbw = ctx_qos->r_bw;
+		wbw = ctx_qos->w_bw;
+	}
+
+	if ((rbw != cur_rbw) || (wbw != cur_wbw)) {
+		/*
+		 * FIXME: BTS is not available for now
+		 * struct bts_bw bw;
+		 *
+		 * bw.peak = ((rbw + wbw) / 1000) * BTS_PEAK_FPS_RATIO / 2;
+		 * bw.write = wbw;
+		 * bw.read = rbw;
+		 * bts_update_bw(BTS_BW_G2D, bw);
+		 */
+	}
+
+	mutex_unlock(&g2d_dev->lock_qos);
+}
+
+/*
+ * g2d_set_performance - apply the performance request of a context
+ *
+ * @data originates from userspace (G2D_IOC_PERFORMANCE). Its counters index
+ * frame[], layer[] and the on-stack ppc[] of g2d_set_device_frequency(), so
+ * a request with out-of-range counters is dropped before it is used.
+ */
+void g2d_set_performance(struct g2d_context *ctx,
+				struct g2d_performance_data *data)
+{
+	u32 i;
+
+	if (data->num_frame > G2D_PERF_MAX_FRAMES)
+		return;
+
+	for (i = 0; i < data->num_frame; i++) {
+		if (data->frame[i].num_layers > G2D_MAX_IMAGES)
+			return;
+	}
+
+	g2d_set_qos_frequency(ctx, data);
+	g2d_set_device_frequency(ctx, data);
+}
+
+/*
+ * g2d_put_performance - withdraw every performance request of a context
+ *
+ * g2d_set_performance() may leave the context linked to qos_contexts and
+ * its pm_qos request in place while the device still has running tasks.
+ * g2d_release() frees @ctx right after this call, so both are torn down
+ * here unconditionally.
+ */
+void g2d_put_performance(struct g2d_context *ctx)
+{
+	struct g2d_device *g2d_dev = ctx->g2d_dev;
+	struct g2d_performance_data data;
+
+	data.num_frame = 0;
+
+	g2d_set_performance(ctx, &data);
+
+	mutex_lock(&g2d_dev->lock_qos);
+	list_del_init(&ctx->qos_node);
+	mutex_unlock(&g2d_dev->lock_qos);
+
+	g2d_pm_qos_remove_devfreq(&ctx->req);
+}
diff --git a/drivers/gpu/exynos/g2d/g2d_perf.h b/drivers/gpu/exynos/g2d/g2d_perf.h
new file mode 100644
index 000000000000..a437824f81dc
--- /dev/null
+++ b/drivers/gpu/exynos/g2d/g2d_perf.h
@@ -0,0 +1,40 @@
+/*
+ * linux/drivers/gpu/exynos/g2d/g2d_perf.h
+ *
+ * Copyright (C) 2017 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _G2D_PERF_H_
+#define _G2D_PERF_H_
+
+struct g2d_context;
+struct g2d_performance_data;
+
+#define is_perf_layer_yuv2p(layer) \
+		(((layer)->layer_attr) & G2D_PERF_LAYER_YUV2P)
+#define is_perf_layer_scaling(layer) \
+		(((layer)->layer_attr) & G2D_PERF_LAYER_SCALING)
+#define is_perf_layer_rotate(layer) \
+		(((layer)->layer_attr) & G2D_PERF_LAYER_ROTATE)
+
+#define is_perf_frame_yuv2p(frame) \
+		(((frame)->frame_attr) & G2D_PERF_FRAME_YUV2P)
+#define is_perf_frame_colorfill(frame) \
+		(((frame)->frame_attr) & G2D_PERF_FRAME_SOLIDCOLORFILL)
+
+#define BTS_PEAK_FPS_RATIO 1667
+
+void g2d_set_performance(struct g2d_context *ctx,
+				struct g2d_performance_data *data);
+void g2d_put_performance(struct g2d_context *ctx);
+
+#endif /* _G2D_PERF_H_ */
diff --git a/drivers/gpu/exynos/g2d/g2d_uapi.h b/drivers/gpu/exynos/g2d/g2d_uapi.h
index 4b1806c7101a..f57600b7a782 100644
--- a/drivers/gpu/exynos/g2d/g2d_uapi.h
+++ b/drivers/gpu/exynos/g2d/g2d_uapi.h
@@ -209,8 +209,61 @@ struct g2d_task_data {
 	struct g2d_commands	commands;
 };
 
+/* flags of g2d_performance_layer_data.layer_attr */
+#define G2D_PERF_LAYER_ROTATE		(1 << 0)
+#define G2D_PERF_LAYER_SCALING		(1 << 1)
+#define G2D_PERF_LAYER_YUV2P		(1 << 2)
+
+/*
+ * struct g2d_performance_layer_data - description of a layer's needed performance.
+ * @pixelcount : the pixel count of the layer, used to calculate the frequency.
+ * @layer_attr : attribute of layer affecting performance.
+ */
+struct g2d_performance_layer_data {
+	__u32 pixelcount;
+	__u32 layer_attr;
+};
+
+/* flags of g2d_performance_frame_data.frame_attr */
+#define G2D_PERF_FRAME_SOLIDCOLORFILL	(1 << 0)
+#define G2D_PERF_FRAME_YUV2P		(1 << 1)
+
+/*
+ * struct g2d_performance_frame_data - description of needed performance.
+ * @layer : the pixel count of each layer to be processed.
+ * @bandwidth_read : the size of bandwidth to read when processing.
+ * @bandwidth_write : the size of bandwidth to write when processing.
+ * @target_pixelcount : the pixel count of the target, used to calculate the frequency.
+ * @frame_rate : frame per second of the job.
+ * @frame_attr : frame attribute
+ * @num_layers : the number of layers to be processed.
+ */
+struct g2d_performance_frame_data {
+	struct g2d_performance_layer_data layer[G2D_MAX_IMAGES];
+	__u32 bandwidth_read;
+	__u32 bandwidth_write;
+	__u32 target_pixelcount;
+	__u32 frame_rate;
+	__u32 frame_attr;
+	__u32 num_layers;
+};
+
+#define G2D_PERF_MAX_FRAMES 4
+
+/*
+ * struct g2d_performance_data - description of the needed performance.
+ * @frame : the descriptions of each request's bandwidth and cycles in a frame.
+ * @num_frame : the number of g2d jobs requested in a frame.
+ */
+struct g2d_performance_data {
+	struct g2d_performance_frame_data frame[G2D_PERF_MAX_FRAMES];
+	__u32 num_frame;
+	__u32 reserved;
+};
+
 #define G2D_IOC_PROCESS		_IOWR('M', 4, struct g2d_task_data)
 #define G2D_IOC_PRIORITY	_IOR('M', 5, int32_t)
+#define G2D_IOC_PERFORMANCE	_IOR('M', 6, struct g2d_performance_data)
 
 #endif /* _G2D_UAPI_H_ */
 
-- 
2.20.1