[COMMON] g2d: set the qos and device frequency
authorhyesoo.yu <hyesoo.yu@samsung.com>
Fri, 16 Jun 2017 07:44:51 +0000 (16:44 +0900)
committerSeungchul Kim <sc377.kim@samsung.com>
Mon, 28 May 2018 05:27:27 +0000 (14:27 +0900)
The driver calculates the QoS and device frequency it needs from the
per-frame performance information supplied by the user.

Change-Id: I99b8c2a98efac2a6bab07de19b3d04dd29cd2a74
Signed-off-by: hyesoo.yu <hyesoo.yu@samsung.com>
drivers/gpu/exynos/g2d/Makefile
drivers/gpu/exynos/g2d/g2d.h
drivers/gpu/exynos/g2d/g2d_drv.c
drivers/gpu/exynos/g2d/g2d_perf.c [new file with mode: 0644]
drivers/gpu/exynos/g2d/g2d_perf.h [new file with mode: 0644]
drivers/gpu/exynos/g2d/g2d_uapi.h

index 1db499152d20cb738563c8e2dc4a60d570fe0f33..090080059b6cf22eecacebf6d8632e6cce917899 100644 (file)
@@ -1,3 +1,3 @@
 obj-$(CONFIG_EXYNOS_GRAPHICS_G2D) += g2d_drv.o g2d_task.o g2d_regs.o
 obj-$(CONFIG_EXYNOS_GRAPHICS_G2D) += g2d_uapi_process.o g2d_command.o
-obj-$(CONFIG_EXYNOS_GRAPHICS_G2D) += g2d_fence.o g2d_debug.o
+obj-$(CONFIG_EXYNOS_GRAPHICS_G2D) += g2d_fence.o g2d_debug.o g2d_perf.o
index 22fac19314ae13c3a3e18d26f24fab1717d11697..9adae81d3d96c521952fbad09d7215f00b5537ac 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/device.h>
 #include <linux/miscdevice.h>
 #include <media/exynos_repeater.h>
+#include <linux/pm_qos.h>
 
 struct g2d_task; /* defined in g2d_task.h */
 
@@ -38,6 +39,27 @@ enum g2d_priority {
 #define G2D_DEVICE_STATE_SUSPEND       1
 #define G2D_DEVICE_STATE_IOVMM_DISABLED        2
 
+/*
+ * Indices into g2d_device.hw_ppc[].
+ *
+ * The first eight entries are selected in g2d_perf.c with the computed
+ * index (yuv2p << 2) | (rotate << 1) | scale, so their order must not be
+ * changed.  The remaining entries are referenced by name: COLORFILL for
+ * solid color fill frames and DST_* for the destination side of a frame.
+ * G2D_PPC_END is the number of entries.
+ * NOTE(review): "ppc" presumably means pixels per clock -- confirm.
+ */
+enum g2d_hw_ppc {
+       G2D_PPC_DEFAULT,
+       G2D_PPC_SCALE,
+       G2D_PPC_ROTATE,
+       G2D_PPC_SCALE_ROTATE,
+       G2D_PPC_YUV2P,
+       G2D_PPC_YUV2P_SCALE,
+       G2D_PPC_YUV2P_ROTATE,
+       G2D_PPC_YUV2P_SCALE_ROTATE,
+       G2D_PPC_COLORFILL,
+       G2D_PPC_DST_DEFAULT,
+       G2D_PPC_DST_YUV2P,
+       G2D_PPC_DST_ROT,
+       G2D_PPC_END,
+};
+
+/*
+ * One entry of the DVFS lookup table.
+ * @lv:   value requested through PM QoS when this entry is selected
+ * @freq: threshold the computed IP clock is compared against
+ *
+ * The "g2d_dvfs_table" device tree property is read as a flat u32 array
+ * straight into an array of this struct, so the cell order is <lv freq>.
+ * NOTE(review): both values look like kHz judging by the default table --
+ * confirm.
+ */
+struct g2d_dvfs_table {
+       u32 lv;
+       u32 freq;
+};
+
 struct g2d_device {
        unsigned long           state;
 
@@ -67,12 +89,25 @@ struct g2d_device {
        struct dentry *debug_logs;
 
        atomic_t        prior_stats[G2D_PRIORITY_END];
+
+       struct mutex                    lock_qos;
+       struct list_head                qos_contexts;
+       u32 hw_ppc[G2D_PPC_END];
+
+       struct g2d_dvfs_table *dvfs_table;
+       u32 dvfs_table_cnt;
 };
 
 struct g2d_context {
        struct g2d_device       *g2d_dev;
        struct shared_buffer_info *hwfc_info;
        u32 priority;
+
+       /* PM QoS request g2d_perf.c uses for the device frequency */
+       struct pm_qos_request req;
+
+       /* link in g2d_device.qos_contexts; modified under lock_qos */
+       struct list_head qos_node;
+       /* read/write bandwidth summed over the last performance request */
+       u64     r_bw;
+       u64     w_bw;
 };
 
 int g2d_device_run(struct g2d_device *g2d_dev, struct g2d_task *task);
index 18cdb5b9581fd79396346c4fab610699b1aa8df4..e1da54da2e97aba618960e3a7063d832c24a3556 100644 (file)
@@ -30,6 +30,7 @@
 #include "g2d_task.h"
 #include "g2d_uapi_process.h"
 #include "g2d_debug.h"
+#include "g2d_perf.h"
 
 #define MODULE_NAME "exynos-g2d"
 
@@ -250,6 +251,8 @@ static int g2d_open(struct inode *inode, struct file *filp)
        g2d_ctx->priority = G2D_DEFAULT_PRIORITY;
        atomic_inc(&g2d_dev->prior_stats[g2d_ctx->priority]);
 
+       INIT_LIST_HEAD(&g2d_ctx->qos_node);
+
        return 0;
 }
 
@@ -268,6 +271,8 @@ static int g2d_release(struct inode *inode, struct file *filp)
                kfree(g2d_ctx->hwfc_info);
        }
 
+       g2d_put_performance(g2d_ctx);
+
        kfree(g2d_ctx);
 
        return 0;
@@ -383,6 +388,20 @@ static long g2d_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
                break;
        }
+       case G2D_IOC_PERFORMANCE:
+       {
+               struct g2d_performance_data data;
+
+               if (copy_from_user(&data, (void __user *)arg, sizeof(data))) {
+                       dev_err(g2d_dev->dev,
+                               "%s: Failed to read perf data\n", __func__);
+                       ret = -EFAULT;
+                       break;
+               }
+               g2d_set_performance(ctx, &data);
+
+               break;
+       }
        }
 
        return ret;
@@ -424,6 +443,59 @@ static int g2d_notifier_event(struct notifier_block *this,
        return NOTIFY_OK;
 }
 
+/*
+ * Fallback hw_ppc values, indexed by enum g2d_hw_ppc, used when the
+ * "hw_ppc" device tree property cannot be read.
+ */
+static unsigned int g2d_default_ppc[G2D_PPC_END] =
+       {3500, 3200, 3500, 3000,
+       3500, 3100, 3000, 2800,
+       3800, 3500, 2800, 2500};
+
+/*
+ * Fallback DVFS table as {lv, freq} pairs, used when the "g2d_dvfs_table"
+ * device tree property is absent.  Entries are listed from the highest to
+ * the lowest frequency.
+ */
+static struct g2d_dvfs_table g2d_default_dvfs_table[] = {
+       {534000, 711000},
+       {400000, 534000},
+       {336000, 400000},
+       {267000, 356000},
+       {178000, 200000},
+       {107000, 134000},
+};
+
+/*
+ * g2d_parse_dt() - read the performance tuning tables from the device tree
+ * @g2d_dev: device whose hw_ppc[] and dvfs_table are filled in
+ *
+ * "hw_ppc" has to provide G2D_PPC_END non-zero u32 cells.  "g2d_dvfs_table"
+ * has to provide an even, non-zero number of u32 cells laid out as
+ * <lv freq> pairs.  Whenever a property is missing or malformed the
+ * built-in defaults are used instead.
+ *
+ * Return: 0 on success, -ENOMEM if the DVFS table cannot be allocated, or
+ * the error of of_property_read_u32_array() if reading the table fails.
+ */
+static int g2d_parse_dt(struct g2d_device *g2d_dev)
+{
+       struct device *dev = g2d_dev->dev;
+       bool use_default_ppc = false;
+       int i, len, ret;
+
+       if (of_property_read_u32_array(dev->of_node, "hw_ppc",
+                       (u32 *)g2d_dev->hw_ppc,
+                       (size_t)(ARRAY_SIZE(g2d_dev->hw_ppc)))) {
+               dev_err(dev, "Failed to parse device tree for hw ppc\n");
+               use_default_ppc = true;
+       }
+
+       /* every hw_ppc entry is used as a divisor in g2d_perf.c */
+       for (i = 0; !use_default_ppc && i < G2D_PPC_END; i++) {
+               if (!g2d_dev->hw_ppc[i]) {
+                       dev_err(dev, "hw_ppc[%d] must not be zero\n", i);
+                       use_default_ppc = true;
+               }
+       }
+
+       if (use_default_ppc) {
+               for (i = 0; i < G2D_PPC_END; i++)
+                       g2d_dev->hw_ppc[i] = g2d_default_ppc[i];
+       }
+
+       len = of_property_count_u32_elems(dev->of_node, "g2d_dvfs_table");
+       /*
+        * The table is made of <lv freq> pairs.  An odd cell count would
+        * make the read below write past the len / 2 allocated entries and
+        * an empty property would produce a zero-sized table.
+        */
+       if (len == 0 || (len > 0 && (len & 1))) {
+               dev_err(dev, "Invalid size %d of g2d_dvfs_table\n", len);
+               len = -EINVAL;
+       }
+
+       if (len < 0)
+               g2d_dev->dvfs_table_cnt = ARRAY_SIZE(g2d_default_dvfs_table);
+       else
+               g2d_dev->dvfs_table_cnt = len / 2;
+
+       g2d_dev->dvfs_table = devm_kzalloc(dev,
+                               sizeof(struct g2d_dvfs_table) *
+                               g2d_dev->dvfs_table_cnt,
+                               GFP_KERNEL);
+       if (!g2d_dev->dvfs_table)
+               return -ENOMEM;
+
+       if (len < 0) {
+               memcpy(g2d_dev->dvfs_table, g2d_default_dvfs_table,
+                       sizeof(struct g2d_dvfs_table) *
+                       g2d_dev->dvfs_table_cnt);
+               return 0;
+       }
+
+       ret = of_property_read_u32_array(dev->of_node, "g2d_dvfs_table",
+                       (unsigned int *)g2d_dev->dvfs_table, len);
+       if (ret)
+               dev_err(dev, "Failed to read g2d_dvfs_table (%d)\n", ret);
+
+       return ret;
+}
+
 static int g2d_probe(struct platform_device *pdev)
 {
        struct g2d_device *g2d_dev;
@@ -465,6 +537,10 @@ static int g2d_probe(struct platform_device *pdev)
 
        iovmm_set_fault_handler(&pdev->dev, g2d_iommu_fault_handler, g2d_dev);
 
+       ret = g2d_parse_dt(g2d_dev);
+       if (ret < 0)
+               return ret;
+
        ret = iovmm_activate(&pdev->dev);
        if (ret < 0) {
                dev_err(&pdev->dev, "Failed to activate iommu\n");
@@ -495,6 +571,9 @@ static int g2d_probe(struct platform_device *pdev)
        INIT_LIST_HEAD(&g2d_dev->tasks_free_hwfc);
        INIT_LIST_HEAD(&g2d_dev->tasks_prepared);
        INIT_LIST_HEAD(&g2d_dev->tasks_active);
+       INIT_LIST_HEAD(&g2d_dev->qos_contexts);
+
+       mutex_init(&g2d_dev->lock_qos);
 
        ret = g2d_create_tasks(g2d_dev);
        if (ret < 0) {
diff --git a/drivers/gpu/exynos/g2d/g2d_perf.c b/drivers/gpu/exynos/g2d/g2d_perf.c
new file mode 100644 (file)
index 0000000..0f9183d
--- /dev/null
@@ -0,0 +1,286 @@
+/*
+ * linux/drivers/gpu/exynos/g2d/g2d_perf.c
+ *
+ * Copyright (C) 2017 Samsung Electronics Co., Ltd.
+ *
+ * Contact: Hyesoo Yu <hyesoo.yu@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include "g2d.h"
+#include "g2d_perf.h"
+#include "g2d_task.h"
+#include "g2d_uapi.h"
+
+#ifdef CONFIG_PM_DEVFREQ
+/*
+ * Change the frequency requested through @req to @freq.
+ * Nothing is done while @req has not been added to PM QoS.
+ */
+static void g2d_pm_qos_update_devfreq(struct pm_qos_request *req, u32 freq)
+{
+       /*
+        * FIXME: PM_QOS_DEVICE_THROUGHPUT is not in the upstream kernel.
+        * if (!pm_qos_request_active(req))
+        *      pm_qos_add_request(req, PM_QOS_DEVICE_THROUGHPUT, 0);
+        */
+
+       /*
+        * As long as the request cannot be added (see FIXME above) it is
+        * never active.  Updating an inactive request is rejected by the
+        * PM QoS core with a WARN, so skip it like the removal below does.
+        */
+       if (!pm_qos_request_active(req))
+               return;
+
+       pm_qos_update_request(req, freq);
+}
+
+/* Drop the frequency request of @req if it is currently active. */
+static void g2d_pm_qos_remove_devfreq(struct pm_qos_request *req)
+{
+       if (pm_qos_request_active(req))
+               pm_qos_remove_request(req);
+}
+#else
+/* without devfreq support there is no frequency to request */
+#define g2d_pm_qos_update_devfreq(req, freq) do { } while (0)
+#define g2d_pm_qos_remove_devfreq(req) do { } while (0)
+#endif
+
+/*
+ * Tell whether at least one task of @g2d_dev is not idle, which means the
+ * current performance request still has to be kept.
+ * The task list is walked with lock_task held and interrupts disabled.
+ */
+static bool g2d_still_need_perf(struct g2d_device *g2d_dev)
+{
+       struct g2d_task *cur;
+       unsigned long irqflags;
+       bool busy = false;
+
+       spin_lock_irqsave(&g2d_dev->lock_task, irqflags);
+
+       cur = g2d_dev->tasks;
+       while (cur) {
+               if (!is_task_state_idle(cur)) {
+                       busy = true;
+                       break;
+               }
+               cur = cur->next;
+       }
+
+       spin_unlock_irqrestore(&g2d_dev->lock_task, irqflags);
+
+       return busy;
+}
+
+/*
+ * g2d_set_device_frequency() - request the IP clock needed by a context
+ * @g2d_ctx: context issuing the request; its PM QoS request is updated
+ * @data: per-frame description of the work to be done
+ *
+ * The number of cycles of every frame is estimated from the pixel counts
+ * of its layers and the hw_ppc[] value matching each layer's attributes.
+ * The sum over all frames is turned into a clock, mapped through the DVFS
+ * table and handed to PM QoS.
+ *
+ * The caller must guarantee data->num_frame <= G2D_PERF_MAX_FRAMES and
+ * num_layers <= G2D_MAX_IMAGES for every frame: both are used as loop
+ * bounds without further checks and ppc[] has only G2D_MAX_IMAGES entries.
+ * All hw_ppc[] entries are used as divisors and must not be zero.
+ */
+static void g2d_set_device_frequency(struct g2d_context *g2d_ctx,
+                                         struct g2d_performance_data *data)
+{
+       struct g2d_device *g2d_dev = g2d_ctx->g2d_dev;
+       struct g2d_performance_frame_data *frame;
+       struct g2d_performance_layer_data *layer, *pair;
+       unsigned int cycle, cycle_src, cycle_dst, ip_clock;
+       unsigned int rot_size, no_rot_size;
+       unsigned int dst_ppc, ppc[G2D_MAX_IMAGES];
+       int i, j;
+       char sc, yuv2p, rot, rot_skip, gap;
+
+       cycle = 0;
+       /* note: gap is not reset per frame, it carries over between frames */
+       gap = false;
+
+       for (i = 0; i < data->num_frame; i++) {
+               frame = &data->frame[i];
+
+               rot_size = 0;
+               no_rot_size = 0;
+               cycle_src = 0;
+
+               /*
+                * The rotate variable means that the rotated layers and
+                * non-rotated layers are mixed.
+                * If all layers are rotated or are non rotated, that is
+                * excluded.
+                */
+               rot = 0;
+               for (j = 0; j < frame->num_layers; j++) {
+                       if (is_perf_layer_rotate(&frame->layer[j]))
+                               rot++;
+               }
+               rot_skip = (rot == frame->num_layers) ? 1 : 0;
+
+               for (j = 0; j < frame->num_layers; j++) {
+                       layer = &frame->layer[j];
+
+                       yuv2p = is_perf_layer_yuv2p(layer) ? 1 : 0;
+                       sc = is_perf_layer_scaling(layer) ? 1 : 0;
+                       rot = !rot_skip && is_perf_layer_rotate(layer) ? 1 : 0;
+
+                       /* index matches the first 8 entries of g2d_hw_ppc */
+                       ppc[j] =
+                               g2d_dev->hw_ppc[(yuv2p << 2) | (rot << 1) | sc];
+
+                       cycle_src += layer->pixelcount / ppc[j];
+
+                       /*
+                        * check rotated size for cycle_dst. rotated size is
+                        * bigger than non-rotated size, g2d write direction
+                        * is vertical, and it affects performance.
+                        */
+                       if (is_perf_layer_rotate(layer))
+                               rot_size += layer->pixelcount;
+                       else
+                               no_rot_size += layer->pixelcount;
+
+                       /*
+                        * The rotated layer affects the pair layer,
+                        * so we add the cycle using gap_ppc between pair
+                        * N layer and N+1 layer. The gap ppc is calculated
+                        * on odd layer and gap_pixelcount is pair layer's
+                        * nested region from 2 layers that means
+                        * the smaller region.
+                        */
+                       if (rot && (yuv2p || sc))
+                               gap = true;
+
+                       if (gap && (j & 0x1)) {
+                               unsigned int gap_pixelcount, gap_ppc;
+
+                               pair = &frame->layer[j - 1];
+                               gap = false;
+
+                               /* absolute difference of the two ppc values */
+                               gap_ppc = (ppc[j] > ppc[j - 1]) ?
+                                       (ppc[j] - ppc[j - 1]) :
+                                       (ppc[j - 1] - ppc[j]);
+                               /* equal ppc: no extra cycles for this pair */
+                               if (!gap_ppc)
+                                       continue;
+
+                               gap_ppc = (ppc[j] * ppc[j - 1]) / gap_ppc;
+
+                               gap_pixelcount = min(layer->pixelcount, pair->pixelcount);
+
+                               cycle_src += gap_pixelcount / gap_ppc;
+                       }
+               }
+
+               /* from here on rot tells whether rotated pixels dominate */
+               rot = (rot_size > no_rot_size) ? 1 : 0;
+               if (!rot && is_perf_frame_yuv2p(frame))
+                       dst_ppc = g2d_dev->hw_ppc[G2D_PPC_DST_YUV2P];
+               else if (!rot)
+                       dst_ppc = g2d_dev->hw_ppc[G2D_PPC_DST_DEFAULT];
+               else
+                       dst_ppc = g2d_dev->hw_ppc[G2D_PPC_DST_ROT];
+
+               cycle_dst = frame->target_pixelcount / dst_ppc;
+
+               /* the slower of the source and destination side counts */
+               cycle += max(cycle_src, cycle_dst);
+
+               if (is_perf_frame_colorfill(frame))
+                       cycle += frame->target_pixelcount /
+                                       g2d_dev->hw_ppc[G2D_PPC_COLORFILL];
+       }
+
+       /* ip_clock(Mhz) = cycles / time_in_ms * 1000 */
+       ip_clock = (cycle / 8) * 1000;
+
+       /*
+        * The first entry whose freq is below ip_clock selects the lv of the
+        * entry before it (or of entry 0).  If no entry matches, ip_clock is
+        * passed on unchanged.
+        */
+       for (i = 0; i < g2d_dev->dvfs_table_cnt; i++) {
+               if (ip_clock > g2d_dev->dvfs_table[i].freq) {
+                       ip_clock = (i == 0) ?
+                                       g2d_dev->dvfs_table[i].lv :
+                                       g2d_dev->dvfs_table[i - 1].lv;
+                       break;
+               }
+       }
+
+       /* a zero clock drops the request only when no task is pending */
+       if (!ip_clock && !g2d_still_need_perf(g2d_dev))
+               g2d_pm_qos_remove_devfreq(&g2d_ctx->req);
+       else if (ip_clock)
+               g2d_pm_qos_update_devfreq(&g2d_ctx->req, ip_clock);
+}
+
+/*
+ * g2d_set_qos_frequency() - record the bandwidth demand of a context
+ * @g2d_ctx: context issuing the request
+ * @data: performance description; bandwidth_read and bandwidth_write of
+ *        the first num_frame frames are summed up
+ *
+ * Contexts with a non-zero demand are kept on g2d_dev->qos_contexts sorted
+ * by total bandwidth, largest first, so the head of the list always holds
+ * the demand to be requested.  A zero demand takes the context off the
+ * list, unless tasks are still pending: then the request is ignored and
+ * the previous demand stays in place.
+ */
+static void g2d_set_qos_frequency(struct g2d_context *g2d_ctx,
+                                         struct g2d_performance_data *data)
+{
+       struct g2d_device *g2d_dev = g2d_ctx->g2d_dev;
+       struct g2d_performance_frame_data *frame;
+       /*
+        * 64 bit like g2d_context.r_bw/w_bw: the sum of several 32 bit
+        * per-frame values must not wrap around.
+        */
+       u64 cur_rbw, rbw;
+       u64 cur_wbw, wbw;
+       int i;
+
+       cur_rbw = 0;
+       cur_wbw = 0;
+       rbw = 0;
+       wbw = 0;
+
+       for (i = 0; i < data->num_frame; i++) {
+               frame = &data->frame[i];
+
+               rbw += frame->bandwidth_read;
+               wbw += frame->bandwidth_write;
+       }
+
+       /* nothing requested before and nothing requested now */
+       if (list_empty(&g2d_ctx->qos_node) && !rbw && !wbw)
+               return;
+
+       /* keep the current demand while tasks are still pending */
+       if (!rbw && !wbw && g2d_still_need_perf(g2d_dev))
+               return;
+
+       mutex_lock(&g2d_dev->lock_qos);
+
+       /* remember the demand in effect before this request */
+       if (!list_empty(&g2d_dev->qos_contexts)) {
+               struct g2d_context *ctx_qos;
+
+               ctx_qos = list_first_entry(&g2d_dev->qos_contexts,
+                                          struct g2d_context, qos_node);
+               cur_rbw = ctx_qos->r_bw;
+               cur_wbw = ctx_qos->w_bw;
+       }
+
+       /* this works although ctx is not attached to qos_contexts */
+       list_del_init(&g2d_ctx->qos_node);
+
+       g2d_ctx->r_bw = rbw;
+       g2d_ctx->w_bw = wbw;
+
+       if (rbw || wbw) {
+               struct list_head *node;
+
+               /* search from the tail for the first larger demand */
+               for (node = g2d_dev->qos_contexts.prev;
+                               node != &g2d_dev->qos_contexts;
+                                               node = node->prev) {
+                       struct g2d_context *curctx = list_entry(node,
+                                       struct g2d_context, qos_node);
+                       if ((curctx->r_bw + curctx->w_bw) > (rbw + wbw))
+                               break;
+               }
+               /*
+                * node always points to the head node or the smallest bw node
+                * among the larger bw nodes than qosnode
+                */
+               list_add(&g2d_ctx->qos_node, node);
+       }
+
+       if (!list_empty(&g2d_dev->qos_contexts)) {
+               struct g2d_context *ctx_qos;
+
+               ctx_qos = list_first_entry(&g2d_dev->qos_contexts,
+                                     struct g2d_context, qos_node);
+               /* bandwidth request is changed */
+               rbw = ctx_qos->r_bw;
+               wbw = ctx_qos->w_bw;
+       }
+
+       if ((rbw != cur_rbw) || (wbw != cur_wbw)) {
+               /*
+                * FIXME: BTS is not available for now
+                * struct bts_bw bw;
+                *
+                * bw.peak = ((rbw + wbw) / 1000) * BTS_PEAK_FPS_RATIO / 2;
+                * bw.write = wbw;
+                * bw.read = rbw;
+                * bts_update_bw(BTS_BW_G2D, bw);
+                */
+       }
+
+       mutex_unlock(&g2d_dev->lock_qos);
+}
+
+/*
+ * g2d_set_performance() - apply the performance request of a context
+ * @ctx: context issuing the request
+ * @data: description of the frames to be processed
+ *
+ * @data may come straight from userspace.  Its frame and layer counts are
+ * used as loop bounds over the fixed size frame[] and layer[] arrays (and
+ * over a stack array of G2D_MAX_IMAGES entries), so a request exceeding
+ * these sizes is rejected before any of it is evaluated.
+ */
+void g2d_set_performance(struct g2d_context *ctx,
+                               struct g2d_performance_data *data)
+{
+       u32 i;
+
+       if (data->num_frame > G2D_PERF_MAX_FRAMES) {
+               dev_err(ctx->g2d_dev->dev,
+                       "%s: Invalid number of frames %u\n",
+                       __func__, data->num_frame);
+               return;
+       }
+
+       for (i = 0; i < data->num_frame; i++) {
+               if (data->frame[i].num_layers > G2D_MAX_IMAGES) {
+                       dev_err(ctx->g2d_dev->dev,
+                               "%s: Invalid number of layers %u in frame %u\n",
+                               __func__, data->frame[i].num_layers, i);
+                       return;
+               }
+       }
+
+       g2d_set_qos_frequency(ctx, data);
+       g2d_set_device_frequency(ctx, data);
+}
+
+/*
+ * g2d_put_performance() - withdraw the performance request of a context
+ * @ctx: context that is going away
+ *
+ * Issues an empty request (no frames) for @ctx and then makes sure that
+ * nothing refers to @ctx any longer, because the caller frees it right
+ * after this call.
+ */
+void g2d_put_performance(struct g2d_context *ctx)
+{
+       struct g2d_device *g2d_dev = ctx->g2d_dev;
+       struct g2d_performance_data data;
+
+       /* only num_frame is read when it is zero */
+       data.num_frame = 0;
+
+       g2d_set_performance(ctx, &data);
+
+       /*
+        * The empty request is ignored while tasks are still pending, which
+        * would leave ctx linked on qos_contexts and its PM QoS request in
+        * place after ctx has been freed.  Detach both unconditionally;
+        * list_del_init() is harmless on an already detached node.
+        */
+       mutex_lock(&g2d_dev->lock_qos);
+       list_del_init(&ctx->qos_node);
+       mutex_unlock(&g2d_dev->lock_qos);
+
+       g2d_pm_qos_remove_devfreq(&ctx->req);
+}
diff --git a/drivers/gpu/exynos/g2d/g2d_perf.h b/drivers/gpu/exynos/g2d/g2d_perf.h
new file mode 100644 (file)
index 0000000..a437824
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * linux/drivers/gpu/exynos/g2d/g2d_perf.h
+ *
+ * Copyright (C) 2017 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _G2D_PERF_H_
+#define _G2D_PERF_H_
+
+struct g2d_context;
+struct g2d_performance_data;
+
+/*
+ * Attribute tests for a struct g2d_performance_layer_data pointer.
+ * They evaluate to the masked flag value, i.e. non-zero when the flag is
+ * set.  The G2D_PERF_LAYER_* flags have to be visible where they are used.
+ */
+#define is_perf_layer_yuv2p(layer) \
+               (((layer)->layer_attr) & G2D_PERF_LAYER_YUV2P)
+#define is_perf_layer_scaling(layer) \
+               (((layer)->layer_attr) & G2D_PERF_LAYER_SCALING)
+#define is_perf_layer_rotate(layer) \
+               (((layer)->layer_attr) & G2D_PERF_LAYER_ROTATE)
+
+/* Attribute tests for a struct g2d_performance_frame_data pointer. */
+#define is_perf_frame_yuv2p(frame) \
+               (((frame)->frame_attr) & G2D_PERF_FRAME_YUV2P)
+#define is_perf_frame_colorfill(frame) \
+               (((frame)->frame_attr) & G2D_PERF_FRAME_SOLIDCOLORFILL)
+
+/* factor of the (currently disabled) BTS peak bandwidth calculation */
+#define BTS_PEAK_FPS_RATIO 1667
+
+/* Apply the QoS and device frequency needed for the frames in @data. */
+void g2d_set_performance(struct g2d_context *ctx,
+                               struct g2d_performance_data *data);
+/* Withdraw the request of @ctx by issuing a request without frames. */
+void g2d_put_performance(struct g2d_context *ctx);
+
+#endif /* _G2D_PERF_H_ */
index 4b1806c7101a681811d3c904644facb3ef36e8c4..f57600b7a78248442a2bc2583ec249b719be49d4 100644 (file)
@@ -209,8 +209,60 @@ struct g2d_task_data {
        struct g2d_commands     commands;
 };
 
+/* flags of g2d_performance_layer_data.layer_attr */
+#define G2D_PERF_LAYER_ROTATE          (1 << 0)
+#define G2D_PERF_LAYER_SCALING         (1 << 1)
+#define G2D_PERF_LAYER_YUV2P           (1 << 2)
+
+/*
+ * struct g2d_performance_layer_data - description of a layer to process.
+ * @pixelcount : the pixel count of the layer, used to calculate the
+ *               frequency.
+ * @layer_attr : attributes of the layer affecting performance, a
+ *               combination of G2D_PERF_LAYER_* flags.
+ */
+struct g2d_performance_layer_data {
+       __u32 pixelcount;
+       __u32 layer_attr;
+};
+
+/* flags of g2d_performance_frame_data.frame_attr */
+#define G2D_PERF_FRAME_SOLIDCOLORFILL  (1 << 0)
+#define G2D_PERF_FRAME_YUV2P   (1 << 1)
+
+/*
+ * struct g2d_performance_frame_data - description of needed performance.
+ * @layer : the description of each layer to be processed.
+ * @bandwidth_read : the size of bandwidth to read when processing.
+ * @bandwidth_write : the size of bandwidth to write when processing.
+ * @target_pixelcount : the pixel count of the target image, used to
+ *                      calculate the cycles of the destination side and of
+ *                      a solid color fill.
+ * @frame_rate : frames per second of the job.
+ * @frame_attr : frame attributes, a combination of G2D_PERF_FRAME_* flags.
+ * @num_layers : the number of valid entries in @layer, at most
+ *               G2D_MAX_IMAGES.
+ */
+struct g2d_performance_frame_data {
+       struct g2d_performance_layer_data layer[G2D_MAX_IMAGES];
+       __u32 bandwidth_read;
+       __u32 bandwidth_write;
+       __u32 target_pixelcount;
+       __u32 frame_rate;
+       __u32 frame_attr;
+       __u32 num_layers;
+};
+
+/* the number of entries of g2d_performance_data.frame */
+#define G2D_PERF_MAX_FRAMES 4
+
+/*
+ * struct g2d_performance_data - description of the needed performance.
+ * @frame : the descriptions of each request's bandwidth and cycles in a
+ *          frame.
+ * @num_frame : the number of g2d jobs requested in a frame, i.e. the number
+ *              of valid entries in @frame, at most G2D_PERF_MAX_FRAMES.
+ * @reserved : not used by the driver code shown in this change.
+ */
+struct g2d_performance_data {
+       struct g2d_performance_frame_data frame[G2D_PERF_MAX_FRAMES];
+       __u32 num_frame;
+       __u32 reserved;
+};
+
 #define G2D_IOC_PROCESS                _IOWR('M', 4, struct g2d_task_data)
 #define G2D_IOC_PRIORITY               _IOR('M', 5, int32_t)
+/*
+ * Hand a struct g2d_performance_data to the driver.
+ * NOTE(review): the driver only reads this argument from userspace, which
+ * is conventionally encoded with _IOW; changing the direction would change
+ * the ioctl number, so confirm with the userspace before touching it.
+ */
+#define G2D_IOC_PERFORMANCE    _IOR('M', 6, struct g2d_performance_data)
 
 #endif /* _G2D_UAPI_H_ */