drm/i915/gvt: rewrite the trace gvt:gvt_command using trace style approach
authorChangbin Du <changbin.du@intel.com>
Wed, 3 May 2017 01:20:10 +0000 (09:20 +0800)
committerZhenyu Wang <zhenyuw@linux.intel.com>
Thu, 8 Jun 2017 05:59:14 +0000 (13:59 +0800)
The gvt:gvt_command trace involve unnecessary overhead even this trace is
not enabled. We need improve it.

The kernel trace infrastructure provide a full api to define a trace event.
We should leverage them if possible. And one important thing is that a trace
point should store raw data but not format string.

This patch include two part work:
1) Refactor the gvt_command trace definition, including:
  o only store raw trace data.
  o use __dynamic_array() to declare a variable size buffer.
  o use __print_array() to format raw cmd data.
  o rename vm_id as vgpu_id.

2) Improve the trace invoking, including:
  o remove the cycles calculation for handler. We can get this data
    by any perf tool.
  o do not make a backup for raw cmd data which just doesn't make sense.

With this patch, this trace has no overhead if it is not enabled. And we are
trace style now.

The final output example:
  gvt workload 0-211   [000] ...1   120.555964: gvt_command: vgpu1 ring 0: buf_type 0, ip_gma e161e880, raw cmd {0x4000000}
  gvt workload 0-211   [000] ...1   120.556014: gvt_command: vgpu1 ring 0: buf_type 0, ip_gma e161e884, raw cmd {0x7a000004,0x1004000,0xe1511018,0x0,0x7d,0x0}
  gvt workload 0-211   [000] ...1   120.556062: gvt_command: vgpu1 ring 0: buf_type 0, ip_gma e161e89c, raw cmd {0x7a000004,0x140000,0x0,0x0,0x0,0x0}
  gvt workload 0-211   [000] ...1   120.556110: gvt_command: vgpu1 ring 0: buf_type 0, ip_gma e161e8b4, raw cmd {0x10400002,0xe1511018,0x0,0x7d}

Signed-off-by: Changbin Du <changbin.du@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
drivers/gpu/drm/i915/gvt/cmd_parser.c
drivers/gpu/drm/i915/gvt/trace.h

index 41b2c3aaa04a5ab82fd72da44369a78b15a0dfa4..5634eb1fa24bfe7045081219560c0d939579371c 100644 (file)
@@ -2414,53 +2414,13 @@ static void add_cmd_entry(struct intel_gvt *gvt, struct cmd_entry *e)
        hash_add(gvt->cmd_table, &e->hlist, e->info->opcode);
 }
 
-#define GVT_MAX_CMD_LENGTH     20  /* In Dword */
-
-static void trace_cs_command(struct parser_exec_state *s,
-               cycles_t cost_pre_cmd_handler, cycles_t cost_cmd_handler)
-{
-       /* This buffer is used by ftrace to store all commands copied from
-        * guest gma space. Sometimes commands can cross pages, this should
-        * not be handled in ftrace logic. So this is just used as a
-        * 'bounce buffer'
-        */
-       u32 cmd_trace_buf[GVT_MAX_CMD_LENGTH];
-       int i;
-       u32 cmd_len = cmd_length(s);
-       /* The chosen value of GVT_MAX_CMD_LENGTH are just based on
-        * following two considerations:
-        * 1) From observation, most common ring commands is not that long.
-        *    But there are execeptions. So it indeed makes sence to observe
-        *    longer commands.
-        * 2) From the performance and debugging point of view, dumping all
-        *    contents of very commands is not necessary.
-        * We mgith shrink GVT_MAX_CMD_LENGTH or remove this trace event in
-        * future for performance considerations.
-        */
-       if (unlikely(cmd_len > GVT_MAX_CMD_LENGTH)) {
-               gvt_dbg_cmd("cmd length exceed tracing limitation!\n");
-               cmd_len = GVT_MAX_CMD_LENGTH;
-       }
-
-       for (i = 0; i < cmd_len; i++)
-               cmd_trace_buf[i] = cmd_val(s, i);
-
-       trace_gvt_command(s->vgpu->id, s->ring_id, s->ip_gma, cmd_trace_buf,
-                       cmd_len, s->buf_type == RING_BUFFER_INSTRUCTION,
-                       cost_pre_cmd_handler, cost_cmd_handler);
-}
-
 /* call the cmd handler, and advance ip */
 static int cmd_parser_exec(struct parser_exec_state *s)
 {
+       struct intel_vgpu *vgpu = s->vgpu;
        struct cmd_info *info;
        u32 cmd;
        int ret = 0;
-       cycles_t t0, t1, t2;
-       struct parser_exec_state s_before_advance_custom;
-       struct intel_vgpu *vgpu = s->vgpu;
-
-       t0 = get_cycles();
 
        cmd = cmd_val(s, 0);
 
@@ -2475,9 +2435,8 @@ static int cmd_parser_exec(struct parser_exec_state *s)
 
        s->info = info;
 
-       t1 = get_cycles();
-
-       s_before_advance_custom = *s;
+       trace_gvt_command(vgpu->id, s->ring_id, s->ip_gma, s->ip_va,
+                         cmd_length(s), s->buf_type);
 
        if (info->handler) {
                ret = info->handler(s);
@@ -2486,9 +2445,6 @@ static int cmd_parser_exec(struct parser_exec_state *s)
                        return ret;
                }
        }
-       t2 = get_cycles();
-
-       trace_cs_command(&s_before_advance_custom, t1 - t0, t2 - t1);
 
        if (!(info->flag & F_IP_ADVANCE_CUSTOM)) {
                ret = cmd_advance_default(s);
index 53a2d10cf3f12c02da61e23e9d62e6d0f1d4b0e9..9171291e36c62f57981a4bb94e4e6ff9cec7951e 100644 (file)
@@ -224,57 +224,37 @@ TRACE_EVENT(oos_sync,
        TP_printk("%s", __entry->buf)
 );
 
-#define MAX_CMD_STR_LEN        256
 TRACE_EVENT(gvt_command,
-               TP_PROTO(u8 vm_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len, bool ring_buffer_cmd, cycles_t cost_pre_cmd_handler, cycles_t cost_cmd_handler),
-
-               TP_ARGS(vm_id, ring_id, ip_gma, cmd_va, cmd_len, ring_buffer_cmd, cost_pre_cmd_handler, cost_cmd_handler),
-
-               TP_STRUCT__entry(
-                       __field(u8, vm_id)
-                       __field(u8, ring_id)
-                       __field(int, i)
-                       __array(char, tmp_buf, MAX_CMD_STR_LEN)
-                       __array(char, cmd_str, MAX_CMD_STR_LEN)
-                       ),
-
-               TP_fast_assign(
-                       __entry->vm_id = vm_id;
-                       __entry->ring_id = ring_id;
-                       __entry->cmd_str[0] = '\0';
-                       snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "VM(%d) Ring(%d): %s ip(%08x) pre handler cost (%llu), handler cost (%llu) ", vm_id, ring_id, ring_buffer_cmd ? "RB":"BB", ip_gma, cost_pre_cmd_handler, cost_cmd_handler);
-                       strcat(__entry->cmd_str, __entry->tmp_buf);
-                       entry->i = 0;
-                       while (cmd_len > 0) {
-                               if (cmd_len >= 8) {
-                                       snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x %08x %08x %08x %08x %08x %08x %08x ",
-                                               cmd_va[__entry->i], cmd_va[__entry->i+1], cmd_va[__entry->i+2], cmd_va[__entry->i+3],
-                                               cmd_va[__entry->i+4], cmd_va[__entry->i+5], cmd_va[__entry->i+6], cmd_va[__entry->i+7]);
-                                       __entry->i += 8;
-                                       cmd_len -= 8;
-                                       strcat(__entry->cmd_str, __entry->tmp_buf);
-                               } else if (cmd_len >= 4) {
-                                       snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x %08x %08x %08x ",
-                                               cmd_va[__entry->i], cmd_va[__entry->i+1], cmd_va[__entry->i+2], cmd_va[__entry->i+3]);
-                                       __entry->i += 4;
-                                       cmd_len -= 4;
-                                       strcat(__entry->cmd_str, __entry->tmp_buf);
-                               } else if (cmd_len >= 2) {
-                                       snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x %08x ", cmd_va[__entry->i], cmd_va[__entry->i+1]);
-                                       __entry->i += 2;
-                                       cmd_len -= 2;
-                                       strcat(__entry->cmd_str, __entry->tmp_buf);
-                               } else if (cmd_len == 1) {
-                                       snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x ", cmd_va[__entry->i]);
-                                       __entry->i += 1;
-                                       cmd_len -= 1;
-                                       strcat(__entry->cmd_str, __entry->tmp_buf);
-                               }
-                       }
-                       strcat(__entry->cmd_str, "\n");
-               ),
+       TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len,
+                u32 buf_type),
+
+       TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type),
+
+       TP_STRUCT__entry(
+               __field(u8, vgpu_id)
+               __field(u8, ring_id)
+               __field(u32, ip_gma)
+               __field(u32, buf_type)
+               __field(u32, cmd_len)
+               __dynamic_array(u32, raw_cmd, cmd_len)
+       ),
+
+       TP_fast_assign(
+               __entry->vgpu_id = vgpu_id;
+               __entry->ring_id = ring_id;
+               __entry->ip_gma = ip_gma;
+               __entry->buf_type = buf_type;
+               __entry->cmd_len = cmd_len;
+               memcpy(__get_dynamic_array(raw_cmd), cmd_va, cmd_len * sizeof(*cmd_va));
+       ),
+
 
-               TP_printk("%s", __entry->cmd_str)
+       TP_printk("vgpu%d ring %d: buf_type %u, ip_gma %08x, raw cmd %s",
+               __entry->vgpu_id,
+               __entry->ring_id,
+               __entry->buf_type,
+               __entry->ip_gma,
+               __print_array(__get_dynamic_array(raw_cmd), __entry->cmd_len, 4))
 );
 #endif /* _GVT_TRACE_H_ */