mm: introduce per-process mm event tracking feature

author Minchan Kim <minchan@google.com>

Mon, 6 Aug 2018 06:00:19 +0000 (15:00 +0900)

committer lingsen1 <lingsen1@lenovo.com>

Sun, 7 Feb 2021 09:37:07 +0000 (17:37 +0800)
author Minchan Kim <minchan@google.com>
Mon, 6 Aug 2018 06:00:19 +0000 (15:00 +0900)
committer lingsen1 <lingsen1@lenovo.com>
Sun, 7 Feb 2021 09:37:07 +0000 (17:37 +0800)
diff --git a/include/linux/mm_event.h b/include/linux/mm_event.h

new file mode 100644 (file)

index 0000000..5a9069e
--- /dev/null
+++ b/include/linux/mm_event.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_MM_EVENT_H
+#define _LINUX_MM_EVENT_H
+
+#include <linux/types.h>
+#include <linux/ktime.h>
+
+enum mm_event_type {
+       MM_MIN_FAULT = 0,       /* shown as "min_flt" in the trace output */
+       MM_MAJ_FAULT,           /* shown as "maj_flt" in the trace output */
+       MM_COMPACTION,          /* shown as "compaction" in the trace output */
+       MM_RECLAIM,             /* shown as "reclaim" in the trace output */
+       MM_TYPE_NUM,            /* number of types; sizes the per-task mm_event[] */
+};
+
+struct mm_event_task {
+       unsigned int count;     /* events recorded since the last reset */
+       unsigned int max_lat;   /* longest single event, in microseconds */
+       u64 accm_lat;           /* sum of event latencies, in microseconds */
+} __attribute__ ((packed));
+
+struct task_struct;
+
+#ifdef CONFIG_MM_EVENT_STAT
+void mm_event_task_init(struct task_struct *tsk);
+void mm_event_start(ktime_t *time);
+void mm_event_end(enum mm_event_type event, ktime_t start);
+#else /* !CONFIG_MM_EVENT_STAT: empty stubs so callers need no #ifdef */
+static inline void mm_event_task_init(struct task_struct *tsk) {}
+static inline void mm_event_start(ktime_t *time) {}
+static inline void mm_event_end(enum mm_event_type event, ktime_t start) {}
+#endif /* CONFIG_MM_EVENT_STAT */
+#endif /* _LINUX_MM_EVENT_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h

index e99ac4d74444cbe2aec0aeba71e088195b81a7f2..afcd95fd05e8bc35f4444257c0e6a18597d93de1 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -8,6 +8,7 @@
   */
  
  #include <uapi/linux/sched.h>
+#include <linux/mm_event.h>
  
  #include <asm/current.h>
  
@@ -985,6 +986,10 @@ struct task_struct {
         struct rt_mutex_waiter          *pi_blocked_on;
  #endif
  
+#ifdef CONFIG_MM_EVENT_STAT
+       struct mm_event_task    mm_event[MM_TYPE_NUM];
+       unsigned long           next_period;
+#endif
  #ifdef CONFIG_DEBUG_MUTEXES
         /* Mutex deadlock detection: */
         struct mutex_waiter             *blocked_on;
diff --git a/include/trace/events/mm_event.h b/include/trace/events/mm_event.h

new file mode 100644 (file)

index 0000000..f43dee5
--- /dev/null
+++ b/include/trace/events/mm_event.h
@@ -0,0 +1,49 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mm_event
+
+#if !defined(_TRACE_MM_EVENT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MM_EVENT_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+#include <linux/mm.h>
+#include <linux/math64.h>
+
+struct mm_event_task;
+
+/* Translate an enum mm_event_type value into the name printed in the trace. */
+#define show_mm_event_type(type)                                       \
+       __print_symbolic(type,                                          \
+       { MM_MIN_FAULT, "min_flt" },                                    \
+       { MM_MAJ_FAULT, "maj_flt" },                                    \
+       { MM_COMPACTION, "compaction" },                                \
+       { MM_RECLAIM, "reclaim" })
+
+/*
+ * mm_event_record - report the statistics accumulated for one event type.
+ * @type:   event type the record belongs to
+ * @record: accumulated statistics; @record->count is used as the divisor
+ *          for the average latency and therefore must not be zero
+ *
+ * avg_lat and max_lat are reported in the unit stored in @record.
+ */
+TRACE_EVENT(mm_event_record,
+
+       TP_PROTO(enum mm_event_type type, struct mm_event_task *record),
+
+       TP_ARGS(type, record),
+
+       TP_STRUCT__entry(
+               __field(enum mm_event_type, type)
+               __field(unsigned int,   count)
+               __field(unsigned int,   avg_lat)
+               __field(unsigned int,   max_lat)
+       ),
+
+       /*
+        * accm_lat is a u64: use div_u64() because an open-coded 64-bit
+        * division does not link on 32-bit kernels.
+        */
+       TP_fast_assign(
+               __entry->type           = type;
+               __entry->count          = record->count;
+               __entry->avg_lat        = div_u64(record->accm_lat,
+                                                 record->count);
+               __entry->max_lat        = record->max_lat;
+       ),
+
+       /* All three numeric fields are unsigned int, hence %u throughout. */
+       TP_printk("%s count=%u avg_lat=%u max_lat=%u",
+                                       show_mm_event_type(__entry->type),
+                                       __entry->count, __entry->avg_lat,
+                                       __entry->max_lat)
+);
+
+#endif /* _TRACE_MM_EVENT_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/fork.c b/kernel/fork.c

index 5a34c13ca34b7b2e8ccd85e592c8dfc80a3069f0..5d81e13fe12796ff3bc113f9a1072972c7c42865 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1227,6 +1227,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
  
         tsk->min_flt = tsk->maj_flt = 0;
         tsk->nvcsw = tsk->nivcsw = 0;
+       mm_event_task_init(tsk);
  #ifdef CONFIG_DETECT_HUNG_TASK
         tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
  #endif
diff --git a/mm/Kconfig b/mm/Kconfig

index 656af28886eaedd4781daeff547966ff78d55131..62034754105d6a2a1a25107016294766b4d6f52f 100644 (file)
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -621,6 +621,22 @@ config ZSMALLOC_STAT
           information to userspace via debugfs.
           If unsure, say N.
  
+config MM_EVENT_STAT
+       bool "Track per-process MM event"
+       depends on MMU
+       help
+         This option enables per-process mm event statistics (e.g., fault,
+         reclaim, compaction and so on) at a fixed interval (default is
+         0.5 sec). The admin can see the statistics in the trace file via
+         debugfs (e.g., /sys/kernel/debug/tracing/trace).
+
+         It includes the max/average memory allocation latency for the
+         interval as well as the event count, so that the admin can see what
+         happens on the VM side (how often each event happens and how much
+         time processes spent on it). Too large a value is a bad sign.
+
+         System can dump the trace into bugreport when user allows the dump.
+
  config GENERIC_EARLY_IOREMAP
         bool
  
diff --git a/mm/Makefile b/mm/Makefile

index 3c72e1ba63484af3791fe640ca570243f90f7c05..1430c0b6cf69b3d567ea5b61a2bbe3b21dea4d37 100644 (file)
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -28,6 +28,9 @@ mmu-$(CONFIG_MMU)     := gup.o highmem.o memory.o mincore.o \
                            rmap.o vmalloc.o
  
  
+ifdef CONFIG_MM_EVENT_STAT
+mmu-$(CONFIG_MMU)      += mm_event.o
+endif
  ifdef CONFIG_CROSS_MEMORY_ATTACH
  mmu-$(CONFIG_MMU)      += process_vm_access.o
  endif
diff --git a/mm/mm_event.c b/mm/mm_event.c

new file mode 100644 (file)

index 0000000..f2ba7b9
--- /dev/null
+++ b/mm/mm_event.c
@@ -0,0 +1,62 @@
+#include <linux/mm.h>
+#include <linux/mm_event.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/mm_event.h>
+
+/* Clear every per-event statistics slot of @tsk and reset its report deadline. */
+void mm_event_task_init(struct task_struct *tsk)
+{
+       tsk->next_period = 0;
+       memset(tsk->mm_event, 0, sizeof(tsk->mm_event));
+}
+
+/*
+ * Flush the statistics of the current task once its report deadline has
+ * been reached: every event type with a non-zero count is emitted as an
+ * mm_event_record trace event and then cleared. Afterwards the deadline
+ * is moved HZ / 2 jiffies (half a second) past the current jiffies value.
+ */
+static void record_stat(void)
+{
+       int type;
+
+       if (!time_is_before_eq_jiffies(current->next_period))
+               return;
+
+       for (type = 0; type < MM_TYPE_NUM; type++) {
+               struct mm_event_task *stat = &current->mm_event[type];
+
+               /* Nothing happened for this type; do not emit an empty record. */
+               if (!stat->count)
+                       continue;
+
+               trace_mm_event_record(type, stat);
+               memset(stat, 0, sizeof(*stat));
+       }
+       current->next_period = jiffies + (HZ >> 1);
+}
+
+/* Store the current ktime_get() timestamp in *@time. */
+void mm_event_start(ktime_t *time)
+{
+       ktime_t now = ktime_get();
+
+       *time = now;
+}
+
+/*
+ * Account one occurrence of @event to the current task. The time elapsed
+ * since @start, in microseconds, is added to the accumulated latency and
+ * becomes the new maximum when it exceeds the recorded one. record_stat()
+ * is then called so the statistics can be flushed when they are due.
+ */
+void mm_event_end(enum mm_event_type event, ktime_t start)
+{
+       s64 elapsed = ktime_us_delta(ktime_get(), start);
+       struct mm_event_task *stat = &current->mm_event[event];
+
+       stat->count += 1;
+       stat->accm_lat += elapsed;
+       if (stat->max_lat < elapsed)
+               stat->max_lat = elapsed;
+       record_stat();
+}
+
+/* The "mm_event" debugfs directory, created once by mm_event_init(). */
+static struct dentry *mm_event_root;
+
+/*
+ * Create the "mm_event" directory at the top level of debugfs.
+ *
+ * Returns 0 on success, or -ENOMEM when debugfs_create_dir() hands back
+ * NULL. PTR_ERR(NULL) evaluates to 0 and so cannot be used to report
+ * that failure.
+ */
+static int __init mm_event_init(void)
+{
+       mm_event_root = debugfs_create_dir("mm_event", NULL);
+       if (!mm_event_root) {
+               pr_warn("debugfs dir <mm_event> creation failed\n");
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+subsys_initcall(mm_event_init);
author	Minchan Kim <minchan@google.com>
	Mon, 6 Aug 2018 06:00:19 +0000 (15:00 +0900)
committer	lingsen1 <lingsen1@lenovo.com>
	Sun, 7 Feb 2021 09:37:07 +0000 (17:37 +0800)
include/linux/mm_event.h	[new file with mode: 0644]	patch \| blob
include/linux/sched.h		patch \| blob \| blame \| history
include/trace/events/mm_event.h	[new file with mode: 0644]	patch \| blob
kernel/fork.c		patch \| blob \| blame \| history
mm/Kconfig		patch \| blob \| blame \| history
mm/Makefile		patch \| blob \| blame \| history
mm/mm_event.c	[new file with mode: 0644]	patch \| blob