mm: mm_event supports vmstat
authorMinchan Kim <minchan@google.com>
Tue, 15 Jan 2019 04:54:07 +0000 (13:54 +0900)
committerPDO SCM Team <hudsoncm@motorola.com>
Fri, 15 Nov 2019 06:58:51 +0000 (00:58 -0600)
Vmstat is very important for investigating MM problems.
We have solved many problems with it by asking users to collect
vmstat data periodically from the device, but that manual approach
is painful once the device has shipped, or when the issue is hard
to reproduce.

This patch adds periodic vmstat dump into mm_event. It works
only if there are some events in compaction or reclaim. Thus,
unless there is memory pressure, it doesn't gather any vmstat
data. Default interval between each dump is 1000ms.
Admin can tweak it via

echo 2000 > /sys/kernel/debug/mm_event/vmstat_period_ms

Mot-CRs-fixed: (CR)

Bug: 80168800
Change-Id: I4c0e7237d7764c4ea79da00952e5de34ccbe4187
Signed-off-by: Minchan Kim <minchan@google.com>
Reviewed-on: https://gerrit.mot.com/1453728
SLTApproved: Slta Waiver
SME-Granted: SME Approvals Granted
Tested-by: Jira Key
Reviewed-by: Xiangpo Zhao <zhaoxp3@motorola.com>
Submit-Approved: Jira Key

include/linux/mm_event.h
include/trace/events/mm_event.h
mm/mm_event.c

index 4d6fe7f2c97c88c93e5070b6f99c52d4eae13b4a..724992376ff6a5bd7c37d9f9254aaf65aea597a4 100644 (file)
@@ -24,6 +24,23 @@ struct mm_event_task {
 
 struct task_struct;
 
+/*
+ * System-wide memory statistics snapshot emitted through the
+ * mm_event_vmstat_record tracepoint (see record_vmstat()).
+ * Page counts are in pages unless noted otherwise.
+ */
+struct mm_event_vmstat {
+       unsigned long free;            /* NR_FREE_PAGES */
+       unsigned long file;            /* active + inactive file LRU */
+       unsigned long anon;            /* active + inactive anon LRU */
+       unsigned long slab;            /* reclaimable + unreclaimable slab */
+       unsigned long ws_refault;      /* WORKINGSET_REFAULT */
+       unsigned long ws_activate;     /* WORKINGSET_ACTIVATE */
+       unsigned long mapped;          /* NR_FILE_MAPPED */
+       unsigned long pgin;            /* PGPGIN, converted to kbytes */
+       unsigned long pgout;           /* PGPGOUT, converted to kbytes */
+       unsigned long swpin;           /* PSWPIN */
+       unsigned long swpout;          /* PSWPOUT */
+       unsigned long reclaim_steal;   /* direct + kswapd pages stolen */
+       unsigned long reclaim_scan;    /* direct + kswapd pages scanned */
+       unsigned long compact_scan;    /* compaction scanner totals */
+};
+
+
 #ifdef CONFIG_MM_EVENT_STAT
 void mm_event_task_init(struct task_struct *tsk);
 void mm_event_start(ktime_t *time);
index 6073701d900c4169d74b6b74b2a334fc88cf698d..555faa309fa94b221f224e398249e9a5f34b1b41 100644 (file)
@@ -7,8 +7,10 @@
 #include <linux/types.h>
 #include <linux/tracepoint.h>
 #include <linux/mm.h>
+#include <linux/mm_event.h>
 
 struct mm_event_task;
+struct mm_event_vmstat;
 
 #define show_mm_event_type(type)                                       \
        __print_symbolic(type,                                          \
@@ -46,6 +48,58 @@ TRACE_EVENT(mm_event_record,
                                        __entry->max_lat)
 );
 
+/* Dump one struct mm_event_vmstat snapshot as a space-separated k=v line */
+TRACE_EVENT(mm_event_vmstat_record,
+
+	TP_PROTO(struct mm_event_vmstat *vmstat),
+
+	TP_ARGS(vmstat),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, free)
+		__field(unsigned long, file)
+		__field(unsigned long, anon)
+		__field(unsigned long, slab)
+		__field(unsigned long, ws_refault)
+		__field(unsigned long, ws_activate)
+		__field(unsigned long, mapped)
+		__field(unsigned long, pgin)
+		__field(unsigned long, pgout)
+		__field(unsigned long, swpin)
+		__field(unsigned long, swpout)
+		__field(unsigned long, reclaim_steal)
+		__field(unsigned long, reclaim_scan)
+		__field(unsigned long, compact_scan)
+	),
+
+	TP_fast_assign(
+		__entry->free           = vmstat->free;
+		__entry->file           = vmstat->file;
+		__entry->anon           = vmstat->anon;
+		__entry->slab           = vmstat->slab;
+		__entry->ws_refault     = vmstat->ws_refault;
+		__entry->ws_activate    = vmstat->ws_activate;
+		__entry->mapped         = vmstat->mapped;
+		__entry->pgin           = vmstat->pgin;
+		__entry->pgout          = vmstat->pgout;
+		__entry->swpin          = vmstat->swpin;
+		__entry->swpout         = vmstat->swpout;
+		__entry->reclaim_steal  = vmstat->reclaim_steal;
+		__entry->reclaim_scan   = vmstat->reclaim_scan;
+		__entry->compact_scan   = vmstat->compact_scan;
+	),
+
+	/*
+	 * Keep the format strictly "key=value key=value" — the original had
+	 * a stray comma after pgout which broke uniform parsing.
+	 */
+	TP_printk("free=%lu file=%lu anon=%lu slab=%lu ws_refault=%lu "
+		  "ws_activate=%lu mapped=%lu pgin=%lu pgout=%lu swpin=%lu "
+		  "swpout=%lu reclaim_steal=%lu reclaim_scan=%lu compact_scan=%lu",
+			__entry->free, __entry->file,
+			__entry->anon, __entry->slab,
+			__entry->ws_refault, __entry->ws_activate,
+			__entry->mapped, __entry->pgin, __entry->pgout,
+			__entry->swpin, __entry->swpout,
+			__entry->reclaim_steal, __entry->reclaim_scan,
+			__entry->compact_scan)
+);
+
+
 #endif /* _TRACE_MM_EVENT_H */
 
 /* This part must be outside protection */
index af2e28d41b6a876979d40ad83cb1e9ed947f39e3..967f7d1e93aec2c3a8e519a75b5955d8043f40a9 100644 (file)
@@ -9,6 +9,9 @@
 #include <trace/events/mm_event.h>
 /* msec */
 static unsigned long period_ms = 500;
+static unsigned long vmstat_period_ms = 1000;
+static DEFINE_SPINLOCK(vmstat_lock);
+static unsigned long vmstat_next_period;
 
 void mm_event_task_init(struct task_struct *tsk)
 {
@@ -16,20 +19,76 @@ void mm_event_task_init(struct task_struct *tsk)
        tsk->next_period = 0;
 }
 
+/*
+ * Capture a system-wide vmstat snapshot and emit it via the
+ * mm_event_vmstat_record tracepoint.
+ *
+ * Rate-limited to one snapshot per vmstat_period_ms: callers race on
+ * vmstat_next_period, hence the re-check under vmstat_lock before any
+ * single caller claims the period and does the (relatively expensive)
+ * per-cpu event walk.
+ */
+static void record_vmstat(void)
+{
+	int cpu;
+	struct mm_event_vmstat vmstat;
+
+	if (!time_is_before_eq_jiffies(vmstat_next_period))
+		return;
+
+	/* Need double check under the lock */
+	spin_lock(&vmstat_lock);
+	if (!time_is_before_eq_jiffies(vmstat_next_period)) {
+		spin_unlock(&vmstat_lock);
+		return;
+	}
+	vmstat_next_period = jiffies + msecs_to_jiffies(vmstat_period_ms);
+	spin_unlock(&vmstat_lock);
+
+	memset(&vmstat, 0, sizeof(vmstat));
+	vmstat.free = global_zone_page_state(NR_FREE_PAGES);
+	vmstat.slab = global_node_page_state(NR_SLAB_RECLAIMABLE) +
+			global_node_page_state(NR_SLAB_UNRECLAIMABLE);
+
+	vmstat.file = global_node_page_state(NR_ACTIVE_FILE) +
+			global_node_page_state(NR_INACTIVE_FILE);
+	vmstat.anon = global_node_page_state(NR_ACTIVE_ANON) +
+			global_node_page_state(NR_INACTIVE_ANON);
+
+	vmstat.ws_refault = global_node_page_state(WORKINGSET_REFAULT);
+	vmstat.ws_activate = global_node_page_state(WORKINGSET_ACTIVATE);
+	vmstat.mapped = global_node_page_state(NR_FILE_MAPPED);
+
+	/* Don't create a lock dependency between vmstat_lock and hotplug */
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
+
+		/* sectors to kbytes for PGPGIN/PGPGOUT */
+		vmstat.pgin += this->event[PGPGIN] / 2;
+		vmstat.pgout += this->event[PGPGOUT] / 2;
+		vmstat.swpin += this->event[PSWPIN];
+		vmstat.swpout += this->event[PSWPOUT];
+		vmstat.reclaim_steal += this->event[PGSTEAL_DIRECT] +
+					this->event[PGSTEAL_KSWAPD];
+		vmstat.reclaim_scan += this->event[PGSCAN_DIRECT] +
+					this->event[PGSCAN_KSWAPD];
+		/*
+		 * Sum both compaction scanners. The original summed
+		 * COMPACTFREE_SCANNED twice, double-counting free scans
+		 * and losing the migrate-scanner counts entirely.
+		 */
+		vmstat.compact_scan += this->event[COMPACTMIGRATE_SCANNED] +
+					this->event[COMPACTFREE_SCANNED];
+	}
+	put_online_cpus();
+	trace_mm_event_vmstat_record(&vmstat);
+}
+
+
 static void record_stat(void)
 {
        if (time_is_before_eq_jiffies(current->next_period)) {
                int i;
+               /*
+                * Only dump vmstat when this period saw compaction or
+                * reclaim activity, so idle systems generate no traffic.
+                */
+               bool need_vmstat = false;
 
                for (i = 0; i < MM_TYPE_NUM; i++) {
                        if (current->mm_event[i].count == 0)
                                continue;
-
+                       if (i == MM_COMPACTION || i == MM_RECLAIM)
+                               need_vmstat = true;
                        trace_mm_event_record(i, &current->mm_event[i]);
                        memset(&current->mm_event[i], 0,
                                        sizeof(struct mm_event_task));
                }
                current->next_period = jiffies + msecs_to_jiffies(period_ms);
+               /* record_vmstat() is itself rate-limited by vmstat_period_ms */
+               if (need_vmstat)
+                       record_vmstat();
        }
 }
 
 
@@ -72,8 +131,25 @@ static int period_ms_get(void *data, u64 *val)
        return 0;
 }
 
+/*
+ * debugfs write handler: set the minimum interval (ms) between vmstat
+ * dumps. Rejects zero and values that would not fit in an unsigned long
+ * (only possible on 32-bit).
+ */
+static int vmstat_period_ms_set(void *data, u64 val)
+{
+	if (!val || val > ULONG_MAX)
+		return -EINVAL;
+
+	vmstat_period_ms = val;
+	return 0;
+}
+
+
+/* debugfs read handler: report the current vmstat dump interval (ms) */
+static int vmstat_period_ms_get(void *data, u64 *val)
+{
+       *val = vmstat_period_ms;
+       return 0;
+}
+
+
 DEFINE_SIMPLE_ATTRIBUTE(period_ms_operations, period_ms_get,
                        period_ms_set, "%llu\n");
+DEFINE_SIMPLE_ATTRIBUTE(vmstat_period_ms_operations, vmstat_period_ms_get,
+                       vmstat_period_ms_set, "%llu\n");
 
 static int __init mm_event_init(void)
 {
@@ -94,6 +170,14 @@ static int __init mm_event_init(void)
                return PTR_ERR(entry);
        }
 
+       entry = debugfs_create_file("vmstat_period_ms", 0644,
+                       mm_event_root, NULL, &vmstat_period_ms_operations);
+       if (IS_ERR(entry)) {
+               pr_warn("debugfs file vmstat_mm_event_task creation failed\n");
+               debugfs_remove_recursive(mm_event_root);
+               return PTR_ERR(entry);
+       }
+
        return 0;
 }
 subsys_initcall(mm_event_init);