From d56593f27939d9841d43d91f584e601a16124d4d Mon Sep 17 00:00:00 2001
From: Minchan Kim
Date: Tue, 15 Jan 2019 13:54:07 +0900
Subject: [PATCH] mm: mm_event supports vmstat

Vmstat is very important for investigating MM problems. We have solved
many problems with it by asking users to collect vmstat data
periodically from the device, but that manual approach is painful once
the device has been released or when the scenario is hard to reproduce.

This patch adds a periodic vmstat dump to mm_event. It works only if
there are some events in compaction or reclaim. Thus, unless there is
memory pressure, it doesn't gather any vmstat data.

The default interval between dumps is 1000ms. The admin can tweak it via

    echo 2000 > /sys/kernel/debug/mm_event/vmstat_period_ms

Mot-CRs-fixed: (CR)

Bug: 80168800
Change-Id: I4c0e7237d7764c4ea79da00952e5de34ccbe4187
Signed-off-by: Minchan Kim
Reviewed-on: https://gerrit.mot.com/1453728
SLTApproved: Slta Waiver
SME-Granted: SME Approvals Granted
Tested-by: Jira Key
Reviewed-by: Xiangpo Zhao
Submit-Approved: Jira Key
---
 include/linux/mm_event.h        | 17 +++++++
 include/trace/events/mm_event.h | 54 +++++++++++++++++++++
 mm/mm_event.c                   | 86 ++++++++++++++++++++++++++++++++-
 3 files changed, 156 insertions(+), 1 deletion(-)

diff --git a/include/linux/mm_event.h b/include/linux/mm_event.h
index 4d6fe7f2c97c..724992376ff6 100644
--- a/include/linux/mm_event.h
+++ b/include/linux/mm_event.h
@@ -24,6 +24,23 @@ struct mm_event_task {
 
 struct task_struct;
 
+struct mm_event_vmstat {
+	unsigned long free;
+	unsigned long file;
+	unsigned long anon;
+	unsigned long slab;
+	unsigned long ws_refault;
+	unsigned long ws_activate;
+	unsigned long mapped;
+	unsigned long pgin;
+	unsigned long pgout;
+	unsigned long swpin;
+	unsigned long swpout;
+	unsigned long reclaim_steal;
+	unsigned long reclaim_scan;
+	unsigned long compact_scan;
+};
+
 #ifdef CONFIG_MM_EVENT_STAT
 void mm_event_task_init(struct task_struct *tsk);
 void mm_event_start(ktime_t *time);
diff --git
a/include/trace/events/mm_event.h b/include/trace/events/mm_event.h
index 6073701d900c..555faa309fa9 100644
--- a/include/trace/events/mm_event.h
+++ b/include/trace/events/mm_event.h
@@ -7,8 +7,10 @@
 #include
 #include
 #include
+#include
 
 struct mm_event_task;
+struct mm_event_vmstat;
 
 #define show_mm_event_type(type) \
 	__print_symbolic(type, \
@@ -46,6 +48,58 @@ TRACE_EVENT(mm_event_record,
 		__entry->max_lat)
 );
 
+TRACE_EVENT(mm_event_vmstat_record,
+
+	TP_PROTO(struct mm_event_vmstat *vmstat),
+
+	TP_ARGS(vmstat),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, free)
+		__field(unsigned long, file)
+		__field(unsigned long, anon)
+		__field(unsigned long, slab)
+		__field(unsigned long, ws_refault)
+		__field(unsigned long, ws_activate)
+		__field(unsigned long, mapped)
+		__field(unsigned long, pgin)
+		__field(unsigned long, pgout)
+		__field(unsigned long, swpin)
+		__field(unsigned long, swpout)
+		__field(unsigned long, reclaim_steal)
+		__field(unsigned long, reclaim_scan)
+		__field(unsigned long, compact_scan)
+	),
+
+	TP_fast_assign(
+		__entry->free = vmstat->free;
+		__entry->file = vmstat->file;
+		__entry->anon = vmstat->anon;
+		__entry->slab = vmstat->slab;
+		__entry->ws_refault = vmstat->ws_refault;
+		__entry->ws_activate = vmstat->ws_activate;
+		__entry->mapped = vmstat->mapped;
+		__entry->pgin = vmstat->pgin;
+		__entry->pgout = vmstat->pgout;
+		__entry->swpin = vmstat->swpin;
+		__entry->swpout = vmstat->swpout;
+		__entry->reclaim_steal = vmstat->reclaim_steal;
+		__entry->reclaim_scan = vmstat->reclaim_scan;
+		__entry->compact_scan = vmstat->compact_scan;
+	),
+
+	TP_printk("free=%lu file=%lu anon=%lu slab=%lu ws_refault=%lu "
+		"ws_activate=%lu mapped=%lu pgin=%lu pgout=%lu swpin=%lu "
+		"swpout=%lu reclaim_steal=%lu reclaim_scan=%lu compact_scan=%lu",
+		__entry->free, __entry->file,
+		__entry->anon, __entry->slab,
+		__entry->ws_refault, __entry->ws_activate,
+		__entry->mapped, __entry->pgin, __entry->pgout,
+		__entry->swpin, __entry->swpout,
+		__entry->reclaim_steal, __entry->reclaim_scan,
+		__entry->compact_scan)
+);
+
 #endif /* _TRACE_MM_EVENT_H */
 
 /* This part must be outside protection */
diff --git a/mm/mm_event.c b/mm/mm_event.c
index af2e28d41b6a..967f7d1e93ae 100644
--- a/mm/mm_event.c
+++ b/mm/mm_event.c
@@ -9,6 +9,9 @@
 #include
 /* msec */
 static unsigned long period_ms = 500;
+static unsigned long vmstat_period_ms = 1000;
+static DEFINE_SPINLOCK(vmstat_lock);
+static unsigned long vmstat_next_period;
 
 void mm_event_task_init(struct task_struct *tsk)
 {
@@ -16,20 +19,76 @@ void mm_event_task_init(struct task_struct *tsk)
 	tsk->next_period = 0;
 }
 
+static void record_vmstat(void)
+{
+	int cpu;
+	struct mm_event_vmstat vmstat;
+
+	if (!time_is_before_eq_jiffies(vmstat_next_period))
+		return;
+
+	/* Re-check under the lock in case another task got here first */
+	spin_lock(&vmstat_lock);
+	if (!time_is_before_eq_jiffies(vmstat_next_period)) {
+		spin_unlock(&vmstat_lock);
+		return;
+	}
+	vmstat_next_period = jiffies + msecs_to_jiffies(vmstat_period_ms);
+	spin_unlock(&vmstat_lock);
+
+	memset(&vmstat, 0, sizeof(vmstat));
+	vmstat.free = global_zone_page_state(NR_FREE_PAGES);
+	vmstat.slab = global_node_page_state(NR_SLAB_RECLAIMABLE) +
+			global_node_page_state(NR_SLAB_UNRECLAIMABLE);
+
+	vmstat.file = global_node_page_state(NR_ACTIVE_FILE) +
+			global_node_page_state(NR_INACTIVE_FILE);
+	vmstat.anon = global_node_page_state(NR_ACTIVE_ANON) +
+			global_node_page_state(NR_INACTIVE_ANON);
+
+	vmstat.ws_refault = global_node_page_state(WORKINGSET_REFAULT);
+	vmstat.ws_activate = global_node_page_state(WORKINGSET_ACTIVATE);
+	vmstat.mapped = global_node_page_state(NR_FILE_MAPPED);
+
+	/* Avoid a lock dependency between vmstat_lock and cpu hotplug */
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
+
+		/* sectors to kbytes for PGPGIN/PGPGOUT */
+		vmstat.pgin += this->event[PGPGIN] / 2;
+		vmstat.pgout += this->event[PGPGOUT] / 2;
+		vmstat.swpin += this->event[PSWPIN];
+		vmstat.swpout += this->event[PSWPOUT];
+		vmstat.reclaim_steal += this->event[PGSTEAL_DIRECT] +
+					this->event[PGSTEAL_KSWAPD];
+		vmstat.reclaim_scan += this->event[PGSCAN_DIRECT] +
+					this->event[PGSCAN_KSWAPD];
+		vmstat.compact_scan += this->event[COMPACTMIGRATE_SCANNED] +
+					this->event[COMPACTFREE_SCANNED];
+	}
+	put_online_cpus();
+	trace_mm_event_vmstat_record(&vmstat);
+}
+
 static void record_stat(void)
 {
 	if (time_is_before_eq_jiffies(current->next_period)) {
 		int i;
+		bool need_vmstat = false;
 
 		for (i = 0; i < MM_TYPE_NUM; i++) {
 			if (current->mm_event[i].count == 0)
 				continue;
-
+			if (i == MM_COMPACTION || i == MM_RECLAIM)
+				need_vmstat = true;
 			trace_mm_event_record(i, &current->mm_event[i]);
 			memset(&current->mm_event[i], 0,
 					sizeof(struct mm_event_task));
 		}
 		current->next_period = jiffies + msecs_to_jiffies(period_ms);
+		if (need_vmstat)
+			record_vmstat();
 	}
 }
 
@@ -72,8 +131,25 @@ static int period_ms_get(void *data, u64 *val)
 	return 0;
 }
 
+static int vmstat_period_ms_set(void *data, u64 val)
+{
+	if (val < 1 || val > ULONG_MAX)
+		return -EINVAL;
+
+	vmstat_period_ms = (unsigned long)val;
+	return 0;
+}
+
+static int vmstat_period_ms_get(void *data, u64 *val)
+{
+	*val = vmstat_period_ms;
+	return 0;
+}
+
 DEFINE_SIMPLE_ATTRIBUTE(period_ms_operations, period_ms_get,
 			period_ms_set, "%llu\n");
+DEFINE_SIMPLE_ATTRIBUTE(vmstat_period_ms_operations, vmstat_period_ms_get,
+			vmstat_period_ms_set, "%llu\n");
 
 static int __init mm_event_init(void)
 {
@@ -94,6 +170,14 @@ static int __init mm_event_init(void)
 		return PTR_ERR(entry);
 	}
 
+	entry = debugfs_create_file("vmstat_period_ms", 0644,
+			mm_event_root, NULL, &vmstat_period_ms_operations);
+	if (IS_ERR(entry)) {
+		pr_warn("debugfs file vmstat_period_ms creation failed\n");
+		debugfs_remove_recursive(mm_event_root);
+		return PTR_ERR(entry);
+	}
+
 	return 0;
 }
 subsys_initcall(mm_event_init);
-- 
2.20.1