From 6083ceb1ecb277f8977913383341622a592a605b Mon Sep 17 00:00:00 2001
From: wangwang
Date: Wed, 13 Nov 2019 14:04:33 +0800
Subject: [PATCH] psi:kernel:oom reaper porting into samsung platform

Port the SIGKILL-triggered OOM reaper so that the memory of a killed
process can be reclaimed in time.

- Add the vm.reap_mem_on_sigkill sysctl. It defaults to 0 and is set
  to 1 when init parses the init.rc conf file.
- Add add_to_oom_reaper(), called from group_send_sig_info() after a
  SIGKILL has been sent by a sender that has CAP_KILL.
- Split __mark_oom_victim() out of mark_oom_victim() so the SIGKILL
  path can set signal->oom_mm without setting TIF_MEMDIE.
- Take oom_reaper_lock in wake_oom_reaper() before testing
  MMF_OOM_REAP_QUEUED so the OOM killer and the SIGKILL path cannot
  both queue the same task.

group_send_sig_info() checks the send result and sig == SIGKILL before
calling capable(), so the capability check (which has audit side
effects) is only made for a SIGKILL that was actually sent.

Change-Id: I59f1173c0e46202904da6eeacb2fecc32c53232c
---
 Documentation/sysctl/vm.txt | 19 +++++++++++++++
 include/linux/oom.h         |  4 ++++
 kernel/signal.c             |  7 +++++-
 kernel/sysctl.c             |  7 ++++++
 mm/oom_kill.c               | 48 ++++++++++++++++++++++++++++++-------
 5 files changed, 76 insertions(+), 9 deletions(-)

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 1d1f2cb5abc8..b60e950d3a6a 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -50,6 +50,7 @@ Currently, these files are in /proc/sys/vm:
 - nr_trim_pages         (only if CONFIG_MMU=n)
 - numa_zonelist_order
 - oom_dump_tasks
+- reap_mem_on_sigkill
 - oom_kill_allocating_task
 - overcommit_kbytes
 - overcommit_memory
@@ -656,6 +657,24 @@ The default value is 1 (enabled).
 
 ==============================================================
 
+reap_mem_on_sigkill
+
+This enables or disables memory reaping for a process that receives
+SIGKILL from a sending process that has the CAP_KILL capability.
+
+If this is set to 1, a process that receives SIGKILL from a process
+that has the CAP_KILL capability is added to the oom_reaper queue, from
+which the oom_reaper thread picks it up and reaps its memory. This
+applies whether the SIGKILL was sent through sys_kill from user space
+or through kill_pid from the kernel.
+
+If this is set to 0, the memory of a process that receives SIGKILL,
+through either sys_kill or kill_pid, is not reaped.
+
+The default value is 0 (disabled).
+
+==============================================================
+
 oom_kill_allocating_task
 
 This enables or disables killing the OOM-triggering task in
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 6adac113e96d..1aa1b5133da0 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -117,4 +117,8 @@ extern struct task_struct *find_lock_task_mm(struct task_struct *p);
 extern int sysctl_oom_dump_tasks;
 extern int sysctl_oom_kill_allocating_task;
 extern int sysctl_panic_on_oom;
+extern int sysctl_reap_mem_on_sigkill;
+
+/* calls for LMK reaper */
+extern void add_to_oom_reaper(struct task_struct *p);
 #endif /* _INCLUDE_LINUX_OOM_H */
diff --git a/kernel/signal.c b/kernel/signal.c
index 619c6160f64f..5da5b95e006a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -40,6 +40,8 @@
 #include <linux/cn_proc.h>
 #include <linux/compiler.h>
 #include <linux/posix-timers.h>
+#include <linux/oom.h>
+#include <linux/capability.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/signal.h>
@@ -1326,8 +1328,11 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 	ret = check_kill_permission(sig, info, p);
 	rcu_read_unlock();
 
-	if (!ret && sig)
+	if (!ret && sig) {
 		ret = do_send_sig_info(sig, info, p, true);
+		if (!ret && sig == SIGKILL && capable(CAP_KILL))
+			add_to_oom_reaper(p);
+	}
 
 	return ret;
 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 12d8cdce48c0..d3d2d466b528 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1298,6 +1298,13 @@ static struct ctl_table vm_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "reap_mem_on_sigkill",
+		.data		= &sysctl_reap_mem_on_sigkill,
+		.maxlen		= sizeof(sysctl_reap_mem_on_sigkill),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{
 		.procname	= "overcommit_ratio",
 		.data		= &sysctl_overcommit_ratio,
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index fe0aac2348e5..da605357f99b 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -51,6 +51,7 @@
 int sysctl_panic_on_oom;
 int sysctl_oom_kill_allocating_task;
 int sysctl_oom_dump_tasks = 1;
+int sysctl_reap_mem_on_sigkill;
 
 DEFINE_MUTEX(oom_lock);
 
@@ -627,14 +628,22 @@ static void wake_oom_reaper(struct task_struct *tsk)
 {
 	if (!oom_reaper_th)
 		return;
+	/*
+	 * Take oom_reaper_lock before testing MMF_OOM_REAP_QUEUED so
+	 * that the OOM killer and any other SIGKILL path cannot both
+	 * queue the same task.
+	 */
+
+	spin_lock(&oom_reaper_lock);
 
 	/* mm is already queued? */
-	if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
+	if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags)) {
+		spin_unlock(&oom_reaper_lock);
 		return;
+	}
 
 	get_task_struct(tsk);
 
-	spin_lock(&oom_reaper_lock);
 	tsk->oom_reaper_list = oom_reaper_list;
 	oom_reaper_list = tsk;
 	spin_unlock(&oom_reaper_lock);
@@ -659,6 +668,16 @@ static inline void wake_oom_reaper(struct task_struct *tsk)
 }
 #endif /* CONFIG_MMU */
 
+static void __mark_oom_victim(struct task_struct *tsk)
+{
+	struct mm_struct *mm = tsk->mm;
+
+	if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {
+		mmgrab(tsk->signal->oom_mm);
+		set_bit(MMF_OOM_VICTIM, &mm->flags);
+	}
+}
+
 /**
  * mark_oom_victim - mark the given task as OOM victim
  * @tsk: task to mark
@@ -671,18 +690,13 @@ static inline void wake_oom_reaper(struct task_struct *tsk)
  */
 static void mark_oom_victim(struct task_struct *tsk)
 {
-	struct mm_struct *mm = tsk->mm;
-
 	WARN_ON(oom_killer_disabled);
 	/* OOM killer might race with memcg OOM */
 	if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
 		return;
 
 	/* oom_mm is bound to the signal struct life time. */
-	if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {
-		mmgrab(tsk->signal->oom_mm);
-		set_bit(MMF_OOM_VICTIM, &mm->flags);
-	}
+	__mark_oom_victim(tsk);
 
 	/*
 	 * Make sure that the task is woken up from uninterruptible sleep
@@ -1114,3 +1128,21 @@ void pagefault_out_of_memory(void)
 	out_of_memory(&oc);
 	mutex_unlock(&oom_lock);
 }
+
+void add_to_oom_reaper(struct task_struct *p)
+{
+	if (!sysctl_reap_mem_on_sigkill)
+		return;
+
+	p = find_lock_task_mm(p);
+	if (!p)
+		return;
+
+	get_task_struct(p);
+	if (task_will_free_mem(p)) {
+		__mark_oom_victim(p);
+		wake_oom_reaper(p);
+	}
+	task_unlock(p);
+	put_task_struct(p);
+}
-- 
2.20.1