psi: kernel: port oom reaper into samsung platform
[GitHub/LineageOS/android_kernel_motorola_exynos9610.git] / mm / oom_kill.c
index dee0f75c301337af62156d2ae46d5c5391cc6127..da605357f99ba36d8b4490f79ecfe92a435079dc 100644
@@ -51,6 +51,7 @@
 int sysctl_panic_on_oom;
 int sysctl_oom_kill_allocating_task;
 int sysctl_oom_dump_tasks = 1;
+int sysctl_reap_mem_on_sigkill;
 
 DEFINE_MUTEX(oom_lock);
 
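The new sysctl_reap_mem_on_sigkill knob defaults to 0 (a file-scope int with no initializer lands in .bss), so stock behavior is unchanged until it is enabled. The kernel/sysctl.c entry that exposes it is outside this file; a minimal sketch of what that entry would look like, assuming the /proc/sys/vm/reap_mem_on_sigkill path used by comparable downstream Android ports (the procname and mode are assumptions, not taken from this diff):

    /* kernel/sysctl.c -- hypothetical entry, not part of this diff */
    extern int sysctl_reap_mem_on_sigkill;

    static struct ctl_table vm_table[] = {
            /* ... existing vm entries ... */
            {
                    .procname       = "reap_mem_on_sigkill",
                    .data           = &sysctl_reap_mem_on_sigkill,
                    .maxlen         = sizeof(sysctl_reap_mem_on_sigkill),
                    .mode           = 0644,
                    .proc_handler   = proc_dointvec,
            },
            { }
    };

With such an entry in place, `echo 1 > /proc/sys/vm/reap_mem_on_sigkill` turns the SIGKILL reaping path on at runtime.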
@@ -456,7 +457,6 @@ bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
        return false;
 }
 
-
 #ifdef CONFIG_MMU
 /*
  * OOM Reaper kernel thread which tries to reap the memory used by the OOM
@@ -467,16 +467,51 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
 static struct task_struct *oom_reaper_list;
 static DEFINE_SPINLOCK(oom_reaper_lock);
 
-static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
+void __oom_reap_task_mm(struct mm_struct *mm)
 {
-       struct mmu_gather tlb;
        struct vm_area_struct *vma;
+
+       /*
+        * Tell all users of get_user/copy_from_user etc... that the content
+        * is no longer stable. No barriers really needed because unmapping
+        * should imply barriers already and the reader would hit a page fault
+        * if it stumbled over a reaped memory.
+        */
+       set_bit(MMF_UNSTABLE, &mm->flags);
+
+       for (vma = mm->mmap ; vma; vma = vma->vm_next) {
+               if (!can_madv_dontneed_vma(vma))
+                       continue;
+
+               /*
+                * Only anonymous pages have a good chance to be dropped
+                * without additional steps which we cannot afford as we
+                * are OOM already.
+                *
+                * We do not even care about fs backed pages because all
+                * which are reclaimable have already been reclaimed and
+                * we do not want to block exit_mmap by keeping mm ref
+                * count elevated without a good reason.
+                */
+               if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
+                       struct mmu_gather tlb;
+
+                       tlb_gather_mmu(&tlb, mm, vma->vm_start, vma->vm_end);
+                       unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end,
+                                        NULL);
+                       tlb_finish_mmu(&tlb, vma->vm_start, vma->vm_end);
+               }
+       }
+}
+
+static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
+{
        bool ret = true;
 
        /*
         * We have to make sure to not race with the victim exit path
         * and cause premature new oom victim selection:
-        * __oom_reap_task_mm           exit_mm
+        * oom_reap_task_mm             exit_mm
         *   mmget_not_zero
         *                                mmput
         *                                  atomic_dec_and_test
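The loop in the new __oom_reap_task_mm() skips any VMA rejected by can_madv_dontneed_vma(). For context, in kernels of this generation that helper lives in mm/madvise.c and filters out mappings whose pages cannot simply be dropped behind the task's back; reproduced here from memory, so treat the exact flag set as an assumption:

    /* mm/madvise.c (approximate): locked, hugetlb and PFN-mapped
     * regions must not be unmapped out from under the task. */
    static bool can_madv_dontneed_vma(struct vm_area_struct *vma)
    {
            return !(vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP));
    }

Note also that the refactor switches from one tlb_gather_mmu(&tlb, mm, 0, -1) around the whole loop to a per-VMA gather, keeping each TLB flush bounded to the range actually unmapped; in the upstream version of this change the same helper is also called from exit_mmap() for OOM victims, where that bounding matters.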
@@ -524,34 +559,8 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 
        trace_start_task_reaping(tsk->pid);
 
-       /*
-        * Tell all users of get_user/copy_from_user etc... that the content
-        * is no longer stable. No barriers really needed because unmapping
-        * should imply barriers already and the reader would hit a page fault
-        * if it stumbled over a reaped memory.
-        */
-       set_bit(MMF_UNSTABLE, &mm->flags);
-
-       tlb_gather_mmu(&tlb, mm, 0, -1);
-       for (vma = mm->mmap ; vma; vma = vma->vm_next) {
-               if (!can_madv_dontneed_vma(vma))
-                       continue;
+       __oom_reap_task_mm(mm);
 
-               /*
-                * Only anonymous pages have a good chance to be dropped
-                * without additional steps which we cannot afford as we
-                * are OOM already.
-                *
-                * We do not even care about fs backed pages because all
-                * which are reclaimable have already been reclaimed and
-                * we do not want to block exit_mmap by keeping mm ref
-                * count elevated without a good reason.
-                */
-               if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED))
-                       unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end,
-                                        NULL);
-       }
-       tlb_finish_mmu(&tlb, 0, -1);
        pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
                        task_pid_nr(tsk), tsk->comm,
                        K(get_mm_counter(mm, MM_ANONPAGES)),
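For reference, the K() macro used by this pr_info() is defined elsewhere in mm/oom_kill.c and converts a page count into kilobytes:

    /* mm/oom_kill.c: pages -> KiB; with 4 KiB pages (PAGE_SHIFT == 12)
     * this is simply x * 4 */
    #define K(x) ((x) << (PAGE_SHIFT-10))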
@@ -572,13 +581,12 @@ static void oom_reap_task(struct task_struct *tsk)
        struct mm_struct *mm = tsk->signal->oom_mm;
 
        /* Retry the down_read_trylock(mmap_sem) a few times */
-       while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm))
+       while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
                schedule_timeout_idle(HZ/10);
 
        if (attempts <= MAX_OOM_REAP_RETRIES)
                goto done;
 
-
        pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
                task_pid_nr(tsk), tsk->comm);
        debug_show_all_locks();
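The retry loop above bounds how long the reaper can be stalled by a held mmap_sem. With the upstream constant, assumed unchanged by this port, the budget works out to roughly one second per victim:

    /* mm/oom_kill.c -- upstream value, assumed unchanged here */
    #define MAX_OOM_REAP_RETRIES 10

    /* 10 attempts x schedule_timeout_idle(HZ/10) = ~10 x 100 ms,
     * i.e. about 1 s before the "unable to reap" message above
     * is printed and the reaper gives up on this victim */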
@@ -620,14 +628,22 @@ static void wake_oom_reaper(struct task_struct *tsk)
 {
        if (!oom_reaper_th)
                return;
+       /*
+        * Take the oom_reaper_lock here to avoid the scenario where the
+        * same task is queued by both the OOM killer and any other
+        * SIGKILL path.
+        */
+
+       spin_lock(&oom_reaper_lock);
 
-       /* tsk is already queued? */
-       if (tsk == oom_reaper_list || tsk->oom_reaper_list)
+       /* mm is already queued? */
+       if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags)) {
+               spin_unlock(&oom_reaper_lock);
                return;
+       }
 
        get_task_struct(tsk);
 
-       spin_lock(&oom_reaper_lock);
        tsk->oom_reaper_list = oom_reaper_list;
        oom_reaper_list = tsk;
        spin_unlock(&oom_reaper_lock);
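Switching the "already queued?" test from the tsk->oom_reaper_list linkage to a test_and_set_bit() on the mm is what lets two independent paths (the OOM killer and the SIGKILL hook) safely race to queue the same victim: the queued state now lives on the shared mm rather than on a particular task_struct. The flag itself is defined outside this file; a sketch assuming it sits beside the other MMF_OOM_* bits in include/linux/sched/coredump.h, with the exact bit numbers an assumption:

    /* include/linux/sched/coredump.h -- assumed placement and bits */
    #define MMF_OOM_VICTIM       25  /* mm is the oom victim */
    #define MMF_OOM_REAP_QUEUED  26  /* mm already queued for reaping */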
@@ -652,6 +668,16 @@ static inline void wake_oom_reaper(struct task_struct *tsk)
 }
 #endif /* CONFIG_MMU */
 
+static void __mark_oom_victim(struct task_struct *tsk)
+{
+       struct mm_struct *mm = tsk->mm;
+
+       if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {
+               mmgrab(tsk->signal->oom_mm);
+               set_bit(MMF_OOM_VICTIM, &mm->flags);
+       }
+}
+
 /**
  * mark_oom_victim - mark the given task as OOM victim
  * @tsk: task to mark
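The factored-out __mark_oom_victim() hinges on cmpxchg() returning the value it found: only the caller that installs mm while tsk->signal->oom_mm is still NULL sees a NULL return, so the mm reference is taken and MMF_OOM_VICTIM set exactly once even if the OOM killer and the new SIGKILL path race. A minimal userspace illustration of that first-caller-wins idiom (a standalone sketch built on the GCC builtin, not kernel code):

    #include <stdio.h>

    /* Userspace stand-in for the kernel's cmpxchg(): atomically store
     * 'new' into '*ptr' if it still holds 'old', returning the value
     * that was actually seen. */
    #define cmpxchg(ptr, old, new) __sync_val_compare_and_swap(ptr, old, new)

    static void *oom_mm;  /* plays the role of tsk->signal->oom_mm */

    static void mark_victim(void *mm)
    {
            /* Only the first caller sees NULL and does the one-time setup. */
            if (!cmpxchg(&oom_mm, (void *)0, mm))
                    printf("first marker: take mm reference, set flag\n");
            else
                    printf("already marked: nothing to do\n");
    }

    int main(void)
    {
            int dummy_mm;

            mark_victim(&dummy_mm);  /* e.g. OOM killer path */
            mark_victim(&dummy_mm);  /* e.g. racing SIGKILL path */
            return 0;
    }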
@@ -664,16 +690,13 @@ static inline void wake_oom_reaper(struct task_struct *tsk)
  */
 static void mark_oom_victim(struct task_struct *tsk)
 {
-       struct mm_struct *mm = tsk->mm;
-
        WARN_ON(oom_killer_disabled);
        /* OOM killer might race with memcg OOM */
        if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
                return;
 
        /* oom_mm is bound to the signal struct life time. */
-       if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm))
-               mmgrab(tsk->signal->oom_mm);
+       __mark_oom_victim(tsk);
 
        /*
         * Make sure that the task is woken up from uninterruptible sleep
@@ -861,6 +884,13 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
         * still freeing memory.
         */
        read_lock(&tasklist_lock);
+
+       /*
+        * The task 'p' might have already exited before reaching here, in
+        * which case put_task_struct() would free task_struct 'p' while the
+        * loop below still accesses its fields, so take an extra reference.
+        */
+       get_task_struct(p);
        for_each_thread(p, t) {
                list_for_each_entry(child, &t->children, sibling) {
                        unsigned int child_points;
@@ -880,6 +910,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
                        }
                }
        }
+       put_task_struct(p);
        read_unlock(&tasklist_lock);
 
        p = find_lock_task_mm(victim);
@@ -1097,3 +1128,21 @@ void pagefault_out_of_memory(void)
        out_of_memory(&oc);
        mutex_unlock(&oom_lock);
 }
+
+void add_to_oom_reaper(struct task_struct *p)
+{
+       if (!sysctl_reap_mem_on_sigkill)
+               return;
+
+       p = find_lock_task_mm(p);
+       if (!p)
+               return;
+
+       get_task_struct(p);
+       if (task_will_free_mem(p)) {
+               __mark_oom_victim(p);
+               wake_oom_reaper(p);
+       }
+       task_unlock(p);
+       put_task_struct(p);
+}
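Nothing in mm/oom_kill.c itself invokes add_to_oom_reaper(); the caller belongs in the signal delivery code so that any SIGKILL, not just an OOM kill, can hand the victim's mm to the reaper. A sketch of the expected wiring in kernel/signal.c, with the precise hook location and guards being assumptions based on comparable downstream ports rather than part of this diff:

    /* kernel/signal.c -- hypothetical hook in the fatal-signal path */
    if (sig == SIGKILL) {
            /* Victim will never run in userspace again; let the
             * oom_reaper tear down its anonymous memory instead of
             * waiting for exit_mmap(). */
            add_to_oom_reaper(t);
    }

add_to_oom_reaper() itself stays cheap when the feature is off: the sysctl test comes first, task_will_free_mem() filters out tasks whose mm is shared with surviving processes, and the find_lock_task_mm()/get_task_struct() pairing pins the task only long enough to mark it a victim and queue it for the reaper.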