int sysctl_panic_on_oom;
int sysctl_oom_kill_allocating_task;
int sysctl_oom_dump_tasks = 1;
+int sysctl_reap_mem_on_sigkill;
DEFINE_MUTEX(oom_lock);
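The new knob also needs a ctl_table entry to be reachable via /proc/sys/vm. That hunk is not part of this excerpt; a sketch modeled on the neighboring oom sysctls, with placement in kernel/sysctl.c's vm_table assumed:

	{
		.procname	= "reap_mem_on_sigkill",
		.data		= &sysctl_reap_mem_on_sigkill,
		.maxlen		= sizeof(sysctl_reap_mem_on_sigkill),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},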
return false;
}
-
#ifdef CONFIG_MMU
/*
* OOM Reaper kernel thread which tries to reap the memory used by the OOM
* victim (if that is possible) to help the OOM killer to move on.
*/
static struct task_struct *oom_reaper_list;
static DEFINE_SPINLOCK(oom_reaper_lock);
-static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
+void __oom_reap_task_mm(struct mm_struct *mm)
{
- struct mmu_gather tlb;
struct vm_area_struct *vma;
+
+ /*
+ * Tell all users of get_user/copy_from_user etc. that the content
+ * is no longer stable. No barriers are really needed because unmapping
+ * should imply barriers already, and the reader would hit a page fault
+ * if it stumbled over reaped memory.
+ */
+ set_bit(MMF_UNSTABLE, &mm->flags);
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ if (!can_madv_dontneed_vma(vma))
+ continue;
+
+ /*
+ * Only anonymous pages have a good chance to be dropped
+ * without additional steps which we cannot afford as we
+ * are OOM already.
+ *
+ * We do not even care about fs-backed pages because all
+ * that are reclaimable have already been reclaimed, and
+ * we do not want to block exit_mmap by keeping the mm
+ * refcount elevated without a good reason.
+ */
+ if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
+ struct mmu_gather tlb;
+
+ tlb_gather_mmu(&tlb, mm, vma->vm_start, vma->vm_end);
+ unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end,
+ NULL);
+ tlb_finish_mmu(&tlb, vma->vm_start, vma->vm_end);
+ }
+ }
+}
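Factoring the loop out into __oom_reap_task_mm(mm) lets the exit path reap an oom victim's address space itself rather than racing the reaper. A minimal sketch of such a caller in exit_mmap(), assuming the mainline mm_is_oom_victim() helper and MMF_OOM_SKIP flag:

	if (unlikely(mm_is_oom_victim(mm))) {
		/* Reap anonymous memory before the final teardown. */
		__oom_reap_task_mm(mm);

		/* Tell oom_reap_task() to stop retrying this mm. */
		set_bit(MMF_OOM_SKIP, &mm->flags);

		/* Wait out any reaper still walking the address space. */
		down_write(&mm->mmap_sem);
		up_write(&mm->mmap_sem);
	}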
+
+static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
+{
bool ret = true;
/*
* We have to make sure to not race with the victim exit path
* and cause premature new oom victim selection:
- * __oom_reap_task_mm           exit_mm
+ * oom_reap_task_mm             exit_mm
*   mmget_not_zero
*                                  mmput
*                                    atomic_dec_and_test
trace_start_task_reaping(tsk->pid);
- /*
- * Tell all users of get_user/copy_from_user etc... that the content
- * is no longer stable. No barriers really needed because unmapping
- * should imply barriers already and the reader would hit a page fault
- * if it stumbled over a reaped memory.
- */
- set_bit(MMF_UNSTABLE, &mm->flags);
-
- tlb_gather_mmu(&tlb, mm, 0, -1);
- for (vma = mm->mmap ; vma; vma = vma->vm_next) {
- if (!can_madv_dontneed_vma(vma))
- continue;
+ __oom_reap_task_mm(mm);
- /*
- * Only anonymous pages have a good chance to be dropped
- * without additional steps which we cannot afford as we
- * are OOM already.
- *
- * We do not even care about fs backed pages because all
- * which are reclaimable have already been reclaimed and
- * we do not want to block exit_mmap by keeping mm ref
- * count elevated without a good reason.
- */
- if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED))
- unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end,
- NULL);
- }
- tlb_finish_mmu(&tlb, 0, -1);
pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
task_pid_nr(tsk), tsk->comm,
K(get_mm_counter(mm, MM_ANONPAGES)),
struct mm_struct *mm = tsk->signal->oom_mm;
/* Retry the down_read_trylock(mmap_sem) a few times */
- while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm))
+ while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
schedule_timeout_idle(HZ/10);
if (attempts <= MAX_OOM_REAP_RETRIES)
goto done;
-
pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
task_pid_nr(tsk), tsk->comm);
debug_show_all_locks();
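For scale: mainline defines the retry budget as

	/* ~10 attempts, HZ/10 (100ms) apart: about one second in total */
	#define MAX_OOM_REAP_RETRIES 10

so a victim holding mmap_sem for write stalls the reaper for roughly a second before the unable-to-reap message fires.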
{
if (!oom_reaper_th)
return;
+ /*
+ * Take the lock before the queued check to close the race where
+ * the same task is queued by both the OOM killer and another
+ * SIGKILL path.
+ */
+
+ spin_lock(&oom_reaper_lock);
- /* tsk is already queued? */
- if (tsk == oom_reaper_list || tsk->oom_reaper_list)
+ /* mm is already queued? */
+ if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags)) {
+ spin_unlock(&oom_reaper_lock);
return;
+ }
get_task_struct(tsk);
- spin_lock(&oom_reaper_lock);
tsk->oom_reaper_list = oom_reaper_list;
oom_reaper_list = tsk;
spin_unlock(&oom_reaper_lock);
}
#endif /* CONFIG_MMU */
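The queued check above relies on a new per-mm flag whose definition is outside this excerpt; it would sit next to the other MMF_OOM_* bits in include/linux/sched/coredump.h, along these lines (the bit number is an assumption):

	#define MMF_OOM_REAP_QUEUED	26	/* mm queued for the oom reaper */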
+static void __mark_oom_victim(struct task_struct *tsk)
+{
+ struct mm_struct *mm = tsk->mm;
+
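+ /*
+ * cmpxchg() returns the old value, so only the first caller to
+ * install oom_mm (the one that observes NULL) pins the mm and
+ * marks it as a victim; racing callers take no extra reference.
+ */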
+ if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {
+ mmgrab(tsk->signal->oom_mm);
+ set_bit(MMF_OOM_VICTIM, &mm->flags);
+ }
+}
+
/**
* mark_oom_victim - mark the given task as OOM victim
* @tsk: task to mark
*/
static void mark_oom_victim(struct task_struct *tsk)
{
- struct mm_struct *mm = tsk->mm;
-
WARN_ON(oom_killer_disabled);
/* OOM killer might race with memcg OOM */
if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
return;
/* oom_mm is bound to the signal struct life time. */
- if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm))
- mmgrab(tsk->signal->oom_mm);
+ __mark_oom_victim(tsk);
/*
* Make sure that the task is woken up from uninterruptible sleep
* still freeing memory.
*/
read_lock(&tasklist_lock);
+
+ /*
+ * The task 'p' might have already exited before reaching here, and
+ * put_task_struct() could then free task_struct 'p' while the loop
+ * below still accesses its fields, so take an extra reference.
+ */
+ get_task_struct(p);
for_each_thread(p, t) {
list_for_each_entry(child, &t->children, sibling) {
unsigned int child_points;
}
}
}
+ put_task_struct(p);
read_unlock(&tasklist_lock);
p = find_lock_task_mm(victim);
out_of_memory(&oc);
mutex_unlock(&oom_lock);
}
+
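+/**
+ * add_to_oom_reaper - hand a SIGKILLed task's mm to the oom reaper
+ * @p: task that has just been sent SIGKILL
+ *
+ * When reap_mem_on_sigkill is enabled and the task is guaranteed to
+ * free its memory, mark it as an oom victim and queue it for async
+ * reaping so its anonymous memory is freed ahead of exit_mmap().
+ */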
+void add_to_oom_reaper(struct task_struct *p)
+{
+ if (!sysctl_reap_mem_on_sigkill)
+ return;
+
+ p = find_lock_task_mm(p);
+ if (!p)
+ return;
+
+ get_task_struct(p);
+ if (task_will_free_mem(p)) {
+ __mark_oom_victim(p);
+ wake_oom_reaper(p);
+ }
+ task_unlock(p);
+ put_task_struct(p);
+}
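The caller of add_to_oom_reaper() sits outside this file; the intended hook is the SIGKILL delivery path, roughly as below (the exact location in kernel/signal.c is an assumption of this sketch):

	/* in the signal-delivery path, once SIGKILL is committed */
	if (sig == SIGKILL && !(p->flags & PF_KTHREAD))
		add_to_oom_reaper(p);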