+++ /dev/null
-What: /proc/<pid>/oom_adj
-When: August 2012
-Why: /proc/<pid>/oom_adj allows userspace to influence the oom killer's
- badness heuristic used to determine which task to kill when the kernel
- is out of memory.
-
- The badness heuristic has since been rewritten since the introduction of
- this tunable such that its meaning is deprecated. The value was
- implemented as a bitshift on a score generated by the badness()
- function that did not have any precise units of measure. With the
- rewrite, the score is given as a proportion of available memory to the
- task allocating pages, so using a bitshift which grows the score
- exponentially is, thus, impossible to tune with fine granularity.
-
- A much more powerful interface, /proc/<pid>/oom_score_adj, was
- introduced with the oom killer rewrite that allows users to increase or
- decrease the badness score linearly. This interface will replace
- /proc/<pid>/oom_adj.
-
- A warning will be emitted to the kernel log if an application uses this
- deprecated interface. After it is printed once, future warnings will be
- suppressed until the kernel is rebooted.
2 Modifying System Parameters
3 Per-Process Parameters
- 3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj - Adjust the oom-killer
+ 3.1 /proc/<pid>/oom_score_adj - Adjust the oom-killer
score
3.2 /proc/<pid>/oom_score - Display current oom-killer score
3.3 /proc/<pid>/io - Display the IO accounting fields
CHAPTER 3: PER-PROCESS PARAMETERS
------------------------------------------------------------------------------
-3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj- Adjust the oom-killer score
+3.1 /proc/<pid>/oom_score_adj- Adjust the oom-killer score
--------------------------------------------------------------------------------
-These file can be used to adjust the badness heuristic used to select which
+This file can be used to adjust the badness heuristic used to select which
process gets killed in out of memory conditions.
The badness heuristic assigns a value to each candidate task ranging from 0
equivalent to discounting 50% of the task's allowed memory from being considered
as scoring against the task.
-For backwards compatibility with previous kernels, /proc/<pid>/oom_adj may also
-be used to tune the badness score. Its acceptable values range from -16
-(OOM_ADJUST_MIN) to +15 (OOM_ADJUST_MAX) and a special value of -17
-(OOM_DISABLE) to disable oom killing entirely for that task. Its value is
-scaled linearly with /proc/<pid>/oom_score_adj.
-
-Writing to /proc/<pid>/oom_score_adj or /proc/<pid>/oom_adj will change the
-other with its scaled value.
-
The value of /proc/<pid>/oom_score_adj may be reduced no lower than the last
value set by a CAP_SYS_RESOURCE process. To reduce the value any lower
requires CAP_SYS_RESOURCE.
-NOTICE: /proc/<pid>/oom_adj is deprecated and will be removed, please see
-Documentation/feature-removal-schedule.txt.
-
Caveat: when a parent task is selected, the oom killer will sacrifice any first
generation children with separate address spaces instead, if possible. This
avoids servers and important system daemons from being killed and loses the
-------------------------------------------------------------
This file can be used to check the current score used by the oom-killer is for
-any given <pid>. Use it together with /proc/<pid>/oom_adj to tune which
-process should be killed in an out-of-memory situation.
-
+any given <pid>.
3.3 /proc/<pid>/io - Display the IO accounting fields
-------------------------------------------------------
.release = mem_release,
};
-static ssize_t oom_adjust_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
- char buffer[PROC_NUMBUF];
- size_t len;
- int oom_adjust = OOM_DISABLE;
- unsigned long flags;
-
- if (!task)
- return -ESRCH;
-
- if (lock_task_sighand(task, &flags)) {
- oom_adjust = task->signal->oom_adj;
- unlock_task_sighand(task, &flags);
- }
-
- put_task_struct(task);
-
- len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
-
- return simple_read_from_buffer(buf, count, ppos, buffer, len);
-}
-
-static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct task_struct *task;
- char buffer[PROC_NUMBUF];
- int oom_adjust;
- unsigned long flags;
- int err;
-
- memset(buffer, 0, sizeof(buffer));
- if (count > sizeof(buffer) - 1)
- count = sizeof(buffer) - 1;
- if (copy_from_user(buffer, buf, count)) {
- err = -EFAULT;
- goto out;
- }
-
- err = kstrtoint(strstrip(buffer), 0, &oom_adjust);
- if (err)
- goto out;
- if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
- oom_adjust != OOM_DISABLE) {
- err = -EINVAL;
- goto out;
- }
-
- task = get_proc_task(file->f_path.dentry->d_inode);
- if (!task) {
- err = -ESRCH;
- goto out;
- }
-
- task_lock(task);
- if (!task->mm) {
- err = -EINVAL;
- goto err_task_lock;
- }
-
- if (!lock_task_sighand(task, &flags)) {
- err = -ESRCH;
- goto err_task_lock;
- }
-
- if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
- err = -EACCES;
- goto err_sighand;
- }
-
- /*
- * Warn that /proc/pid/oom_adj is deprecated, see
- * Documentation/feature-removal-schedule.txt.
- */
- printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
- current->comm, task_pid_nr(current), task_pid_nr(task),
- task_pid_nr(task));
- task->signal->oom_adj = oom_adjust;
- /*
- * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
- * value is always attainable.
- */
- if (task->signal->oom_adj == OOM_ADJUST_MAX)
- task->signal->oom_score_adj = OOM_SCORE_ADJ_MAX;
- else
- task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) /
- -OOM_DISABLE;
- trace_oom_score_adj_update(task);
-err_sighand:
- unlock_task_sighand(task, &flags);
-err_task_lock:
- task_unlock(task);
- put_task_struct(task);
-out:
- return err < 0 ? err : count;
-}
-
-static const struct file_operations proc_oom_adjust_operations = {
- .read = oom_adjust_read,
- .write = oom_adjust_write,
- .llseek = generic_file_llseek,
-};
-
static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
task->signal->oom_score_adj_min = oom_score_adj;
trace_oom_score_adj_update(task);
- /*
- * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is
- * always attainable.
- */
- if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
- task->signal->oom_adj = OOM_DISABLE;
- else
- task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) /
- OOM_SCORE_ADJ_MAX;
+
err_sighand:
unlock_task_sighand(task, &flags);
err_task_lock:
REG("cgroup", S_IRUGO, proc_cgroup_operations),
#endif
INF("oom_score", S_IRUGO, proc_oom_score),
- REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("cgroup", S_IRUGO, proc_cgroup_operations),
#endif
INF("oom_score", S_IRUGO, proc_oom_score),
- REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),