FROMLIST: binder: fix an ret value override
[GitHub/LineageOS/android_kernel_samsung_universal7580.git] / kernel / sys.c
CommitLineData
1da177e4
LT
1/*
2 * linux/kernel/sys.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
9984de1a 7#include <linux/export.h>
1da177e4
LT
8#include <linux/mm.h>
9#include <linux/utsname.h>
10#include <linux/mman.h>
1da177e4
LT
11#include <linux/reboot.h>
12#include <linux/prctl.h>
1da177e4
LT
13#include <linux/highuid.h>
14#include <linux/fs.h>
74da1ff7 15#include <linux/kmod.h>
cdd6c482 16#include <linux/perf_event.h>
3e88c553 17#include <linux/resource.h>
dc009d92
EB
18#include <linux/kernel.h>
19#include <linux/kexec.h>
1da177e4 20#include <linux/workqueue.h>
c59ede7b 21#include <linux/capability.h>
1da177e4
LT
22#include <linux/device.h>
23#include <linux/key.h>
24#include <linux/times.h>
25#include <linux/posix-timers.h>
26#include <linux/security.h>
27#include <linux/dcookies.h>
28#include <linux/suspend.h>
29#include <linux/tty.h>
7ed20e1a 30#include <linux/signal.h>
9f46080c 31#include <linux/cn_proc.h>
3cfc348b 32#include <linux/getcpu.h>
6eaeeaba 33#include <linux/task_io_accounting_ops.h>
1d9d02fe 34#include <linux/seccomp.h>
4047727e 35#include <linux/cpu.h>
e28cbf22 36#include <linux/personality.h>
e3d5a27d 37#include <linux/ptrace.h>
5ad4e53b 38#include <linux/fs_struct.h>
b32dfe37
CG
39#include <linux/file.h>
40#include <linux/mount.h>
5a0e3ad6 41#include <linux/gfp.h>
40dc166c 42#include <linux/syscore_ops.h>
be27425d
AK
43#include <linux/version.h>
44#include <linux/ctype.h>
3c2a0909
S
45#include <linux/mm.h>
46#include <linux/mempolicy.h>
47#include <linux/sched.h>
1da177e4
LT
48
49#include <linux/compat.h>
50#include <linux/syscalls.h>
00d7c05a 51#include <linux/kprobes.h>
acce292c 52#include <linux/user_namespace.h>
7fe5e042 53#include <linux/binfmts.h>
1da177e4 54
4a22f166
SR
55#include <linux/sched.h>
56#include <linux/rcupdate.h>
57#include <linux/uidgid.h>
58#include <linux/cred.h>
59
04c6862c 60#include <linux/kmsg_dump.h>
be27425d
AK
61/* Move somewhere else to avoid recompiling? */
62#include <generated/utsrelease.h>
04c6862c 63
1da177e4
LT
64#include <asm/uaccess.h>
65#include <asm/io.h>
66#include <asm/unistd.h>
67
68#ifndef SET_UNALIGN_CTL
69# define SET_UNALIGN_CTL(a,b) (-EINVAL)
70#endif
71#ifndef GET_UNALIGN_CTL
72# define GET_UNALIGN_CTL(a,b) (-EINVAL)
73#endif
74#ifndef SET_FPEMU_CTL
75# define SET_FPEMU_CTL(a,b) (-EINVAL)
76#endif
77#ifndef GET_FPEMU_CTL
78# define GET_FPEMU_CTL(a,b) (-EINVAL)
79#endif
80#ifndef SET_FPEXC_CTL
81# define SET_FPEXC_CTL(a,b) (-EINVAL)
82#endif
83#ifndef GET_FPEXC_CTL
84# define GET_FPEXC_CTL(a,b) (-EINVAL)
85#endif
651d765d
AB
86#ifndef GET_ENDIAN
87# define GET_ENDIAN(a,b) (-EINVAL)
88#endif
89#ifndef SET_ENDIAN
90# define SET_ENDIAN(a,b) (-EINVAL)
91#endif
8fb402bc
EB
92#ifndef GET_TSC_CTL
93# define GET_TSC_CTL(a) (-EINVAL)
94#endif
95#ifndef SET_TSC_CTL
96# define SET_TSC_CTL(a) (-EINVAL)
97#endif
1da177e4
LT
98
99/*
100 * this is where the system-wide overflow UID and GID are defined, for
101 * architectures that now have 32-bit UID/GID but didn't in the past
102 */
103
104int overflowuid = DEFAULT_OVERFLOWUID;
105int overflowgid = DEFAULT_OVERFLOWGID;
106
1da177e4
LT
107EXPORT_SYMBOL(overflowuid);
108EXPORT_SYMBOL(overflowgid);
1da177e4
LT
109
110/*
111 * the same as above, but for filesystems which can only store a 16-bit
112 * UID and GID. as such, this is needed on all architectures
113 */
114
115int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
116int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
117
118EXPORT_SYMBOL(fs_overflowuid);
119EXPORT_SYMBOL(fs_overflowgid);
120
121/*
122 * this indicates whether you can reboot with ctrl-alt-del: the default is yes
123 */
124
125int C_A_D = 1;
9ec52099
CLG
126struct pid *cad_pid;
127EXPORT_SYMBOL(cad_pid);
1da177e4 128
3c2a0909
S
129int ignore_fs_panic = 0; // To prevent kernel panic by EIO during shutdown
130
131#if defined CONFIG_SEC_RESTRICT_SETUID
132int sec_check_execpath(struct mm_struct *mm, char *denypath);
133#if defined CONFIG_SEC_RESTRICT_ROOTING_LOG
134#define PRINT_LOG(...) printk(KERN_ERR __VA_ARGS__)
135#else
136#define PRINT_LOG(...)
137#endif // End of CONFIG_SEC_RESTRICT_ROOTING_LOG
138
139static int sec_restrict_uid(void)
140{
141 int ret = 0;
142 struct task_struct *parent_tsk;
143 const struct cred *parent_cred;
144
145 read_lock(&tasklist_lock);
146 parent_tsk = current->parent;
147 if (!parent_tsk) {
148 read_unlock(&tasklist_lock);
149 return 0;
150 }
151
152 get_task_struct(parent_tsk);
153 /* holding on to the task struct is enough so just release
154 * the tasklist lock here */
155 read_unlock(&tasklist_lock);
156
157 parent_cred = get_task_cred(parent_tsk);
158 if (!parent_cred)
159 goto out;
160 if (parent_cred->euid == 0 || parent_tsk->pid == 1) {
161 ret = 0;
162 } else if (sec_check_execpath(current->mm, "/system/bin/pppd")) {
163 PRINT_LOG("VPN allowed to use root permission");
164 ret = 0;
165 } else {
166 PRINT_LOG("Restricted changing UID. PID = %d(%s) PPID = %d(%s)\n",
167 current->pid, current->comm,
168 parent_tsk->pid, parent_tsk->comm);
169 ret = 1;
170 }
171 put_cred(parent_cred);
172out:
173 put_task_struct(parent_tsk);
174
175 return ret;
176}
177#endif // End of CONFIG_SEC_RESTRICT_SETUID
178
bd804eba
RW
179/*
180 * If set, this is used for preparing the system to power off.
181 */
182
183void (*pm_power_off_prepare)(void);
bd804eba 184
fc832ad3
SH
185/*
186 * Returns true if current's euid is same as p's uid or euid,
187 * or has CAP_SYS_NICE to p's user_ns.
188 *
189 * Called with rcu_read_lock, creds are safe
190 */
191static bool set_one_prio_perm(struct task_struct *p)
192{
193 const struct cred *cred = current_cred(), *pcred = __task_cred(p);
194
5af66203
EB
195 if (uid_eq(pcred->uid, cred->euid) ||
196 uid_eq(pcred->euid, cred->euid))
fc832ad3 197 return true;
c4a4d603 198 if (ns_capable(pcred->user_ns, CAP_SYS_NICE))
fc832ad3
SH
199 return true;
200 return false;
201}
202
c69e8d9c
DH
203/*
204 * set the priority of a task
205 * - the caller must hold the RCU read lock
206 */
1da177e4
LT
207static int set_one_prio(struct task_struct *p, int niceval, int error)
208{
209 int no_nice;
210
fc832ad3 211 if (!set_one_prio_perm(p)) {
1da177e4
LT
212 error = -EPERM;
213 goto out;
214 }
e43379f1 215 if (niceval < task_nice(p) && !can_nice(p, niceval)) {
1da177e4
LT
216 error = -EACCES;
217 goto out;
218 }
219 no_nice = security_task_setnice(p, niceval);
220 if (no_nice) {
221 error = no_nice;
222 goto out;
223 }
224 if (error == -ESRCH)
225 error = 0;
226 set_user_nice(p, niceval);
227out:
228 return error;
229}
230
754fe8d2 231SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
1da177e4
LT
232{
233 struct task_struct *g, *p;
234 struct user_struct *user;
86a264ab 235 const struct cred *cred = current_cred();
1da177e4 236 int error = -EINVAL;
41487c65 237 struct pid *pgrp;
7b44ab97 238 kuid_t uid;
1da177e4 239
3e88c553 240 if (which > PRIO_USER || which < PRIO_PROCESS)
1da177e4
LT
241 goto out;
242
243 /* normalize: avoid signed division (rounding problems) */
244 error = -ESRCH;
245 if (niceval < -20)
246 niceval = -20;
247 if (niceval > 19)
248 niceval = 19;
249
d4581a23 250 rcu_read_lock();
1da177e4
LT
251 read_lock(&tasklist_lock);
252 switch (which) {
253 case PRIO_PROCESS:
41487c65 254 if (who)
228ebcbe 255 p = find_task_by_vpid(who);
41487c65
EB
256 else
257 p = current;
1da177e4
LT
258 if (p)
259 error = set_one_prio(p, niceval, error);
260 break;
261 case PRIO_PGRP:
41487c65 262 if (who)
b488893a 263 pgrp = find_vpid(who);
41487c65
EB
264 else
265 pgrp = task_pgrp(current);
2d70b68d 266 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
1da177e4 267 error = set_one_prio(p, niceval, error);
2d70b68d 268 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
1da177e4
LT
269 break;
270 case PRIO_USER:
7b44ab97 271 uid = make_kuid(cred->user_ns, who);
74ba508f 272 user = cred->user;
1da177e4 273 if (!who)
078de5f7
EB
274 uid = cred->uid;
275 else if (!uid_eq(uid, cred->uid) &&
7b44ab97 276 !(user = find_user(uid)))
86a264ab 277 goto out_unlock; /* No processes for this user */
1da177e4 278
dfc6a736 279 do_each_thread(g, p) {
078de5f7 280 if (uid_eq(task_uid(p), uid))
1da177e4 281 error = set_one_prio(p, niceval, error);
dfc6a736 282 } while_each_thread(g, p);
078de5f7 283 if (!uid_eq(uid, cred->uid))
1da177e4
LT
284 free_uid(user); /* For find_user() */
285 break;
286 }
287out_unlock:
288 read_unlock(&tasklist_lock);
d4581a23 289 rcu_read_unlock();
1da177e4
LT
290out:
291 return error;
292}
293
294/*
295 * Ugh. To avoid negative return values, "getpriority()" will
296 * not return the normal nice-value, but a negated value that
297 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
298 * to stay compatible.
299 */
754fe8d2 300SYSCALL_DEFINE2(getpriority, int, which, int, who)
1da177e4
LT
301{
302 struct task_struct *g, *p;
303 struct user_struct *user;
86a264ab 304 const struct cred *cred = current_cred();
1da177e4 305 long niceval, retval = -ESRCH;
41487c65 306 struct pid *pgrp;
7b44ab97 307 kuid_t uid;
1da177e4 308
3e88c553 309 if (which > PRIO_USER || which < PRIO_PROCESS)
1da177e4
LT
310 return -EINVAL;
311
70118837 312 rcu_read_lock();
1da177e4
LT
313 read_lock(&tasklist_lock);
314 switch (which) {
315 case PRIO_PROCESS:
41487c65 316 if (who)
228ebcbe 317 p = find_task_by_vpid(who);
41487c65
EB
318 else
319 p = current;
1da177e4
LT
320 if (p) {
321 niceval = 20 - task_nice(p);
322 if (niceval > retval)
323 retval = niceval;
324 }
325 break;
326 case PRIO_PGRP:
41487c65 327 if (who)
b488893a 328 pgrp = find_vpid(who);
41487c65
EB
329 else
330 pgrp = task_pgrp(current);
2d70b68d 331 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
1da177e4
LT
332 niceval = 20 - task_nice(p);
333 if (niceval > retval)
334 retval = niceval;
2d70b68d 335 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
1da177e4
LT
336 break;
337 case PRIO_USER:
7b44ab97 338 uid = make_kuid(cred->user_ns, who);
74ba508f 339 user = cred->user;
1da177e4 340 if (!who)
078de5f7
EB
341 uid = cred->uid;
342 else if (!uid_eq(uid, cred->uid) &&
7b44ab97 343 !(user = find_user(uid)))
86a264ab 344 goto out_unlock; /* No processes for this user */
1da177e4 345
dfc6a736 346 do_each_thread(g, p) {
078de5f7 347 if (uid_eq(task_uid(p), uid)) {
1da177e4
LT
348 niceval = 20 - task_nice(p);
349 if (niceval > retval)
350 retval = niceval;
351 }
dfc6a736 352 } while_each_thread(g, p);
078de5f7 353 if (!uid_eq(uid, cred->uid))
1da177e4
LT
354 free_uid(user); /* for find_user() */
355 break;
356 }
357out_unlock:
358 read_unlock(&tasklist_lock);
70118837 359 rcu_read_unlock();
1da177e4
LT
360
361 return retval;
362}
363
e4c94330
EB
364/**
365 * emergency_restart - reboot the system
366 *
367 * Without shutting down any hardware or taking any locks
368 * reboot the system. This is called when we know we are in
369 * trouble so this is our best effort to reboot. This is
370 * safe to call in interrupt context.
371 */
7c903473
EB
372void emergency_restart(void)
373{
04c6862c 374 kmsg_dump(KMSG_DUMP_EMERG);
7c903473
EB
375 machine_emergency_restart();
376}
377EXPORT_SYMBOL_GPL(emergency_restart);
378
ca195b7f 379void kernel_restart_prepare(char *cmd)
4a00ea1e 380{
e041c683 381 blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
4a00ea1e 382 system_state = SYSTEM_RESTART;
3c2a0909
S
383
384 /* user process freeze before device shutdown */
385 freeze_processes();
b50fa7c8 386 usermodehelper_disable();
3c2a0909 387 ignore_fs_panic = 1;
4a00ea1e 388 device_shutdown();
e4c94330 389}
1e5d5331 390
c5f41752
AW
391/**
392 * register_reboot_notifier - Register function to be called at reboot time
393 * @nb: Info about notifier function to be called
394 *
395 * Registers a function with the list of functions
396 * to be called at reboot time.
397 *
398 * Currently always returns zero, as blocking_notifier_chain_register()
399 * always returns zero.
400 */
401int register_reboot_notifier(struct notifier_block *nb)
402{
403 return blocking_notifier_chain_register(&reboot_notifier_list, nb);
404}
405EXPORT_SYMBOL(register_reboot_notifier);
406
407/**
408 * unregister_reboot_notifier - Unregister previously registered reboot notifier
409 * @nb: Hook to be unregistered
410 *
411 * Unregisters a previously registered reboot
412 * notifier function.
413 *
414 * Returns zero on success, or %-ENOENT on failure.
415 */
416int unregister_reboot_notifier(struct notifier_block *nb)
417{
418 return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
419}
420EXPORT_SYMBOL(unregister_reboot_notifier);
421
cf7df378
RH
422/* Add backwards compatibility for stable trees. */
423#ifndef PF_NO_SETAFFINITY
424#define PF_NO_SETAFFINITY PF_THREAD_BOUND
425#endif
426
427static void migrate_to_reboot_cpu(void)
428{
429 /* The boot cpu is always logical cpu 0 */
430 int cpu = 0;
431
432 cpu_hotplug_disable();
433
434 /* Make certain the cpu I'm about to reboot on is online */
435 if (!cpu_online(cpu))
436 cpu = cpumask_first(cpu_online_mask);
437
438 /* Prevent races with other tasks migrating this task */
439 current->flags |= PF_NO_SETAFFINITY;
440
441 /* Make certain I only run on the appropriate processor */
442 set_cpus_allowed_ptr(current, cpumask_of(cpu));
443}
444
1e5d5331
RD
445/**
446 * kernel_restart - reboot the system
447 * @cmd: pointer to buffer containing command to execute for restart
b8887e6e 448 * or %NULL
1e5d5331
RD
449 *
450 * Shutdown everything and perform a clean reboot.
451 * This is not safe to call in interrupt context.
452 */
e4c94330
EB
453void kernel_restart(char *cmd)
454{
455 kernel_restart_prepare(cmd);
cf7df378 456 migrate_to_reboot_cpu();
6f389a8f 457 syscore_shutdown();
756184b7 458 if (!cmd)
4a00ea1e 459 printk(KERN_EMERG "Restarting system.\n");
756184b7 460 else
4a00ea1e 461 printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
04c6862c 462 kmsg_dump(KMSG_DUMP_RESTART);
4a00ea1e
EB
463 machine_restart(cmd);
464}
465EXPORT_SYMBOL_GPL(kernel_restart);
466
4ef7229f 467static void kernel_shutdown_prepare(enum system_states state)
729b4d4c 468{
e041c683 469 blocking_notifier_call_chain(&reboot_notifier_list,
729b4d4c
AS
470 (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
471 system_state = state;
3c2a0909
S
472
473 /* user process freeze before device shutdown */
474 freeze_processes();
b50fa7c8 475 usermodehelper_disable();
3c2a0909 476 ignore_fs_panic = 1;
729b4d4c
AS
477 device_shutdown();
478}
e4c94330
EB
479/**
480 * kernel_halt - halt the system
481 *
482 * Shutdown everything and perform a clean system halt.
483 */
e4c94330
EB
484void kernel_halt(void)
485{
729b4d4c 486 kernel_shutdown_prepare(SYSTEM_HALT);
cf7df378 487 migrate_to_reboot_cpu();
40dc166c 488 syscore_shutdown();
4a00ea1e 489 printk(KERN_EMERG "System halted.\n");
04c6862c 490 kmsg_dump(KMSG_DUMP_HALT);
4a00ea1e
EB
491 machine_halt();
492}
729b4d4c 493
4a00ea1e
EB
494EXPORT_SYMBOL_GPL(kernel_halt);
495
e4c94330
EB
496/**
497 * kernel_power_off - power_off the system
498 *
499 * Shutdown everything and perform a clean system power_off.
500 */
e4c94330
EB
501void kernel_power_off(void)
502{
729b4d4c 503 kernel_shutdown_prepare(SYSTEM_POWER_OFF);
bd804eba
RW
504 if (pm_power_off_prepare)
505 pm_power_off_prepare();
cf7df378 506 migrate_to_reboot_cpu();
40dc166c 507 syscore_shutdown();
4a00ea1e 508 printk(KERN_EMERG "Power down.\n");
04c6862c 509 kmsg_dump(KMSG_DUMP_POWEROFF);
4a00ea1e
EB
510 machine_power_off();
511}
512EXPORT_SYMBOL_GPL(kernel_power_off);
6f15fa50
TG
513
514static DEFINE_MUTEX(reboot_mutex);
515
1da177e4
LT
516/*
517 * Reboot system call: for obvious reasons only root may call it,
518 * and even root needs to set up some magic numbers in the registers
519 * so that some mistake won't make this reboot the whole machine.
520 * You can also set the meaning of the ctrl-alt-del-key here.
521 *
522 * reboot doesn't sync: do that yourself before calling this.
523 */
754fe8d2
HC
524SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
525 void __user *, arg)
1da177e4 526{
923c7538 527 struct pid_namespace *pid_ns = task_active_pid_ns(current);
1da177e4 528 char buffer[256];
3d26dcf7 529 int ret = 0;
1da177e4
LT
530
531 /* We only trust the superuser with rebooting the system. */
923c7538 532 if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT))
1da177e4
LT
533 return -EPERM;
534
535 /* For safety, we require "magic" arguments. */
536 if (magic1 != LINUX_REBOOT_MAGIC1 ||
537 (magic2 != LINUX_REBOOT_MAGIC2 &&
538 magic2 != LINUX_REBOOT_MAGIC2A &&
539 magic2 != LINUX_REBOOT_MAGIC2B &&
540 magic2 != LINUX_REBOOT_MAGIC2C))
541 return -EINVAL;
542
cf3f8921
DL
543 /*
544 * If pid namespaces are enabled and the current task is in a child
545 * pid_namespace, the command is handled by reboot_pid_ns() which will
546 * call do_exit().
547 */
923c7538 548 ret = reboot_pid_ns(pid_ns, cmd);
cf3f8921
DL
549 if (ret)
550 return ret;
551
5e38291d
EB
552 /* Instead of trying to make the power_off code look like
553 * halt when pm_power_off is not set do it the easy way.
554 */
555 if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
556 cmd = LINUX_REBOOT_CMD_HALT;
557
6f15fa50 558 mutex_lock(&reboot_mutex);
1da177e4
LT
559 switch (cmd) {
560 case LINUX_REBOOT_CMD_RESTART:
4a00ea1e 561 kernel_restart(NULL);
1da177e4
LT
562 break;
563
564 case LINUX_REBOOT_CMD_CAD_ON:
565 C_A_D = 1;
566 break;
567
568 case LINUX_REBOOT_CMD_CAD_OFF:
569 C_A_D = 0;
570 break;
571
572 case LINUX_REBOOT_CMD_HALT:
3c2a0909 573/* kernel_halt();
1da177e4 574 do_exit(0);
3c2a0909
S
575 panic("cannot halt"); */
576 kernel_restart(NULL);
577 break;
1da177e4
LT
578
579 case LINUX_REBOOT_CMD_POWER_OFF:
4a00ea1e 580 kernel_power_off();
1da177e4
LT
581 do_exit(0);
582 break;
583
584 case LINUX_REBOOT_CMD_RESTART2:
585 if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
6f15fa50
TG
586 ret = -EFAULT;
587 break;
1da177e4
LT
588 }
589 buffer[sizeof(buffer) - 1] = '\0';
590
4a00ea1e 591 kernel_restart(buffer);
1da177e4
LT
592 break;
593
3ab83521 594#ifdef CONFIG_KEXEC
dc009d92 595 case LINUX_REBOOT_CMD_KEXEC:
3d26dcf7
AK
596 ret = kernel_kexec();
597 break;
3ab83521 598#endif
4a00ea1e 599
b0cb1a19 600#ifdef CONFIG_HIBERNATION
1da177e4 601 case LINUX_REBOOT_CMD_SW_SUSPEND:
3d26dcf7
AK
602 ret = hibernate();
603 break;
1da177e4
LT
604#endif
605
606 default:
3d26dcf7
AK
607 ret = -EINVAL;
608 break;
1da177e4 609 }
6f15fa50 610 mutex_unlock(&reboot_mutex);
3d26dcf7 611 return ret;
1da177e4
LT
612}
613
3c2a0909
S
614extern void do_emergency_remount(struct work_struct *work);
615
65f27f38 616static void deferred_cad(struct work_struct *dummy)
1da177e4 617{
3c2a0909 618 do_emergency_remount(NULL);
abcd9e51 619 kernel_restart(NULL);
1da177e4
LT
620}
621
622/*
623 * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
624 * As it's called within an interrupt, it may NOT sync: the only choice
625 * is whether to reboot at once, or just ignore the ctrl-alt-del.
626 */
627void ctrl_alt_del(void)
628{
65f27f38 629 static DECLARE_WORK(cad_work, deferred_cad);
1da177e4
LT
630
631 if (C_A_D)
632 schedule_work(&cad_work);
633 else
9ec52099 634 kill_cad_pid(SIGINT, 1);
1da177e4
LT
635}
636
1da177e4
LT
637/*
638 * Unprivileged users may change the real gid to the effective gid
639 * or vice versa. (BSD-style)
640 *
641 * If you set the real gid at all, or set the effective gid to a value not
642 * equal to the real gid, then the saved gid is set to the new effective gid.
643 *
644 * This makes it possible for a setgid program to completely drop its
645 * privileges, which is often a useful assertion to make when you are doing
646 * a security audit over a program.
647 *
648 * The general idea is that a program which uses just setregid() will be
649 * 100% compatible with BSD. A program which uses just setgid() will be
650 * 100% compatible with POSIX with saved IDs.
651 *
652 * SMP: There are not races, the GIDs are checked only by filesystem
653 * operations (as far as semantic preservation is concerned).
654 */
ae1251ab 655SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
1da177e4 656{
a29c33f4 657 struct user_namespace *ns = current_user_ns();
d84f4f99
DH
658 const struct cred *old;
659 struct cred *new;
1da177e4 660 int retval;
a29c33f4
EB
661 kgid_t krgid, kegid;
662
3c2a0909
S
663#if defined CONFIG_SEC_RESTRICT_SETUID
664 if(rgid == 0 || egid == 0)
665 {
666 if(sec_restrict_uid())
667 return -EACCES;
668 }
669#endif // End of CONFIG_SEC_RESTRICT_SETUID
670
a29c33f4
EB
671 krgid = make_kgid(ns, rgid);
672 kegid = make_kgid(ns, egid);
673
674 if ((rgid != (gid_t) -1) && !gid_valid(krgid))
675 return -EINVAL;
676 if ((egid != (gid_t) -1) && !gid_valid(kegid))
677 return -EINVAL;
1da177e4 678
d84f4f99
DH
679 new = prepare_creds();
680 if (!new)
681 return -ENOMEM;
682 old = current_cred();
683
d84f4f99 684 retval = -EPERM;
1da177e4 685 if (rgid != (gid_t) -1) {
a29c33f4
EB
686 if (gid_eq(old->gid, krgid) ||
687 gid_eq(old->egid, krgid) ||
fc832ad3 688 nsown_capable(CAP_SETGID))
a29c33f4 689 new->gid = krgid;
1da177e4 690 else
d84f4f99 691 goto error;
1da177e4
LT
692 }
693 if (egid != (gid_t) -1) {
a29c33f4
EB
694 if (gid_eq(old->gid, kegid) ||
695 gid_eq(old->egid, kegid) ||
696 gid_eq(old->sgid, kegid) ||
fc832ad3 697 nsown_capable(CAP_SETGID))
a29c33f4 698 new->egid = kegid;
756184b7 699 else
d84f4f99 700 goto error;
1da177e4 701 }
d84f4f99 702
1da177e4 703 if (rgid != (gid_t) -1 ||
a29c33f4 704 (egid != (gid_t) -1 && !gid_eq(kegid, old->gid)))
d84f4f99
DH
705 new->sgid = new->egid;
706 new->fsgid = new->egid;
707
708 return commit_creds(new);
709
710error:
711 abort_creds(new);
712 return retval;
1da177e4
LT
713}
714
715/*
716 * setgid() is implemented like SysV w/ SAVED_IDS
717 *
718 * SMP: Same implicit races as above.
719 */
ae1251ab 720SYSCALL_DEFINE1(setgid, gid_t, gid)
1da177e4 721{
a29c33f4 722 struct user_namespace *ns = current_user_ns();
d84f4f99
DH
723 const struct cred *old;
724 struct cred *new;
1da177e4 725 int retval;
a29c33f4
EB
726 kgid_t kgid;
727
3c2a0909
S
728#if defined CONFIG_SEC_RESTRICT_SETUID
729 if(gid == 0)
730 {
731 if(sec_restrict_uid())
732 return -EACCES;
733 }
734#endif // End of CONFIG_SEC_RESTRICT_SETUID
735
a29c33f4
EB
736 kgid = make_kgid(ns, gid);
737 if (!gid_valid(kgid))
738 return -EINVAL;
1da177e4 739
d84f4f99
DH
740 new = prepare_creds();
741 if (!new)
742 return -ENOMEM;
743 old = current_cred();
744
d84f4f99 745 retval = -EPERM;
fc832ad3 746 if (nsown_capable(CAP_SETGID))
a29c33f4
EB
747 new->gid = new->egid = new->sgid = new->fsgid = kgid;
748 else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid))
749 new->egid = new->fsgid = kgid;
1da177e4 750 else
d84f4f99 751 goto error;
1da177e4 752
d84f4f99
DH
753 return commit_creds(new);
754
755error:
756 abort_creds(new);
757 return retval;
1da177e4 758}
54e99124 759
d84f4f99
DH
760/*
761 * change the user struct in a credentials set to match the new UID
762 */
763static int set_user(struct cred *new)
1da177e4
LT
764{
765 struct user_struct *new_user;
766
078de5f7 767 new_user = alloc_uid(new->uid);
1da177e4
LT
768 if (!new_user)
769 return -EAGAIN;
770
72fa5997
VK
771 /*
772 * We don't fail in case of NPROC limit excess here because too many
773 * poorly written programs don't check set*uid() return code, assuming
774 * it never fails if called by root. We may still enforce NPROC limit
775 * for programs doing set*uid()+execve() by harmlessly deferring the
776 * failure to the execve() stage.
777 */
78d7d407 778 if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
72fa5997
VK
779 new_user != INIT_USER)
780 current->flags |= PF_NPROC_EXCEEDED;
781 else
782 current->flags &= ~PF_NPROC_EXCEEDED;
1da177e4 783
d84f4f99
DH
784 free_uid(new->user);
785 new->user = new_user;
1da177e4
LT
786 return 0;
787}
788
789/*
790 * Unprivileged users may change the real uid to the effective uid
791 * or vice versa. (BSD-style)
792 *
793 * If you set the real uid at all, or set the effective uid to a value not
794 * equal to the real uid, then the saved uid is set to the new effective uid.
795 *
796 * This makes it possible for a setuid program to completely drop its
797 * privileges, which is often a useful assertion to make when you are doing
798 * a security audit over a program.
799 *
800 * The general idea is that a program which uses just setreuid() will be
801 * 100% compatible with BSD. A program which uses just setuid() will be
802 * 100% compatible with POSIX with saved IDs.
803 */
ae1251ab 804SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
1da177e4 805{
a29c33f4 806 struct user_namespace *ns = current_user_ns();
d84f4f99
DH
807 const struct cred *old;
808 struct cred *new;
1da177e4 809 int retval;
a29c33f4
EB
810 kuid_t kruid, keuid;
811
3c2a0909
S
812#if defined CONFIG_SEC_RESTRICT_SETUID
813 if(ruid == 0 || euid == 0)
814 {
815 if(sec_restrict_uid())
816 return -EACCES;
817 }
818#endif // End of CONFIG_SEC_RESTRICT_SETUID
819
a29c33f4
EB
820 kruid = make_kuid(ns, ruid);
821 keuid = make_kuid(ns, euid);
822
823 if ((ruid != (uid_t) -1) && !uid_valid(kruid))
824 return -EINVAL;
825 if ((euid != (uid_t) -1) && !uid_valid(keuid))
826 return -EINVAL;
1da177e4 827
d84f4f99
DH
828 new = prepare_creds();
829 if (!new)
830 return -ENOMEM;
831 old = current_cred();
832
d84f4f99 833 retval = -EPERM;
1da177e4 834 if (ruid != (uid_t) -1) {
a29c33f4
EB
835 new->uid = kruid;
836 if (!uid_eq(old->uid, kruid) &&
837 !uid_eq(old->euid, kruid) &&
fc832ad3 838 !nsown_capable(CAP_SETUID))
d84f4f99 839 goto error;
1da177e4
LT
840 }
841
842 if (euid != (uid_t) -1) {
a29c33f4
EB
843 new->euid = keuid;
844 if (!uid_eq(old->uid, keuid) &&
845 !uid_eq(old->euid, keuid) &&
846 !uid_eq(old->suid, keuid) &&
fc832ad3 847 !nsown_capable(CAP_SETUID))
d84f4f99 848 goto error;
1da177e4
LT
849 }
850
a29c33f4 851 if (!uid_eq(new->uid, old->uid)) {
54e99124
DG
852 retval = set_user(new);
853 if (retval < 0)
854 goto error;
855 }
1da177e4 856 if (ruid != (uid_t) -1 ||
a29c33f4 857 (euid != (uid_t) -1 && !uid_eq(keuid, old->uid)))
d84f4f99
DH
858 new->suid = new->euid;
859 new->fsuid = new->euid;
1da177e4 860
d84f4f99
DH
861 retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
862 if (retval < 0)
863 goto error;
1da177e4 864
d84f4f99 865 return commit_creds(new);
1da177e4 866
d84f4f99
DH
867error:
868 abort_creds(new);
869 return retval;
870}
1da177e4
LT
871
872/*
873 * setuid() is implemented like SysV with SAVED_IDS
874 *
875 * Note that SAVED_ID's is deficient in that a setuid root program
876 * like sendmail, for example, cannot set its uid to be a normal
877 * user and then switch back, because if you're root, setuid() sets
878 * the saved uid too. If you don't like this, blame the bright people
879 * in the POSIX committee and/or USG. Note that the BSD-style setreuid()
880 * will allow a root program to temporarily drop privileges and be able to
881 * regain them by swapping the real and effective uid.
882 */
ae1251ab 883SYSCALL_DEFINE1(setuid, uid_t, uid)
1da177e4 884{
a29c33f4 885 struct user_namespace *ns = current_user_ns();
d84f4f99
DH
886 const struct cred *old;
887 struct cred *new;
1da177e4 888 int retval;
a29c33f4
EB
889 kuid_t kuid;
890
3c2a0909
S
891#if defined CONFIG_SEC_RESTRICT_SETUID
892 if(uid == 0)
893 {
894 if(sec_restrict_uid())
895 return -EACCES;
896 }
897#endif // End of CONFIG_SEC_RESTRICT_SETUID
898
a29c33f4
EB
899 kuid = make_kuid(ns, uid);
900 if (!uid_valid(kuid))
901 return -EINVAL;
1da177e4 902
d84f4f99
DH
903 new = prepare_creds();
904 if (!new)
905 return -ENOMEM;
906 old = current_cred();
907
d84f4f99 908 retval = -EPERM;
fc832ad3 909 if (nsown_capable(CAP_SETUID)) {
a29c33f4
EB
910 new->suid = new->uid = kuid;
911 if (!uid_eq(kuid, old->uid)) {
54e99124
DG
912 retval = set_user(new);
913 if (retval < 0)
914 goto error;
d84f4f99 915 }
a29c33f4 916 } else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) {
d84f4f99 917 goto error;
1da177e4 918 }
1da177e4 919
a29c33f4 920 new->fsuid = new->euid = kuid;
d84f4f99
DH
921
922 retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
923 if (retval < 0)
924 goto error;
1da177e4 925
d84f4f99 926 return commit_creds(new);
1da177e4 927
d84f4f99
DH
928error:
929 abort_creds(new);
930 return retval;
1da177e4
LT
931}
932
933
934/*
935 * This function implements a generic ability to update ruid, euid,
936 * and suid. This allows you to implement the 4.4 compatible seteuid().
937 */
ae1251ab 938SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
1da177e4 939{
a29c33f4 940 struct user_namespace *ns = current_user_ns();
d84f4f99
DH
941 const struct cred *old;
942 struct cred *new;
1da177e4 943 int retval;
a29c33f4
EB
944 kuid_t kruid, keuid, ksuid;
945
3c2a0909
S
946#if defined CONFIG_SEC_RESTRICT_SETUID
947 if(ruid == 0 || euid == 0 || suid == 0)
948 {
949 if(sec_restrict_uid())
950 return -EACCES;
951 }
952#endif // End of CONFIG_SEC_RESTRICT_SETUID
953
a29c33f4
EB
954 kruid = make_kuid(ns, ruid);
955 keuid = make_kuid(ns, euid);
956 ksuid = make_kuid(ns, suid);
957
958 if ((ruid != (uid_t) -1) && !uid_valid(kruid))
959 return -EINVAL;
960
961 if ((euid != (uid_t) -1) && !uid_valid(keuid))
962 return -EINVAL;
963
964 if ((suid != (uid_t) -1) && !uid_valid(ksuid))
965 return -EINVAL;
1da177e4 966
d84f4f99
DH
967 new = prepare_creds();
968 if (!new)
969 return -ENOMEM;
970
d84f4f99 971 old = current_cred();
1da177e4 972
d84f4f99 973 retval = -EPERM;
fc832ad3 974 if (!nsown_capable(CAP_SETUID)) {
a29c33f4
EB
975 if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
976 !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
d84f4f99 977 goto error;
a29c33f4
EB
978 if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
979 !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid))
d84f4f99 980 goto error;
a29c33f4
EB
981 if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
982 !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid))
d84f4f99 983 goto error;
1da177e4 984 }
d84f4f99 985
1da177e4 986 if (ruid != (uid_t) -1) {
a29c33f4
EB
987 new->uid = kruid;
988 if (!uid_eq(kruid, old->uid)) {
54e99124
DG
989 retval = set_user(new);
990 if (retval < 0)
991 goto error;
992 }
1da177e4 993 }
d84f4f99 994 if (euid != (uid_t) -1)
a29c33f4 995 new->euid = keuid;
1da177e4 996 if (suid != (uid_t) -1)
a29c33f4 997 new->suid = ksuid;
d84f4f99 998 new->fsuid = new->euid;
1da177e4 999
d84f4f99
DH
1000 retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
1001 if (retval < 0)
1002 goto error;
1da177e4 1003
d84f4f99 1004 return commit_creds(new);
1da177e4 1005
d84f4f99
DH
1006error:
1007 abort_creds(new);
1008 return retval;
1da177e4
LT
1009}
1010
a29c33f4 1011SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp)
1da177e4 1012{
86a264ab 1013 const struct cred *cred = current_cred();
1da177e4 1014 int retval;
a29c33f4
EB
1015 uid_t ruid, euid, suid;
1016
1017 ruid = from_kuid_munged(cred->user_ns, cred->uid);
1018 euid = from_kuid_munged(cred->user_ns, cred->euid);
1019 suid = from_kuid_munged(cred->user_ns, cred->suid);
1da177e4 1020
a29c33f4
EB
1021 if (!(retval = put_user(ruid, ruidp)) &&
1022 !(retval = put_user(euid, euidp)))
1023 retval = put_user(suid, suidp);
1da177e4
LT
1024
1025 return retval;
1026}
1027
1028/*
1029 * Same as above, but for rgid, egid, sgid.
1030 */
ae1251ab 1031SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
1da177e4 1032{
a29c33f4 1033 struct user_namespace *ns = current_user_ns();
d84f4f99
DH
1034 const struct cred *old;
1035 struct cred *new;
1da177e4 1036 int retval;
a29c33f4
EB
1037 kgid_t krgid, kegid, ksgid;
1038
3c2a0909
S
1039#if defined CONFIG_SEC_RESTRICT_SETUID
1040 if(rgid == 0 || egid == 0 || sgid == 0)
1041 {
1042 if(sec_restrict_uid())
1043 return -EACCES;
1044 }
1045#endif // End of CONFIG_SEC_RESTRICT_SETUID
1046
a29c33f4
EB
1047 krgid = make_kgid(ns, rgid);
1048 kegid = make_kgid(ns, egid);
1049 ksgid = make_kgid(ns, sgid);
1050
1051 if ((rgid != (gid_t) -1) && !gid_valid(krgid))
1052 return -EINVAL;
1053 if ((egid != (gid_t) -1) && !gid_valid(kegid))
1054 return -EINVAL;
1055 if ((sgid != (gid_t) -1) && !gid_valid(ksgid))
1056 return -EINVAL;
1da177e4 1057
d84f4f99
DH
1058 new = prepare_creds();
1059 if (!new)
1060 return -ENOMEM;
1061 old = current_cred();
1062
d84f4f99 1063 retval = -EPERM;
fc832ad3 1064 if (!nsown_capable(CAP_SETGID)) {
a29c33f4
EB
1065 if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
1066 !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
d84f4f99 1067 goto error;
a29c33f4
EB
1068 if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) &&
1069 !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid))
d84f4f99 1070 goto error;
a29c33f4
EB
1071 if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) &&
1072 !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid))
d84f4f99 1073 goto error;
1da177e4 1074 }
d84f4f99 1075
1da177e4 1076 if (rgid != (gid_t) -1)
a29c33f4 1077 new->gid = krgid;
d84f4f99 1078 if (egid != (gid_t) -1)
a29c33f4 1079 new->egid = kegid;
1da177e4 1080 if (sgid != (gid_t) -1)
a29c33f4 1081 new->sgid = ksgid;
d84f4f99 1082 new->fsgid = new->egid;
1da177e4 1083
d84f4f99
DH
1084 return commit_creds(new);
1085
1086error:
1087 abort_creds(new);
1088 return retval;
1da177e4
LT
1089}
1090
a29c33f4 1091SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp)
1da177e4 1092{
86a264ab 1093 const struct cred *cred = current_cred();
1da177e4 1094 int retval;
a29c33f4
EB
1095 gid_t rgid, egid, sgid;
1096
1097 rgid = from_kgid_munged(cred->user_ns, cred->gid);
1098 egid = from_kgid_munged(cred->user_ns, cred->egid);
1099 sgid = from_kgid_munged(cred->user_ns, cred->sgid);
1da177e4 1100
a29c33f4
EB
1101 if (!(retval = put_user(rgid, rgidp)) &&
1102 !(retval = put_user(egid, egidp)))
1103 retval = put_user(sgid, sgidp);
1da177e4
LT
1104
1105 return retval;
1106}
1107
1108
1109/*
1110 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
1111 * is used for "access()" and for the NFS daemon (letting nfsd stay at
1112 * whatever uid it wants to). It normally shadows "euid", except when
1113 * explicitly set by setfsuid() or for access..
1114 */
ae1251ab 1115SYSCALL_DEFINE1(setfsuid, uid_t, uid)
1da177e4 1116{
d84f4f99
DH
1117 const struct cred *old;
1118 struct cred *new;
1119 uid_t old_fsuid;
a29c33f4
EB
1120 kuid_t kuid;
1121
1122 old = current_cred();
1123 old_fsuid = from_kuid_munged(old->user_ns, old->fsuid);
1124
1125 kuid = make_kuid(old->user_ns, uid);
1126 if (!uid_valid(kuid))
1127 return old_fsuid;
1da177e4 1128
d84f4f99
DH
1129 new = prepare_creds();
1130 if (!new)
a29c33f4 1131 return old_fsuid;
1da177e4 1132
a29c33f4
EB
1133 if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) ||
1134 uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) ||
fc832ad3 1135 nsown_capable(CAP_SETUID)) {
a29c33f4
EB
1136 if (!uid_eq(kuid, old->fsuid)) {
1137 new->fsuid = kuid;
d84f4f99
DH
1138 if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
1139 goto change_okay;
1da177e4 1140 }
1da177e4
LT
1141 }
1142
d84f4f99
DH
1143 abort_creds(new);
1144 return old_fsuid;
1da177e4 1145
d84f4f99
DH
1146change_okay:
1147 commit_creds(new);
1da177e4
LT
1148 return old_fsuid;
1149}
1150
1151/*
f42df9e6 1152 * Samma på svenska..
1da177e4 1153 */
ae1251ab 1154SYSCALL_DEFINE1(setfsgid, gid_t, gid)
1da177e4 1155{
d84f4f99
DH
1156 const struct cred *old;
1157 struct cred *new;
1158 gid_t old_fsgid;
a29c33f4
EB
1159 kgid_t kgid;
1160
1161 old = current_cred();
1162 old_fsgid = from_kgid_munged(old->user_ns, old->fsgid);
1163
1164 kgid = make_kgid(old->user_ns, gid);
1165 if (!gid_valid(kgid))
1166 return old_fsgid;
d84f4f99
DH
1167
1168 new = prepare_creds();
1169 if (!new)
a29c33f4 1170 return old_fsgid;
1da177e4 1171
a29c33f4
EB
1172 if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) ||
1173 gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) ||
fc832ad3 1174 nsown_capable(CAP_SETGID)) {
a29c33f4
EB
1175 if (!gid_eq(kgid, old->fsgid)) {
1176 new->fsgid = kgid;
d84f4f99 1177 goto change_okay;
1da177e4 1178 }
1da177e4 1179 }
d84f4f99 1180
d84f4f99
DH
1181 abort_creds(new);
1182 return old_fsgid;
1183
1184change_okay:
1185 commit_creds(new);
1da177e4
LT
1186 return old_fsgid;
1187}
1188
4a22f166
SR
1189/**
1190 * sys_getpid - return the thread group id of the current process
1191 *
1192 * Note, despite the name, this returns the tgid not the pid. The tgid and
1193 * the pid are identical unless CLONE_THREAD was specified on clone() in
1194 * which case the tgid is the same in all threads of the same group.
1195 *
1196 * This is SMP safe as current->tgid does not change.
1197 */
1198SYSCALL_DEFINE0(getpid)
1199{
1200 return task_tgid_vnr(current);
1201}
1202
1203/* Thread ID - the internal kernel "pid" */
1204SYSCALL_DEFINE0(gettid)
1205{
1206 return task_pid_vnr(current);
1207}
1208
1209/*
1210 * Accessing ->real_parent is not SMP-safe, it could
1211 * change from under us. However, we can use a stale
1212 * value of ->real_parent under rcu_read_lock(), see
1213 * release_task()->call_rcu(delayed_put_task_struct).
1214 */
1215SYSCALL_DEFINE0(getppid)
1216{
1217 int pid;
1218
1219 rcu_read_lock();
1220 pid = task_tgid_vnr(rcu_dereference(current->real_parent));
1221 rcu_read_unlock();
1222
1223 return pid;
1224}
1225
/* Return the caller's real UID as seen from its own user namespace. */
SYSCALL_DEFINE0(getuid)
{
	/* Only we change this so SMP safe */
	struct user_namespace *ns = current_user_ns();

	return from_kuid_munged(ns, current_uid());
}
1231
/* Return the caller's effective UID as seen from its own user namespace. */
SYSCALL_DEFINE0(geteuid)
{
	/* Only we change this so SMP safe */
	struct user_namespace *ns = current_user_ns();

	return from_kuid_munged(ns, current_euid());
}
1237
/* Return the caller's real GID as seen from its own user namespace. */
SYSCALL_DEFINE0(getgid)
{
	/* Only we change this so SMP safe */
	struct user_namespace *ns = current_user_ns();

	return from_kgid_munged(ns, current_gid());
}
1243
/* Return the caller's effective GID as seen from its own user namespace. */
SYSCALL_DEFINE0(getegid)
{
	/* Only we change this so SMP safe */
	struct user_namespace *ns = current_user_ns();

	return from_kgid_munged(ns, current_egid());
}
1249
f06febc9
FM
1250void do_sys_times(struct tms *tms)
1251{
0cf55e1e 1252 cputime_t tgutime, tgstime, cutime, cstime;
f06febc9 1253
2b5fe6de 1254 spin_lock_irq(&current->sighand->siglock);
e80d0a1a 1255 thread_group_cputime_adjusted(current, &tgutime, &tgstime);
f06febc9
FM
1256 cutime = current->signal->cutime;
1257 cstime = current->signal->cstime;
1258 spin_unlock_irq(&current->sighand->siglock);
0cf55e1e
HS
1259 tms->tms_utime = cputime_to_clock_t(tgutime);
1260 tms->tms_stime = cputime_to_clock_t(tgstime);
f06febc9
FM
1261 tms->tms_cutime = cputime_to_clock_t(cutime);
1262 tms->tms_cstime = cputime_to_clock_t(cstime);
1263}
1264
58fd3aa2 1265SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
1da177e4 1266{
1da177e4
LT
1267 if (tbuf) {
1268 struct tms tmp;
f06febc9
FM
1269
1270 do_sys_times(&tmp);
1da177e4
LT
1271 if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
1272 return -EFAULT;
1273 }
e3d5a27d 1274 force_successful_syscall_return();
1da177e4
LT
1275 return (long) jiffies_64_to_clock_t(get_jiffies_64());
1276}
1277
1278/*
1279 * This needs some heavy checking ...
1280 * I just haven't the stomach for it. I also don't fully
1281 * understand sessions/pgrp etc. Let somebody who does explain it.
1282 *
1283 * OK, I think I have the protection semantics right.... this is really
1284 * only important on a multi-user system anyway, to make sure one user
1285 * can't send a signal to a process owned by another. -TYT, 12/12/91
1286 *
1287 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
1288 * LBT 04.03.94
1289 */
b290ebe2 1290SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
1da177e4
LT
1291{
1292 struct task_struct *p;
ee0acf90 1293 struct task_struct *group_leader = current->group_leader;
4e021306
ON
1294 struct pid *pgrp;
1295 int err;
1da177e4
LT
1296
1297 if (!pid)
b488893a 1298 pid = task_pid_vnr(group_leader);
1da177e4
LT
1299 if (!pgid)
1300 pgid = pid;
1301 if (pgid < 0)
1302 return -EINVAL;
950eaaca 1303 rcu_read_lock();
1da177e4
LT
1304
1305 /* From this point forward we keep holding onto the tasklist lock
1306 * so that our parent does not change from under us. -DaveM
1307 */
1308 write_lock_irq(&tasklist_lock);
1309
1310 err = -ESRCH;
4e021306 1311 p = find_task_by_vpid(pid);
1da177e4
LT
1312 if (!p)
1313 goto out;
1314
1315 err = -EINVAL;
1316 if (!thread_group_leader(p))
1317 goto out;
1318
4e021306 1319 if (same_thread_group(p->real_parent, group_leader)) {
1da177e4 1320 err = -EPERM;
41487c65 1321 if (task_session(p) != task_session(group_leader))
1da177e4
LT
1322 goto out;
1323 err = -EACCES;
1324 if (p->did_exec)
1325 goto out;
1326 } else {
1327 err = -ESRCH;
ee0acf90 1328 if (p != group_leader)
1da177e4
LT
1329 goto out;
1330 }
1331
1332 err = -EPERM;
1333 if (p->signal->leader)
1334 goto out;
1335
4e021306 1336 pgrp = task_pid(p);
1da177e4 1337 if (pgid != pid) {
b488893a 1338 struct task_struct *g;
1da177e4 1339
4e021306
ON
1340 pgrp = find_vpid(pgid);
1341 g = pid_task(pgrp, PIDTYPE_PGID);
41487c65 1342 if (!g || task_session(g) != task_session(group_leader))
f020bc46 1343 goto out;
1da177e4
LT
1344 }
1345
1da177e4
LT
1346 err = security_task_setpgid(p, pgid);
1347 if (err)
1348 goto out;
1349
1b0f7ffd 1350 if (task_pgrp(p) != pgrp)
83beaf3c 1351 change_pid(p, PIDTYPE_PGID, pgrp);
1da177e4
LT
1352
1353 err = 0;
1354out:
1355 /* All paths lead to here, thus we are safe. -DaveM */
1356 write_unlock_irq(&tasklist_lock);
950eaaca 1357 rcu_read_unlock();
1da177e4
LT
1358 return err;
1359}
1360
dbf040d9 1361SYSCALL_DEFINE1(getpgid, pid_t, pid)
1da177e4 1362{
12a3de0a
ON
1363 struct task_struct *p;
1364 struct pid *grp;
1365 int retval;
1366
1367 rcu_read_lock();
756184b7 1368 if (!pid)
12a3de0a 1369 grp = task_pgrp(current);
756184b7 1370 else {
1da177e4 1371 retval = -ESRCH;
12a3de0a
ON
1372 p = find_task_by_vpid(pid);
1373 if (!p)
1374 goto out;
1375 grp = task_pgrp(p);
1376 if (!grp)
1377 goto out;
1378
1379 retval = security_task_getpgid(p);
1380 if (retval)
1381 goto out;
1da177e4 1382 }
12a3de0a
ON
1383 retval = pid_vnr(grp);
1384out:
1385 rcu_read_unlock();
1386 return retval;
1da177e4
LT
1387}
1388
1389#ifdef __ARCH_WANT_SYS_GETPGRP
1390
/* getpgrp - the caller's own process group: same as getpgid(0). */
SYSCALL_DEFINE0(getpgrp)
{
	long pgrp = sys_getpgid(0);

	return pgrp;
}
1395
1396#endif
1397
dbf040d9 1398SYSCALL_DEFINE1(getsid, pid_t, pid)
1da177e4 1399{
1dd768c0
ON
1400 struct task_struct *p;
1401 struct pid *sid;
1402 int retval;
1403
1404 rcu_read_lock();
756184b7 1405 if (!pid)
1dd768c0 1406 sid = task_session(current);
756184b7 1407 else {
1da177e4 1408 retval = -ESRCH;
1dd768c0
ON
1409 p = find_task_by_vpid(pid);
1410 if (!p)
1411 goto out;
1412 sid = task_session(p);
1413 if (!sid)
1414 goto out;
1415
1416 retval = security_task_getsid(p);
1417 if (retval)
1418 goto out;
1da177e4 1419 }
1dd768c0
ON
1420 retval = pid_vnr(sid);
1421out:
1422 rcu_read_unlock();
1423 return retval;
1da177e4
LT
1424}
1425
b290ebe2 1426SYSCALL_DEFINE0(setsid)
1da177e4 1427{
e19f247a 1428 struct task_struct *group_leader = current->group_leader;
e4cc0a9c
ON
1429 struct pid *sid = task_pid(group_leader);
1430 pid_t session = pid_vnr(sid);
1da177e4
LT
1431 int err = -EPERM;
1432
1da177e4 1433 write_lock_irq(&tasklist_lock);
390e2ff0
EB
1434 /* Fail if I am already a session leader */
1435 if (group_leader->signal->leader)
1436 goto out;
1437
430c6231
ON
1438 /* Fail if a process group id already exists that equals the
1439 * proposed session id.
390e2ff0 1440 */
6806aac6 1441 if (pid_task(sid, PIDTYPE_PGID))
1da177e4
LT
1442 goto out;
1443
e19f247a 1444 group_leader->signal->leader = 1;
8520d7c7 1445 __set_special_pids(sid);
24ec839c 1446
9c9f4ded 1447 proc_clear_tty(group_leader);
24ec839c 1448
e4cc0a9c 1449 err = session;
1da177e4
LT
1450out:
1451 write_unlock_irq(&tasklist_lock);
5091faa4 1452 if (err > 0) {
0d0df599 1453 proc_sid_connector(group_leader);
5091faa4
MG
1454 sched_autogroup_create_attach(group_leader);
1455 }
1da177e4
LT
1456 return err;
1457}
1458
1da177e4
LT
/* Taken around every utsname() access by the uname/hostname syscalls here. */
DECLARE_RWSEM(uts_sem);

/*
 * override_architecture(name) - overwrite the reported machine string.
 *
 * When COMPAT_UTS_MACHINE is defined and the caller runs with the
 * PER_LINUX32 personality, COMPAT_UTS_MACHINE is copied over name->machine
 * in user space.  Evaluates to non-zero only if that copy faults; evaluates
 * to 0 when no override applies.  The argument is parenthesized so that any
 * pointer expression can be passed safely.
 */
#ifdef COMPAT_UTS_MACHINE
#define override_architecture(name) \
	(personality(current->personality) == PER_LINUX32 && \
	 copy_to_user((name)->machine, COMPAT_UTS_MACHINE, \
		      sizeof(COMPAT_UTS_MACHINE)))
#else
#define override_architecture(name)	0
#endif
1469
be27425d
AK
1470/*
1471 * Work around broken programs that cannot handle "Linux 3.0".
1472 * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
1473 */
2702b152 1474static int override_release(char __user *release, size_t len)
be27425d
AK
1475{
1476 int ret = 0;
be27425d
AK
1477
1478 if (current->personality & UNAME26) {
2702b152
KC
1479 const char *rest = UTS_RELEASE;
1480 char buf[65] = { 0 };
be27425d
AK
1481 int ndots = 0;
1482 unsigned v;
2702b152 1483 size_t copy;
be27425d
AK
1484
1485 while (*rest) {
1486 if (*rest == '.' && ++ndots >= 3)
1487 break;
1488 if (!isdigit(*rest) && *rest != '.')
1489 break;
1490 rest++;
1491 }
1492 v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
31fd84b9 1493 copy = clamp_t(size_t, len, 1, sizeof(buf));
2702b152
KC
1494 copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
1495 ret = copy_to_user(release, buf, copy + 1);
be27425d
AK
1496 }
1497 return ret;
1498}
1499
e48fbb69 1500SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
1da177e4
LT
1501{
1502 int errno = 0;
1503
1504 down_read(&uts_sem);
e9ff3990 1505 if (copy_to_user(name, utsname(), sizeof *name))
1da177e4
LT
1506 errno = -EFAULT;
1507 up_read(&uts_sem);
e28cbf22 1508
be27425d
AK
1509 if (!errno && override_release(name->release, sizeof(name->release)))
1510 errno = -EFAULT;
e28cbf22
CH
1511 if (!errno && override_architecture(name))
1512 errno = -EFAULT;
1da177e4
LT
1513 return errno;
1514}
1515
5cacdb4a
CH
1516#ifdef __ARCH_WANT_SYS_OLD_UNAME
1517/*
1518 * Old cruft
1519 */
1520SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
1521{
1522 int error = 0;
1523
1524 if (!name)
1525 return -EFAULT;
1526
1527 down_read(&uts_sem);
1528 if (copy_to_user(name, utsname(), sizeof(*name)))
1529 error = -EFAULT;
1530 up_read(&uts_sem);
1531
be27425d
AK
1532 if (!error && override_release(name->release, sizeof(name->release)))
1533 error = -EFAULT;
5cacdb4a
CH
1534 if (!error && override_architecture(name))
1535 error = -EFAULT;
1536 return error;
1537}
1538
1539SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
1540{
1541 int error;
1542
1543 if (!name)
1544 return -EFAULT;
1545 if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
1546 return -EFAULT;
1547
1548 down_read(&uts_sem);
1549 error = __copy_to_user(&name->sysname, &utsname()->sysname,
1550 __OLD_UTS_LEN);
1551 error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
1552 error |= __copy_to_user(&name->nodename, &utsname()->nodename,
1553 __OLD_UTS_LEN);
1554 error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
1555 error |= __copy_to_user(&name->release, &utsname()->release,
1556 __OLD_UTS_LEN);
1557 error |= __put_user(0, name->release + __OLD_UTS_LEN);
1558 error |= __copy_to_user(&name->version, &utsname()->version,
1559 __OLD_UTS_LEN);
1560 error |= __put_user(0, name->version + __OLD_UTS_LEN);
1561 error |= __copy_to_user(&name->machine, &utsname()->machine,
1562 __OLD_UTS_LEN);
1563 error |= __put_user(0, name->machine + __OLD_UTS_LEN);
1564 up_read(&uts_sem);
1565
1566 if (!error && override_architecture(name))
1567 error = -EFAULT;
be27425d
AK
1568 if (!error && override_release(name->release, sizeof(name->release)))
1569 error = -EFAULT;
5cacdb4a
CH
1570 return error ? -EFAULT : 0;
1571}
1572#endif
1573
5a8a82b1 1574SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
1da177e4
LT
1575{
1576 int errno;
1577 char tmp[__NEW_UTS_LEN];
1578
bb96a6f5 1579 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
1da177e4 1580 return -EPERM;
fc832ad3 1581
1da177e4
LT
1582 if (len < 0 || len > __NEW_UTS_LEN)
1583 return -EINVAL;
1584 down_write(&uts_sem);
1585 errno = -EFAULT;
1586 if (!copy_from_user(tmp, name, len)) {
9679e4dd
AM
1587 struct new_utsname *u = utsname();
1588
1589 memcpy(u->nodename, tmp, len);
1590 memset(u->nodename + len, 0, sizeof(u->nodename) - len);
1da177e4 1591 errno = 0;
499eea6b 1592 uts_proc_notify(UTS_PROC_HOSTNAME);
1da177e4
LT
1593 }
1594 up_write(&uts_sem);
1595 return errno;
1596}
1597
1598#ifdef __ARCH_WANT_SYS_GETHOSTNAME
1599
5a8a82b1 1600SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
1da177e4
LT
1601{
1602 int i, errno;
9679e4dd 1603 struct new_utsname *u;
1da177e4
LT
1604
1605 if (len < 0)
1606 return -EINVAL;
1607 down_read(&uts_sem);
9679e4dd
AM
1608 u = utsname();
1609 i = 1 + strlen(u->nodename);
1da177e4
LT
1610 if (i > len)
1611 i = len;
1612 errno = 0;
9679e4dd 1613 if (copy_to_user(name, u->nodename, i))
1da177e4
LT
1614 errno = -EFAULT;
1615 up_read(&uts_sem);
1616 return errno;
1617}
1618
1619#endif
1620
1621/*
1622 * Only setdomainname; getdomainname can be implemented by calling
1623 * uname()
1624 */
5a8a82b1 1625SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
1da177e4
LT
1626{
1627 int errno;
1628 char tmp[__NEW_UTS_LEN];
1629
fc832ad3 1630 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
1da177e4
LT
1631 return -EPERM;
1632 if (len < 0 || len > __NEW_UTS_LEN)
1633 return -EINVAL;
1634
1635 down_write(&uts_sem);
1636 errno = -EFAULT;
1637 if (!copy_from_user(tmp, name, len)) {
9679e4dd
AM
1638 struct new_utsname *u = utsname();
1639
1640 memcpy(u->domainname, tmp, len);
1641 memset(u->domainname + len, 0, sizeof(u->domainname) - len);
1da177e4 1642 errno = 0;
499eea6b 1643 uts_proc_notify(UTS_PROC_DOMAINNAME);
1da177e4
LT
1644 }
1645 up_write(&uts_sem);
1646 return errno;
1647}
1648
e48fbb69 1649SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
1da177e4 1650{
b9518345
JS
1651 struct rlimit value;
1652 int ret;
1653
1654 ret = do_prlimit(current, resource, NULL, &value);
1655 if (!ret)
1656 ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
1657
1658 return ret;
1da177e4
LT
1659}
1660
1661#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
1662
1663/*
1664 * Back compatibility for getrlimit. Needed for some apps.
1665 */
1666
e48fbb69
HC
1667SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
1668 struct rlimit __user *, rlim)
1da177e4
LT
1669{
1670 struct rlimit x;
1671 if (resource >= RLIM_NLIMITS)
1672 return -EINVAL;
1673
1674 task_lock(current->group_leader);
1675 x = current->signal->rlim[resource];
1676 task_unlock(current->group_leader);
756184b7 1677 if (x.rlim_cur > 0x7FFFFFFF)
1da177e4 1678 x.rlim_cur = 0x7FFFFFFF;
756184b7 1679 if (x.rlim_max > 0x7FFFFFFF)
1da177e4
LT
1680 x.rlim_max = 0x7FFFFFFF;
1681 return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
1682}
1683
1684#endif
1685
c022a0ac
JS
1686static inline bool rlim64_is_infinity(__u64 rlim64)
1687{
1688#if BITS_PER_LONG < 64
1689 return rlim64 >= ULONG_MAX;
1690#else
1691 return rlim64 == RLIM64_INFINITY;
1692#endif
1693}
1694
1695static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64)
1696{
1697 if (rlim->rlim_cur == RLIM_INFINITY)
1698 rlim64->rlim_cur = RLIM64_INFINITY;
1699 else
1700 rlim64->rlim_cur = rlim->rlim_cur;
1701 if (rlim->rlim_max == RLIM_INFINITY)
1702 rlim64->rlim_max = RLIM64_INFINITY;
1703 else
1704 rlim64->rlim_max = rlim->rlim_max;
1705}
1706
1707static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim)
1708{
1709 if (rlim64_is_infinity(rlim64->rlim_cur))
1710 rlim->rlim_cur = RLIM_INFINITY;
1711 else
1712 rlim->rlim_cur = (unsigned long)rlim64->rlim_cur;
1713 if (rlim64_is_infinity(rlim64->rlim_max))
1714 rlim->rlim_max = RLIM_INFINITY;
1715 else
1716 rlim->rlim_max = (unsigned long)rlim64->rlim_max;
1717}
1718
1c1e618d 1719/* make sure you are allowed to change @tsk limits before calling this */
5b41535a
JS
1720int do_prlimit(struct task_struct *tsk, unsigned int resource,
1721 struct rlimit *new_rlim, struct rlimit *old_rlim)
1da177e4 1722{
5b41535a 1723 struct rlimit *rlim;
86f162f4 1724 int retval = 0;
1da177e4
LT
1725
1726 if (resource >= RLIM_NLIMITS)
1727 return -EINVAL;
5b41535a
JS
1728 if (new_rlim) {
1729 if (new_rlim->rlim_cur > new_rlim->rlim_max)
1730 return -EINVAL;
1731 if (resource == RLIMIT_NOFILE &&
1732 new_rlim->rlim_max > sysctl_nr_open)
1733 return -EPERM;
1734 }
1da177e4 1735
1c1e618d
JS
1736 /* protect tsk->signal and tsk->sighand from disappearing */
1737 read_lock(&tasklist_lock);
1738 if (!tsk->sighand) {
1739 retval = -ESRCH;
1740 goto out;
1741 }
1742
5b41535a 1743 rlim = tsk->signal->rlim + resource;
86f162f4 1744 task_lock(tsk->group_leader);
5b41535a 1745 if (new_rlim) {
fc832ad3
SH
1746 /* Keep the capable check against init_user_ns until
1747 cgroups can contain all limits */
5b41535a
JS
1748 if (new_rlim->rlim_max > rlim->rlim_max &&
1749 !capable(CAP_SYS_RESOURCE))
1750 retval = -EPERM;
1751 if (!retval)
1752 retval = security_task_setrlimit(tsk->group_leader,
1753 resource, new_rlim);
1754 if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
1755 /*
1756 * The caller is asking for an immediate RLIMIT_CPU
1757 * expiry. But we use the zero value to mean "it was
1758 * never set". So let's cheat and make it one second
1759 * instead
1760 */
1761 new_rlim->rlim_cur = 1;
1762 }
1763 }
1764 if (!retval) {
1765 if (old_rlim)
1766 *old_rlim = *rlim;
1767 if (new_rlim)
1768 *rlim = *new_rlim;
9926e4c7 1769 }
7855c35d 1770 task_unlock(tsk->group_leader);
1da177e4 1771
d3561f78
AM
1772 /*
1773 * RLIMIT_CPU handling. Note that the kernel fails to return an error
1774 * code if it rejected the user's attempt to set RLIMIT_CPU. This is a
1775 * very long-standing error, and fixing it now risks breakage of
1776 * applications, so we live with it
1777 */
5b41535a
JS
1778 if (!retval && new_rlim && resource == RLIMIT_CPU &&
1779 new_rlim->rlim_cur != RLIM_INFINITY)
1780 update_rlimit_cpu(tsk, new_rlim->rlim_cur);
ec9e16ba 1781out:
1c1e618d 1782 read_unlock(&tasklist_lock);
2fb9d268 1783 return retval;
1da177e4
LT
1784}
1785
c022a0ac
JS
1786/* rcu lock must be held */
1787static int check_prlimit_permission(struct task_struct *task)
1788{
1789 const struct cred *cred = current_cred(), *tcred;
1790
fc832ad3
SH
1791 if (current == task)
1792 return 0;
c022a0ac 1793
fc832ad3 1794 tcred = __task_cred(task);
5af66203
EB
1795 if (uid_eq(cred->uid, tcred->euid) &&
1796 uid_eq(cred->uid, tcred->suid) &&
1797 uid_eq(cred->uid, tcred->uid) &&
1798 gid_eq(cred->gid, tcred->egid) &&
1799 gid_eq(cred->gid, tcred->sgid) &&
1800 gid_eq(cred->gid, tcred->gid))
fc832ad3 1801 return 0;
c4a4d603 1802 if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE))
fc832ad3
SH
1803 return 0;
1804
1805 return -EPERM;
c022a0ac
JS
1806}
1807
1808SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
1809 const struct rlimit64 __user *, new_rlim,
1810 struct rlimit64 __user *, old_rlim)
1811{
1812 struct rlimit64 old64, new64;
1813 struct rlimit old, new;
1814 struct task_struct *tsk;
1815 int ret;
1816
1817 if (new_rlim) {
1818 if (copy_from_user(&new64, new_rlim, sizeof(new64)))
1819 return -EFAULT;
1820 rlim64_to_rlim(&new64, &new);
1821 }
1822
1823 rcu_read_lock();
1824 tsk = pid ? find_task_by_vpid(pid) : current;
1825 if (!tsk) {
1826 rcu_read_unlock();
1827 return -ESRCH;
1828 }
1829 ret = check_prlimit_permission(tsk);
1830 if (ret) {
1831 rcu_read_unlock();
1832 return ret;
1833 }
1834 get_task_struct(tsk);
1835 rcu_read_unlock();
1836
1837 ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
1838 old_rlim ? &old : NULL);
1839
1840 if (!ret && old_rlim) {
1841 rlim_to_rlim64(&old, &old64);
1842 if (copy_to_user(old_rlim, &old64, sizeof(old64)))
1843 ret = -EFAULT;
1844 }
1845
1846 put_task_struct(tsk);
1847 return ret;
1848}
1849
7855c35d
JS
1850SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
1851{
1852 struct rlimit new_rlim;
1853
1854 if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
1855 return -EFAULT;
5b41535a 1856 return do_prlimit(current, resource, &new_rlim, NULL);
7855c35d
JS
1857}
1858
1da177e4
LT
1859/*
1860 * It would make sense to put struct rusage in the task_struct,
1861 * except that would make the task_struct be *really big*. After
1862 * task_struct gets moved into malloc'ed memory, it would
1863 * make sense to do this. It will make moving the rest of the information
1864 * a lot simpler! (Which we're not doing right now because we're not
1865 * measuring them yet).
1866 *
1da177e4
LT
1867 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
1868 * races with threads incrementing their own counters. But since word
1869 * reads are atomic, we either get new values or old values and we don't
1870 * care which for the sums. We always take the siglock to protect reading
1871 * the c* fields from p->signal from races with exit.c updating those
1872 * fields when reaping, so a sample either gets all the additions of a
1873 * given child after it's reaped, or none so this sample is before reaping.
2dd0ebcd 1874 *
de047c1b
RT
1875 * Locking:
1876 * We need to take the siglock for CHILDREN, SELF and BOTH
1877 * for the cases current multithreaded, non-current single threaded
1878 * non-current multithreaded. Thread traversal is now safe with
1879 * the siglock held.
1880 * Strictly speaking, we do not need to take the siglock if we are current and
1881 * single threaded, as no one else can take our signal_struct away, no one
1882 * else can reap the children to update signal->c* counters, and no one else
1883 * can race with the signal-> fields. If we do not take any lock, the
1884 * signal-> fields could be read out of order while another thread was just
1885 * exiting. So we should place a read memory barrier when we avoid the lock.
1886 * On the writer side, write memory barrier is implied in __exit_signal
1887 * as __exit_signal releases the siglock spinlock after updating the signal->
1888 * fields. But we don't do this yet to keep things simple.
2dd0ebcd 1889 *
1da177e4
LT
1890 */
1891
f06febc9 1892static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
679c9cd4 1893{
679c9cd4
SK
1894 r->ru_nvcsw += t->nvcsw;
1895 r->ru_nivcsw += t->nivcsw;
1896 r->ru_minflt += t->min_flt;
1897 r->ru_majflt += t->maj_flt;
1898 r->ru_inblock += task_io_get_inblock(t);
1899 r->ru_oublock += task_io_get_oublock(t);
1900}
1901
1da177e4
LT
1902static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1903{
1904 struct task_struct *t;
1905 unsigned long flags;
0cf55e1e 1906 cputime_t tgutime, tgstime, utime, stime;
1f10206c 1907 unsigned long maxrss = 0;
1da177e4
LT
1908
1909 memset((char *) r, 0, sizeof *r);
64861634 1910 utime = stime = 0;
1da177e4 1911
679c9cd4 1912 if (who == RUSAGE_THREAD) {
e80d0a1a 1913 task_cputime_adjusted(current, &utime, &stime);
f06febc9 1914 accumulate_thread_rusage(p, r);
1f10206c 1915 maxrss = p->signal->maxrss;
679c9cd4
SK
1916 goto out;
1917 }
1918
d6cf723a 1919 if (!lock_task_sighand(p, &flags))
de047c1b 1920 return;
0f59cc4a 1921
1da177e4 1922 switch (who) {
0f59cc4a 1923 case RUSAGE_BOTH:
1da177e4 1924 case RUSAGE_CHILDREN:
1da177e4
LT
1925 utime = p->signal->cutime;
1926 stime = p->signal->cstime;
1927 r->ru_nvcsw = p->signal->cnvcsw;
1928 r->ru_nivcsw = p->signal->cnivcsw;
1929 r->ru_minflt = p->signal->cmin_flt;
1930 r->ru_majflt = p->signal->cmaj_flt;
6eaeeaba
ED
1931 r->ru_inblock = p->signal->cinblock;
1932 r->ru_oublock = p->signal->coublock;
1f10206c 1933 maxrss = p->signal->cmaxrss;
0f59cc4a
ON
1934
1935 if (who == RUSAGE_CHILDREN)
1936 break;
1937
1da177e4 1938 case RUSAGE_SELF:
e80d0a1a 1939 thread_group_cputime_adjusted(p, &tgutime, &tgstime);
64861634
MS
1940 utime += tgutime;
1941 stime += tgstime;
1da177e4
LT
1942 r->ru_nvcsw += p->signal->nvcsw;
1943 r->ru_nivcsw += p->signal->nivcsw;
1944 r->ru_minflt += p->signal->min_flt;
1945 r->ru_majflt += p->signal->maj_flt;
6eaeeaba
ED
1946 r->ru_inblock += p->signal->inblock;
1947 r->ru_oublock += p->signal->oublock;
1f10206c
JP
1948 if (maxrss < p->signal->maxrss)
1949 maxrss = p->signal->maxrss;
1da177e4
LT
1950 t = p;
1951 do {
f06febc9 1952 accumulate_thread_rusage(t, r);
1da177e4
LT
1953 t = next_thread(t);
1954 } while (t != p);
1da177e4 1955 break;
0f59cc4a 1956
1da177e4
LT
1957 default:
1958 BUG();
1959 }
de047c1b 1960 unlock_task_sighand(p, &flags);
de047c1b 1961
679c9cd4 1962out:
0f59cc4a
ON
1963 cputime_to_timeval(utime, &r->ru_utime);
1964 cputime_to_timeval(stime, &r->ru_stime);
1f10206c
JP
1965
1966 if (who != RUSAGE_CHILDREN) {
1967 struct mm_struct *mm = get_task_mm(p);
1968 if (mm) {
1969 setmax_mm_hiwater_rss(&maxrss, mm);
1970 mmput(mm);
1971 }
1972 }
1973 r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
1da177e4
LT
1974}
1975
1976int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
1977{
1978 struct rusage r;
1da177e4 1979 k_getrusage(p, who, &r);
1da177e4
LT
1980 return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
1981}
1982
e48fbb69 1983SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
1da177e4 1984{
679c9cd4
SK
1985 if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
1986 who != RUSAGE_THREAD)
1da177e4
LT
1987 return -EINVAL;
1988 return getrusage(current, who, ru);
1989}
1990
8d2d5c4a
AV
1991#ifdef CONFIG_COMPAT
1992COMPAT_SYSCALL_DEFINE2(getrusage, int, who, struct compat_rusage __user *, ru)
1993{
1994 struct rusage r;
1995
1996 if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
1997 who != RUSAGE_THREAD)
1998 return -EINVAL;
1999
2000 k_getrusage(current, who, &r);
2001 return put_compat_rusage(&r, ru);
2002}
2003#endif
2004
e48fbb69 2005SYSCALL_DEFINE1(umask, int, mask)
1da177e4
LT
2006{
2007 mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
2008 return mask;
2009}
3b7391de 2010
b32dfe37
CG
2011static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
2012{
2903ff01 2013 struct fd exe;
496ad9aa 2014 struct inode *inode;
2903ff01 2015 int err;
b32dfe37 2016
2903ff01
AV
2017 exe = fdget(fd);
2018 if (!exe.file)
b32dfe37
CG
2019 return -EBADF;
2020
496ad9aa 2021 inode = file_inode(exe.file);
b32dfe37
CG
2022
2023 /*
2024 * Because the original mm->exe_file points to executable file, make
2025 * sure that this one is executable as well, to avoid breaking an
2026 * overall picture.
2027 */
2028 err = -EACCES;
496ad9aa 2029 if (!S_ISREG(inode->i_mode) ||
2903ff01 2030 exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC)
b32dfe37
CG
2031 goto exit;
2032
496ad9aa 2033 err = inode_permission(inode, MAY_EXEC);
b32dfe37
CG
2034 if (err)
2035 goto exit;
2036
bafb282d
KK
2037 down_write(&mm->mmap_sem);
2038
2039 /*
4229fb1d 2040 * Forbid mm->exe_file change if old file still mapped.
bafb282d
KK
2041 */
2042 err = -EBUSY;
4229fb1d
KK
2043 if (mm->exe_file) {
2044 struct vm_area_struct *vma;
2045
2046 for (vma = mm->mmap; vma; vma = vma->vm_next)
2047 if (vma->vm_file &&
2048 path_equal(&vma->vm_file->f_path,
2049 &mm->exe_file->f_path))
2050 goto exit_unlock;
bafb282d
KK
2051 }
2052
b32dfe37
CG
2053 /*
2054 * The symlink can be changed only once, just to disallow arbitrary
2055 * transitions malicious software might bring in. This means one
2056 * could make a snapshot over all processes running and monitor
2057 * /proc/pid/exe changes to notice unusual activity if needed.
2058 */
bafb282d
KK
2059 err = -EPERM;
2060 if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
2061 goto exit_unlock;
2062
4229fb1d 2063 err = 0;
2903ff01 2064 set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */
bafb282d 2065exit_unlock:
b32dfe37
CG
2066 up_write(&mm->mmap_sem);
2067
2068exit:
2903ff01 2069 fdput(exe);
b32dfe37
CG
2070 return err;
2071}
2072
028ee4be
CG
2073static int prctl_set_mm(int opt, unsigned long addr,
2074 unsigned long arg4, unsigned long arg5)
2075{
2076 unsigned long rlim = rlimit(RLIMIT_DATA);
028ee4be 2077 struct mm_struct *mm = current->mm;
fe8c7f5c
CG
2078 struct vm_area_struct *vma;
2079 int error;
028ee4be 2080
fe8c7f5c 2081 if (arg5 || (arg4 && opt != PR_SET_MM_AUXV))
028ee4be
CG
2082 return -EINVAL;
2083
79f0713d 2084 if (!capable(CAP_SYS_RESOURCE))
028ee4be
CG
2085 return -EPERM;
2086
b32dfe37
CG
2087 if (opt == PR_SET_MM_EXE_FILE)
2088 return prctl_set_mm_exe_file(mm, (unsigned int)addr);
2089
1ad75b9e 2090 if (addr >= TASK_SIZE || addr < mmap_min_addr)
028ee4be
CG
2091 return -EINVAL;
2092
fe8c7f5c
CG
2093 error = -EINVAL;
2094
028ee4be
CG
2095 down_read(&mm->mmap_sem);
2096 vma = find_vma(mm, addr);
2097
028ee4be
CG
2098 switch (opt) {
2099 case PR_SET_MM_START_CODE:
fe8c7f5c
CG
2100 mm->start_code = addr;
2101 break;
028ee4be 2102 case PR_SET_MM_END_CODE:
fe8c7f5c 2103 mm->end_code = addr;
028ee4be 2104 break;
028ee4be 2105 case PR_SET_MM_START_DATA:
fe8c7f5c 2106 mm->start_data = addr;
028ee4be 2107 break;
fe8c7f5c
CG
2108 case PR_SET_MM_END_DATA:
2109 mm->end_data = addr;
028ee4be
CG
2110 break;
2111
2112 case PR_SET_MM_START_BRK:
2113 if (addr <= mm->end_data)
2114 goto out;
2115
2116 if (rlim < RLIM_INFINITY &&
2117 (mm->brk - addr) +
2118 (mm->end_data - mm->start_data) > rlim)
2119 goto out;
2120
2121 mm->start_brk = addr;
2122 break;
2123
2124 case PR_SET_MM_BRK:
2125 if (addr <= mm->end_data)
2126 goto out;
2127
2128 if (rlim < RLIM_INFINITY &&
2129 (addr - mm->start_brk) +
2130 (mm->end_data - mm->start_data) > rlim)
2131 goto out;
2132
2133 mm->brk = addr;
2134 break;
2135
fe8c7f5c
CG
2136 /*
2137 * If command line arguments and environment
2138 * are placed somewhere else on stack, we can
2139 * set them up here, ARG_START/END to setup
2140 * command line argumets and ENV_START/END
2141 * for environment.
2142 */
2143 case PR_SET_MM_START_STACK:
2144 case PR_SET_MM_ARG_START:
2145 case PR_SET_MM_ARG_END:
2146 case PR_SET_MM_ENV_START:
2147 case PR_SET_MM_ENV_END:
2148 if (!vma) {
2149 error = -EFAULT;
2150 goto out;
2151 }
fe8c7f5c
CG
2152 if (opt == PR_SET_MM_START_STACK)
2153 mm->start_stack = addr;
2154 else if (opt == PR_SET_MM_ARG_START)
2155 mm->arg_start = addr;
2156 else if (opt == PR_SET_MM_ARG_END)
2157 mm->arg_end = addr;
2158 else if (opt == PR_SET_MM_ENV_START)
2159 mm->env_start = addr;
2160 else if (opt == PR_SET_MM_ENV_END)
2161 mm->env_end = addr;
2162 break;
2163
2164 /*
2165 * This doesn't move auxiliary vector itself
2166 * since it's pinned to mm_struct, but allow
2167 * to fill vector with new values. It's up
2168 * to a caller to provide sane values here
2169 * otherwise user space tools which use this
2170 * vector might be unhappy.
2171 */
2172 case PR_SET_MM_AUXV: {
2173 unsigned long user_auxv[AT_VECTOR_SIZE];
2174
2175 if (arg4 > sizeof(user_auxv))
2176 goto out;
2177 up_read(&mm->mmap_sem);
2178
2179 if (copy_from_user(user_auxv, (const void __user *)addr, arg4))
2180 return -EFAULT;
2181
2182 /* Make sure the last entry is always AT_NULL */
2183 user_auxv[AT_VECTOR_SIZE - 2] = 0;
2184 user_auxv[AT_VECTOR_SIZE - 1] = 0;
2185
2186 BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
2187
2188 task_lock(current);
2189 memcpy(mm->saved_auxv, user_auxv, arg4);
2190 task_unlock(current);
2191
2192 return 0;
2193 }
028ee4be 2194 default:
028ee4be
CG
2195 goto out;
2196 }
2197
2198 error = 0;
028ee4be
CG
2199out:
2200 up_read(&mm->mmap_sem);
028ee4be
CG
2201 return error;
2202}
300f786b 2203
52b36941 2204#ifdef CONFIG_CHECKPOINT_RESTORE
300f786b
CG
2205static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
2206{
2207 return put_user(me->clear_child_tid, tid_addr);
2208}
52b36941 2209#else
300f786b
CG
2210static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
2211{
2212 return -EINVAL;
2213}
028ee4be
CG
2214#endif
2215
3c2a0909
S
2216#ifdef CONFIG_MMU
2217static int prctl_update_vma_anon_name(struct vm_area_struct *vma,
2218 struct vm_area_struct **prev,
2219 unsigned long start, unsigned long end,
2220 const char __user *name_addr)
2221{
2222 struct mm_struct * mm = vma->vm_mm;
2223 int error = 0;
2224 pgoff_t pgoff;
2225
2226 if (name_addr == vma_get_anon_name(vma)) {
2227 *prev = vma;
2228 goto out;
2229 }
2230
2231 pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
2232 *prev = vma_merge(mm, *prev, start, end, vma->vm_flags, vma->anon_vma,
2233 vma->vm_file, pgoff, vma_policy(vma),
2234 name_addr);
2235 if (*prev) {
2236 vma = *prev;
2237 goto success;
2238 }
2239
2240 *prev = vma;
2241
2242 if (start != vma->vm_start) {
2243 error = split_vma(mm, vma, start, 1);
2244 if (error)
2245 goto out;
2246 }
2247
2248 if (end != vma->vm_end) {
2249 error = split_vma(mm, vma, end, 0);
2250 if (error)
2251 goto out;
2252 }
2253
2254success:
2255 if (!vma->vm_file)
2256 vma->shared.anon_name = name_addr;
2257
2258out:
2259 if (error == -ENOMEM)
2260 error = -EAGAIN;
2261 return error;
2262}
2263
2264static int prctl_set_vma_anon_name(unsigned long start, unsigned long end,
2265 unsigned long arg)
2266{
2267 unsigned long tmp;
2268 struct vm_area_struct * vma, *prev;
2269 int unmapped_error = 0;
2270 int error = -EINVAL;
2271
2272 /*
2273 * If the interval [start,end) covers some unmapped address
2274 * ranges, just ignore them, but return -ENOMEM at the end.
2275 * - this matches the handling in madvise.
2276 */
2277 vma = find_vma_prev(current->mm, start, &prev);
2278 if (vma && start > vma->vm_start)
2279 prev = vma;
2280
2281 for (;;) {
2282 /* Still start < end. */
2283 error = -ENOMEM;
2284 if (!vma)
2285 return error;
2286
2287 /* Here start < (end|vma->vm_end). */
2288 if (start < vma->vm_start) {
2289 unmapped_error = -ENOMEM;
2290 start = vma->vm_start;
2291 if (start >= end)
2292 return error;
2293 }
2294
2295 /* Here vma->vm_start <= start < (end|vma->vm_end) */
2296 tmp = vma->vm_end;
2297 if (end < tmp)
2298 tmp = end;
2299
2300 /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
2301 error = prctl_update_vma_anon_name(vma, &prev, start, tmp,
2302 (const char __user *)arg);
2303 if (error)
2304 return error;
2305 start = tmp;
2306 if (prev && start < prev->vm_end)
2307 start = prev->vm_end;
2308 error = unmapped_error;
2309 if (start >= end)
2310 return error;
2311 if (prev)
2312 vma = prev->vm_next;
2313 else /* madvise_remove dropped mmap_sem */
2314 vma = find_vma(current->mm, start);
2315 }
2316}
2317
2318static int prctl_set_vma(unsigned long opt, unsigned long start,
2319 unsigned long len_in, unsigned long arg)
2320{
2321 struct mm_struct *mm = current->mm;
2322 int error;
2323 unsigned long len;
2324 unsigned long end;
2325
2326 if (start & ~PAGE_MASK)
2327 return -EINVAL;
2328 len = (len_in + ~PAGE_MASK) & PAGE_MASK;
2329
2330 /* Check to see whether len was rounded up from small -ve to zero */
2331 if (len_in && !len)
2332 return -EINVAL;
2333
2334 end = start + len;
2335 if (end < start)
2336 return -EINVAL;
2337
2338 if (end == start)
2339 return 0;
2340
2341 down_write(&mm->mmap_sem);
2342
2343 switch (opt) {
2344 case PR_SET_VMA_ANON_NAME:
2345 error = prctl_set_vma_anon_name(start, end, arg);
2346 break;
2347 default:
2348 error = -EINVAL;
2349 }
2350
2351 up_write(&mm->mmap_sem);
2352
2353 return error;
2354}
2355#else /* CONFIG_MMU */
2356static int prctl_set_vma(unsigned long opt, unsigned long start,
2357 unsigned long len_in, unsigned long arg)
2358{
2359 return -EINVAL;
2360}
2361#endif
2362
c4ea37c2
HC
2363SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
2364 unsigned long, arg4, unsigned long, arg5)
1da177e4 2365{
b6dff3ec 2366 struct task_struct *me = current;
3c2a0909 2367 struct task_struct *tsk;
b6dff3ec
DH
2368 unsigned char comm[sizeof(me->comm)];
2369 long error;
1da177e4 2370
d84f4f99
DH
2371 error = security_task_prctl(option, arg2, arg3, arg4, arg5);
2372 if (error != -ENOSYS)
1da177e4
LT
2373 return error;
2374
d84f4f99 2375 error = 0;
1da177e4 2376 switch (option) {
f3cbd435
AM
2377 case PR_SET_PDEATHSIG:
2378 if (!valid_signal(arg2)) {
2379 error = -EINVAL;
1da177e4 2380 break;
f3cbd435
AM
2381 }
2382 me->pdeath_signal = arg2;
2383 break;
2384 case PR_GET_PDEATHSIG:
2385 error = put_user(me->pdeath_signal, (int __user *)arg2);
2386 break;
2387 case PR_GET_DUMPABLE:
2388 error = get_dumpable(me->mm);
2389 break;
2390 case PR_SET_DUMPABLE:
2391 if (arg2 != SUID_DUMP_DISABLE && arg2 != SUID_DUMP_USER) {
2392 error = -EINVAL;
1da177e4 2393 break;
f3cbd435
AM
2394 }
2395 set_dumpable(me->mm, arg2);
2396 break;
1da177e4 2397
f3cbd435
AM
2398 case PR_SET_UNALIGN:
2399 error = SET_UNALIGN_CTL(me, arg2);
2400 break;
2401 case PR_GET_UNALIGN:
2402 error = GET_UNALIGN_CTL(me, arg2);
2403 break;
2404 case PR_SET_FPEMU:
2405 error = SET_FPEMU_CTL(me, arg2);
2406 break;
2407 case PR_GET_FPEMU:
2408 error = GET_FPEMU_CTL(me, arg2);
2409 break;
2410 case PR_SET_FPEXC:
2411 error = SET_FPEXC_CTL(me, arg2);
2412 break;
2413 case PR_GET_FPEXC:
2414 error = GET_FPEXC_CTL(me, arg2);
2415 break;
2416 case PR_GET_TIMING:
2417 error = PR_TIMING_STATISTICAL;
2418 break;
2419 case PR_SET_TIMING:
2420 if (arg2 != PR_TIMING_STATISTICAL)
2421 error = -EINVAL;
2422 break;
2423 case PR_SET_NAME:
2424 comm[sizeof(me->comm) - 1] = 0;
2425 if (strncpy_from_user(comm, (char __user *)arg2,
2426 sizeof(me->comm) - 1) < 0)
2427 return -EFAULT;
2428 set_task_comm(me, comm);
2429 proc_comm_connector(me);
2430 break;
2431 case PR_GET_NAME:
2432 get_task_comm(comm, me);
2433 if (copy_to_user((char __user *)arg2, comm, sizeof(comm)))
2434 return -EFAULT;
2435 break;
2436 case PR_GET_ENDIAN:
2437 error = GET_ENDIAN(me, arg2);
2438 break;
2439 case PR_SET_ENDIAN:
2440 error = SET_ENDIAN(me, arg2);
2441 break;
2442 case PR_GET_SECCOMP:
2443 error = prctl_get_seccomp();
2444 break;
2445 case PR_SET_SECCOMP:
2446 error = prctl_set_seccomp(arg2, (char __user *)arg3);
2447 break;
2448 case PR_GET_TSC:
2449 error = GET_TSC_CTL(arg2);
2450 break;
2451 case PR_SET_TSC:
2452 error = SET_TSC_CTL(arg2);
2453 break;
2454 case PR_TASK_PERF_EVENTS_DISABLE:
2455 error = perf_event_task_disable();
2456 break;
2457 case PR_TASK_PERF_EVENTS_ENABLE:
2458 error = perf_event_task_enable();
2459 break;
2460 case PR_GET_TIMERSLACK:
2461 error = current->timer_slack_ns;
2462 break;
2463 case PR_SET_TIMERSLACK:
2464 if (arg2 <= 0)
2465 current->timer_slack_ns =
6976675d 2466 current->default_timer_slack_ns;
f3cbd435
AM
2467 else
2468 current->timer_slack_ns = arg2;
2469 break;
2470 case PR_MCE_KILL:
2471 if (arg4 | arg5)
2472 return -EINVAL;
2473 switch (arg2) {
2474 case PR_MCE_KILL_CLEAR:
2475 if (arg3 != 0)
4db96cf0 2476 return -EINVAL;
f3cbd435 2477 current->flags &= ~PF_MCE_PROCESS;
4db96cf0 2478 break;
f3cbd435
AM
2479 case PR_MCE_KILL_SET:
2480 current->flags |= PF_MCE_PROCESS;
2481 if (arg3 == PR_MCE_KILL_EARLY)
2482 current->flags |= PF_MCE_EARLY;
2483 else if (arg3 == PR_MCE_KILL_LATE)
2484 current->flags &= ~PF_MCE_EARLY;
2485 else if (arg3 == PR_MCE_KILL_DEFAULT)
2486 current->flags &=
2487 ~(PF_MCE_EARLY|PF_MCE_PROCESS);
1087e9b4 2488 else
259e5e6c 2489 return -EINVAL;
259e5e6c 2490 break;
3c2a0909
S
2491 case PR_SET_TIMERSLACK_PID:
2492 if (current->pid != (pid_t)arg3 &&
2493 !capable(CAP_SYS_NICE))
2494 return -EPERM;
2495 rcu_read_lock();
2496 tsk = find_task_by_pid_ns((pid_t)arg3, &init_pid_ns);
2497 if (tsk == NULL) {
2498 rcu_read_unlock();
2499 return -EINVAL;
2500 }
2501 get_task_struct(tsk);
2502 rcu_read_unlock();
2503 if (arg2 <= 0)
2504 tsk->timer_slack_ns =
2505 tsk->default_timer_slack_ns;
2506 else
2507 tsk->timer_slack_ns = arg2;
2508 put_task_struct(tsk);
2509 error = 0;
2510 break;
1da177e4 2511 default:
f3cbd435
AM
2512 return -EINVAL;
2513 }
2514 break;
2515 case PR_MCE_KILL_GET:
2516 if (arg2 | arg3 | arg4 | arg5)
2517 return -EINVAL;
2518 if (current->flags & PF_MCE_PROCESS)
2519 error = (current->flags & PF_MCE_EARLY) ?
2520 PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
2521 else
2522 error = PR_MCE_KILL_DEFAULT;
2523 break;
2524 case PR_SET_MM:
2525 error = prctl_set_mm(arg2, arg3, arg4, arg5);
2526 break;
2527 case PR_GET_TID_ADDRESS:
2528 error = prctl_get_tid_address(me, (int __user **)arg2);
2529 break;
2530 case PR_SET_CHILD_SUBREAPER:
2531 me->signal->is_child_subreaper = !!arg2;
2532 break;
2533 case PR_GET_CHILD_SUBREAPER:
2534 error = put_user(me->signal->is_child_subreaper,
2535 (int __user *)arg2);
2536 break;
2537 case PR_SET_NO_NEW_PRIVS:
2538 if (arg2 != 1 || arg3 || arg4 || arg5)
2539 return -EINVAL;
2540
3c2a0909 2541 task_set_no_new_privs(current);
f3cbd435
AM
2542 break;
2543 case PR_GET_NO_NEW_PRIVS:
2544 if (arg2 || arg3 || arg4 || arg5)
2545 return -EINVAL;
3c2a0909
S
2546 return task_no_new_privs(current) ? 1 : 0;
2547 case PR_SET_VMA:
2548 error = prctl_set_vma(arg2, arg3, arg4, arg5);
2549 break;
f3cbd435
AM
2550 default:
2551 error = -EINVAL;
2552 break;
1da177e4
LT
2553 }
2554 return error;
2555}
3cfc348b 2556
836f92ad
HC
2557SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
2558 struct getcpu_cache __user *, unused)
3cfc348b
AK
2559{
2560 int err = 0;
2561 int cpu = raw_smp_processor_id();
2562 if (cpup)
2563 err |= put_user(cpu, cpup);
2564 if (nodep)
2565 err |= put_user(cpu_to_node(cpu), nodep);
3cfc348b
AK
2566 return err ? -EFAULT : 0;
2567}
10a0a8d4
JF
2568
2569char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
2570
2ca067ef 2571static int __orderly_poweroff(bool force)
10a0a8d4 2572{
b57b44ae 2573 char **argv;
10a0a8d4
JF
2574 static char *envp[] = {
2575 "HOME=/",
2576 "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
2577 NULL
2578 };
b57b44ae 2579 int ret;
10a0a8d4 2580
2ca067ef
ON
2581 argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL);
2582 if (argv) {
2583 ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
2584 argv_free(argv);
2585 } else {
10a0a8d4 2586 printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
2ca067ef
ON
2587 __func__, poweroff_cmd);
2588 ret = -ENOMEM;
10a0a8d4
JF
2589 }
2590
2ca067ef
ON
2591 if (ret && force) {
2592 printk(KERN_WARNING "Failed to start orderly shutdown: "
2593 "forcing the issue\n");
2594 /*
2595 * I guess this should try to kick off some daemon to sync and
2596 * poweroff asap. Or not even bother syncing if we're doing an
2597 * emergency shutdown?
2598 */
2599 emergency_sync();
2600 kernel_power_off();
2601 }
10a0a8d4 2602
b57b44ae
AM
2603 return ret;
2604}
2605
2ca067ef
ON
2606static bool poweroff_force;
2607
2608static void poweroff_work_func(struct work_struct *work)
2609{
2610 __orderly_poweroff(poweroff_force);
2611}
2612
2613static DECLARE_WORK(poweroff_work, poweroff_work_func);
2614
b57b44ae
AM
2615/**
2616 * orderly_poweroff - Trigger an orderly system poweroff
2617 * @force: force poweroff if command execution fails
2618 *
2619 * This may be called from any context to trigger a system shutdown.
2620 * If the orderly shutdown fails, it will force an immediate shutdown.
2621 */
2622int orderly_poweroff(bool force)
2623{
2ca067ef
ON
2624 if (force) /* do not override the pending "true" */
2625 poweroff_force = true;
2626 schedule_work(&poweroff_work);
2627 return 0;
10a0a8d4
JF
2628}
2629EXPORT_SYMBOL_GPL(orderly_poweroff);
4a22f166
SR
2630
2631/**
2632 * do_sysinfo - fill in sysinfo struct
2633 * @info: pointer to buffer to fill
2634 */
2635static int do_sysinfo(struct sysinfo *info)
2636{
2637 unsigned long mem_total, sav_total;
2638 unsigned int mem_unit, bitcount;
2639 struct timespec tp;
2640
2641 memset(info, 0, sizeof(struct sysinfo));
2642
2643 ktime_get_ts(&tp);
2644 monotonic_to_bootbased(&tp);
2645 info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
2646
2647 get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
2648
2649 info->procs = nr_threads;
2650
2651 si_meminfo(info);
2652 si_swapinfo(info);
2653
2654 /*
2655 * If the sum of all the available memory (i.e. ram + swap)
2656 * is less than can be stored in a 32 bit unsigned long then
2657 * we can be binary compatible with 2.2.x kernels. If not,
2658 * well, in that case 2.2.x was broken anyways...
2659 *
2660 * -Erik Andersen <andersee@debian.org>
2661 */
2662
2663 mem_total = info->totalram + info->totalswap;
2664 if (mem_total < info->totalram || mem_total < info->totalswap)
2665 goto out;
2666 bitcount = 0;
2667 mem_unit = info->mem_unit;
2668 while (mem_unit > 1) {
2669 bitcount++;
2670 mem_unit >>= 1;
2671 sav_total = mem_total;
2672 mem_total <<= 1;
2673 if (mem_total < sav_total)
2674 goto out;
2675 }
2676
2677 /*
2678 * If mem_total did not overflow, multiply all memory values by
2679 * info->mem_unit and set it to 1. This leaves things compatible
2680 * with 2.2.x, and also retains compatibility with earlier 2.4.x
2681 * kernels...
2682 */
2683
2684 info->mem_unit = 1;
2685 info->totalram <<= bitcount;
2686 info->freeram <<= bitcount;
2687 info->sharedram <<= bitcount;
2688 info->bufferram <<= bitcount;
2689 info->totalswap <<= bitcount;
2690 info->freeswap <<= bitcount;
2691 info->totalhigh <<= bitcount;
2692 info->freehigh <<= bitcount;
2693
2694out:
2695 return 0;
2696}
2697
2698SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
2699{
2700 struct sysinfo val;
2701
2702 do_sysinfo(&val);
2703
2704 if (copy_to_user(info, &val, sizeof(struct sysinfo)))
2705 return -EFAULT;
2706
2707 return 0;
2708}
2709
2710#ifdef CONFIG_COMPAT
2711struct compat_sysinfo {
2712 s32 uptime;
2713 u32 loads[3];
2714 u32 totalram;
2715 u32 freeram;
2716 u32 sharedram;
2717 u32 bufferram;
2718 u32 totalswap;
2719 u32 freeswap;
2720 u16 procs;
2721 u16 pad;
2722 u32 totalhigh;
2723 u32 freehigh;
2724 u32 mem_unit;
2725 char _f[20-2*sizeof(u32)-sizeof(int)];
2726};
2727
2728COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
2729{
2730 struct sysinfo s;
2731
2732 do_sysinfo(&s);
2733
2734 /* Check to see if any memory value is too large for 32-bit and scale
2735 * down if needed
2736 */
2737 if ((s.totalram >> 32) || (s.totalswap >> 32)) {
2738 int bitcount = 0;
2739
2740 while (s.mem_unit < PAGE_SIZE) {
2741 s.mem_unit <<= 1;
2742 bitcount++;
2743 }
2744
2745 s.totalram >>= bitcount;
2746 s.freeram >>= bitcount;
2747 s.sharedram >>= bitcount;
2748 s.bufferram >>= bitcount;
2749 s.totalswap >>= bitcount;
2750 s.freeswap >>= bitcount;
2751 s.totalhigh >>= bitcount;
2752 s.freehigh >>= bitcount;
2753 }
2754
2755 if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) ||
2756 __put_user(s.uptime, &info->uptime) ||
2757 __put_user(s.loads[0], &info->loads[0]) ||
2758 __put_user(s.loads[1], &info->loads[1]) ||
2759 __put_user(s.loads[2], &info->loads[2]) ||
2760 __put_user(s.totalram, &info->totalram) ||
2761 __put_user(s.freeram, &info->freeram) ||
2762 __put_user(s.sharedram, &info->sharedram) ||
2763 __put_user(s.bufferram, &info->bufferram) ||
2764 __put_user(s.totalswap, &info->totalswap) ||
2765 __put_user(s.freeswap, &info->freeswap) ||
2766 __put_user(s.procs, &info->procs) ||
2767 __put_user(s.totalhigh, &info->totalhigh) ||
2768 __put_user(s.freehigh, &info->freehigh) ||
2769 __put_user(s.mem_unit, &info->mem_unit))
2770 return -EFAULT;
2771
2772 return 0;
2773}
2774#endif /* CONFIG_COMPAT */