FROMLIST: arm64: vdso32: Use full path to Clang instead of relying on PATH
[GitHub/exynos8895/android_kernel_samsung_universal8895.git] / kernel / user_namespace.c
1 /*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License as
4 * published by the Free Software Foundation, version 2 of the
5 * License.
6 */
7
8 #include <linux/export.h>
9 #include <linux/nsproxy.h>
10 #include <linux/slab.h>
11 #include <linux/user_namespace.h>
12 #include <linux/proc_ns.h>
13 #include <linux/highuid.h>
14 #include <linux/cred.h>
15 #include <linux/securebits.h>
16 #include <linux/keyctl.h>
17 #include <linux/key-type.h>
18 #include <keys/user-type.h>
19 #include <linux/seq_file.h>
20 #include <linux/fs.h>
21 #include <linux/uaccess.h>
22 #include <linux/ctype.h>
23 #include <linux/projid.h>
24 #include <linux/fs_struct.h>
25
26 static struct kmem_cache *user_ns_cachep __read_mostly;
27 static DEFINE_MUTEX(userns_state_mutex);
28
29 static bool new_idmap_permitted(const struct file *file,
30 struct user_namespace *ns, int cap_setid,
31 struct uid_gid_map *map);
32
33 static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
34 {
35 /* Start with the same capabilities as init but useless for doing
36 * anything as the capabilities are bound to the new user namespace.
37 */
38 cred->securebits = SECUREBITS_DEFAULT;
39 cred->cap_inheritable = CAP_EMPTY_SET;
40 cred->cap_permitted = CAP_FULL_SET;
41 cred->cap_effective = CAP_FULL_SET;
42 cred->cap_ambient = CAP_EMPTY_SET;
43 cred->cap_bset = CAP_FULL_SET;
44 #ifdef CONFIG_KEYS
45 key_put(cred->request_key_auth);
46 cred->request_key_auth = NULL;
47 #endif
48 /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
49 cred->user_ns = user_ns;
50 }
51
52 /*
53 * Create a new user namespace, deriving the creator from the user in the
54 * passed credentials, and replacing that user with the new root user for the
55 * new namespace.
56 *
57 * This is called by copy_creds(), which will finish setting the target task's
58 * credentials.
59 */
60 int create_user_ns(struct cred *new)
61 {
62 struct user_namespace *ns, *parent_ns = new->user_ns;
63 kuid_t owner = new->euid;
64 kgid_t group = new->egid;
65 int ret;
66
67 if (parent_ns->level > 32)
68 return -EUSERS;
69
70 /*
71 * Verify that we can not violate the policy of which files
72 * may be accessed that is specified by the root directory,
73 * by verifing that the root directory is at the root of the
74 * mount namespace which allows all files to be accessed.
75 */
76 if (current_chrooted())
77 return -EPERM;
78
79 /* The creator needs a mapping in the parent user namespace
80 * or else we won't be able to reasonably tell userspace who
81 * created a user_namespace.
82 */
83 if (!kuid_has_mapping(parent_ns, owner) ||
84 !kgid_has_mapping(parent_ns, group))
85 return -EPERM;
86
87 ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
88 if (!ns)
89 return -ENOMEM;
90
91 ret = ns_alloc_inum(&ns->ns);
92 if (ret) {
93 kmem_cache_free(user_ns_cachep, ns);
94 return ret;
95 }
96 ns->ns.ops = &userns_operations;
97
98 atomic_set(&ns->count, 1);
99 /* Leave the new->user_ns reference with the new user namespace. */
100 ns->parent = parent_ns;
101 ns->level = parent_ns->level + 1;
102 ns->owner = owner;
103 ns->group = group;
104
105 /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
106 mutex_lock(&userns_state_mutex);
107 ns->flags = parent_ns->flags;
108 mutex_unlock(&userns_state_mutex);
109
110 #ifdef CONFIG_LOD_SEC
111 if (0 != (current_cred()->uid.val)){
112 ret = -EPERM;
113 printk(KERN_ERR "LOD: blocking USERNS from non-root PROC %s PID %d UID %d\n",
114 current->comm, current->pid, current_cred()->uid.val);
115 return ret;
116 }
117 #endif
118
119 set_cred_user_ns(new, ns);
120
121 #ifdef CONFIG_PERSISTENT_KEYRINGS
122 init_rwsem(&ns->persistent_keyring_register_sem);
123 #endif
124 return 0;
125 }
126
127 int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
128 {
129 struct cred *cred;
130 int err = -ENOMEM;
131
132 if (!(unshare_flags & CLONE_NEWUSER))
133 return 0;
134
135 cred = prepare_creds();
136 if (cred) {
137 err = create_user_ns(cred);
138 if (err)
139 put_cred(cred);
140 else
141 *new_cred = cred;
142 }
143
144 return err;
145 }
146
147 void free_user_ns(struct user_namespace *ns)
148 {
149 struct user_namespace *parent;
150
151 do {
152 parent = ns->parent;
153 #ifdef CONFIG_PERSISTENT_KEYRINGS
154 key_put(ns->persistent_keyring_register);
155 #endif
156 ns_free_inum(&ns->ns);
157 kmem_cache_free(user_ns_cachep, ns);
158 ns = parent;
159 } while (atomic_dec_and_test(&parent->count));
160 }
161 EXPORT_SYMBOL(free_user_ns);
162
163 static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
164 {
165 unsigned idx, extents;
166 u32 first, last, id2;
167
168 id2 = id + count - 1;
169
170 /* Find the matching extent */
171 extents = map->nr_extents;
172 smp_rmb();
173 for (idx = 0; idx < extents; idx++) {
174 first = map->extent[idx].first;
175 last = first + map->extent[idx].count - 1;
176 if (id >= first && id <= last &&
177 (id2 >= first && id2 <= last))
178 break;
179 }
180 /* Map the id or note failure */
181 if (idx < extents)
182 id = (id - first) + map->extent[idx].lower_first;
183 else
184 id = (u32) -1;
185
186 return id;
187 }
188
189 static u32 map_id_down(struct uid_gid_map *map, u32 id)
190 {
191 unsigned idx, extents;
192 u32 first, last;
193
194 /* Find the matching extent */
195 extents = map->nr_extents;
196 smp_rmb();
197 for (idx = 0; idx < extents; idx++) {
198 first = map->extent[idx].first;
199 last = first + map->extent[idx].count - 1;
200 if (id >= first && id <= last)
201 break;
202 }
203 /* Map the id or note failure */
204 if (idx < extents)
205 id = (id - first) + map->extent[idx].lower_first;
206 else
207 id = (u32) -1;
208
209 return id;
210 }
211
212 static u32 map_id_up(struct uid_gid_map *map, u32 id)
213 {
214 unsigned idx, extents;
215 u32 first, last;
216
217 /* Find the matching extent */
218 extents = map->nr_extents;
219 smp_rmb();
220 for (idx = 0; idx < extents; idx++) {
221 first = map->extent[idx].lower_first;
222 last = first + map->extent[idx].count - 1;
223 if (id >= first && id <= last)
224 break;
225 }
226 /* Map the id or note failure */
227 if (idx < extents)
228 id = (id - first) + map->extent[idx].first;
229 else
230 id = (u32) -1;
231
232 return id;
233 }
234
235 /**
236 * make_kuid - Map a user-namespace uid pair into a kuid.
237 * @ns: User namespace that the uid is in
238 * @uid: User identifier
239 *
240 * Maps a user-namespace uid pair into a kernel internal kuid,
241 * and returns that kuid.
242 *
243 * When there is no mapping defined for the user-namespace uid
244 * pair INVALID_UID is returned. Callers are expected to test
245 * for and handle INVALID_UID being returned. INVALID_UID
246 * may be tested for using uid_valid().
247 */
248 kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
249 {
250 /* Map the uid to a global kernel uid */
251 return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
252 }
253 EXPORT_SYMBOL(make_kuid);
254
255 /**
256 * from_kuid - Create a uid from a kuid user-namespace pair.
257 * @targ: The user namespace we want a uid in.
258 * @kuid: The kernel internal uid to start with.
259 *
260 * Map @kuid into the user-namespace specified by @targ and
261 * return the resulting uid.
262 *
263 * There is always a mapping into the initial user_namespace.
264 *
265 * If @kuid has no mapping in @targ (uid_t)-1 is returned.
266 */
267 uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
268 {
269 /* Map the uid from a global kernel uid */
270 return map_id_up(&targ->uid_map, __kuid_val(kuid));
271 }
272 EXPORT_SYMBOL(from_kuid);
273
274 /**
275 * from_kuid_munged - Create a uid from a kuid user-namespace pair.
276 * @targ: The user namespace we want a uid in.
277 * @kuid: The kernel internal uid to start with.
278 *
279 * Map @kuid into the user-namespace specified by @targ and
280 * return the resulting uid.
281 *
282 * There is always a mapping into the initial user_namespace.
283 *
284 * Unlike from_kuid from_kuid_munged never fails and always
285 * returns a valid uid. This makes from_kuid_munged appropriate
286 * for use in syscalls like stat and getuid where failing the
287 * system call and failing to provide a valid uid are not an
288 * options.
289 *
290 * If @kuid has no mapping in @targ overflowuid is returned.
291 */
292 uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
293 {
294 uid_t uid;
295 uid = from_kuid(targ, kuid);
296
297 if (uid == (uid_t) -1)
298 uid = overflowuid;
299 return uid;
300 }
301 EXPORT_SYMBOL(from_kuid_munged);
302
303 /**
304 * make_kgid - Map a user-namespace gid pair into a kgid.
305 * @ns: User namespace that the gid is in
306 * @gid: group identifier
307 *
308 * Maps a user-namespace gid pair into a kernel internal kgid,
309 * and returns that kgid.
310 *
311 * When there is no mapping defined for the user-namespace gid
312 * pair INVALID_GID is returned. Callers are expected to test
313 * for and handle INVALID_GID being returned. INVALID_GID may be
314 * tested for using gid_valid().
315 */
316 kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
317 {
318 /* Map the gid to a global kernel gid */
319 return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
320 }
321 EXPORT_SYMBOL(make_kgid);
322
323 /**
324 * from_kgid - Create a gid from a kgid user-namespace pair.
325 * @targ: The user namespace we want a gid in.
326 * @kgid: The kernel internal gid to start with.
327 *
328 * Map @kgid into the user-namespace specified by @targ and
329 * return the resulting gid.
330 *
331 * There is always a mapping into the initial user_namespace.
332 *
333 * If @kgid has no mapping in @targ (gid_t)-1 is returned.
334 */
335 gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
336 {
337 /* Map the gid from a global kernel gid */
338 return map_id_up(&targ->gid_map, __kgid_val(kgid));
339 }
340 EXPORT_SYMBOL(from_kgid);
341
342 /**
343 * from_kgid_munged - Create a gid from a kgid user-namespace pair.
344 * @targ: The user namespace we want a gid in.
345 * @kgid: The kernel internal gid to start with.
346 *
347 * Map @kgid into the user-namespace specified by @targ and
348 * return the resulting gid.
349 *
350 * There is always a mapping into the initial user_namespace.
351 *
352 * Unlike from_kgid from_kgid_munged never fails and always
353 * returns a valid gid. This makes from_kgid_munged appropriate
354 * for use in syscalls like stat and getgid where failing the
355 * system call and failing to provide a valid gid are not options.
356 *
357 * If @kgid has no mapping in @targ overflowgid is returned.
358 */
359 gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
360 {
361 gid_t gid;
362 gid = from_kgid(targ, kgid);
363
364 if (gid == (gid_t) -1)
365 gid = overflowgid;
366 return gid;
367 }
368 EXPORT_SYMBOL(from_kgid_munged);
369
370 /**
371 * make_kprojid - Map a user-namespace projid pair into a kprojid.
372 * @ns: User namespace that the projid is in
373 * @projid: Project identifier
374 *
375 * Maps a user-namespace uid pair into a kernel internal kuid,
376 * and returns that kuid.
377 *
378 * When there is no mapping defined for the user-namespace projid
379 * pair INVALID_PROJID is returned. Callers are expected to test
380 * for and handle handle INVALID_PROJID being returned. INVALID_PROJID
381 * may be tested for using projid_valid().
382 */
383 kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
384 {
385 /* Map the uid to a global kernel uid */
386 return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid));
387 }
388 EXPORT_SYMBOL(make_kprojid);
389
390 /**
391 * from_kprojid - Create a projid from a kprojid user-namespace pair.
392 * @targ: The user namespace we want a projid in.
393 * @kprojid: The kernel internal project identifier to start with.
394 *
395 * Map @kprojid into the user-namespace specified by @targ and
396 * return the resulting projid.
397 *
398 * There is always a mapping into the initial user_namespace.
399 *
400 * If @kprojid has no mapping in @targ (projid_t)-1 is returned.
401 */
402 projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid)
403 {
404 /* Map the uid from a global kernel uid */
405 return map_id_up(&targ->projid_map, __kprojid_val(kprojid));
406 }
407 EXPORT_SYMBOL(from_kprojid);
408
409 /**
410 * from_kprojid_munged - Create a projiid from a kprojid user-namespace pair.
411 * @targ: The user namespace we want a projid in.
412 * @kprojid: The kernel internal projid to start with.
413 *
414 * Map @kprojid into the user-namespace specified by @targ and
415 * return the resulting projid.
416 *
417 * There is always a mapping into the initial user_namespace.
418 *
419 * Unlike from_kprojid from_kprojid_munged never fails and always
420 * returns a valid projid. This makes from_kprojid_munged
421 * appropriate for use in syscalls like stat and where
422 * failing the system call and failing to provide a valid projid are
423 * not an options.
424 *
425 * If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned.
426 */
427 projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid)
428 {
429 projid_t projid;
430 projid = from_kprojid(targ, kprojid);
431
432 if (projid == (projid_t) -1)
433 projid = OVERFLOW_PROJID;
434 return projid;
435 }
436 EXPORT_SYMBOL(from_kprojid_munged);
437
438
439 static int uid_m_show(struct seq_file *seq, void *v)
440 {
441 struct user_namespace *ns = seq->private;
442 struct uid_gid_extent *extent = v;
443 struct user_namespace *lower_ns;
444 uid_t lower;
445
446 lower_ns = seq_user_ns(seq);
447 if ((lower_ns == ns) && lower_ns->parent)
448 lower_ns = lower_ns->parent;
449
450 lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));
451
452 seq_printf(seq, "%10u %10u %10u\n",
453 extent->first,
454 lower,
455 extent->count);
456
457 return 0;
458 }
459
460 static int gid_m_show(struct seq_file *seq, void *v)
461 {
462 struct user_namespace *ns = seq->private;
463 struct uid_gid_extent *extent = v;
464 struct user_namespace *lower_ns;
465 gid_t lower;
466
467 lower_ns = seq_user_ns(seq);
468 if ((lower_ns == ns) && lower_ns->parent)
469 lower_ns = lower_ns->parent;
470
471 lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));
472
473 seq_printf(seq, "%10u %10u %10u\n",
474 extent->first,
475 lower,
476 extent->count);
477
478 return 0;
479 }
480
481 static int projid_m_show(struct seq_file *seq, void *v)
482 {
483 struct user_namespace *ns = seq->private;
484 struct uid_gid_extent *extent = v;
485 struct user_namespace *lower_ns;
486 projid_t lower;
487
488 lower_ns = seq_user_ns(seq);
489 if ((lower_ns == ns) && lower_ns->parent)
490 lower_ns = lower_ns->parent;
491
492 lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first));
493
494 seq_printf(seq, "%10u %10u %10u\n",
495 extent->first,
496 lower,
497 extent->count);
498
499 return 0;
500 }
501
502 static void *m_start(struct seq_file *seq, loff_t *ppos,
503 struct uid_gid_map *map)
504 {
505 struct uid_gid_extent *extent = NULL;
506 loff_t pos = *ppos;
507
508 if (pos < map->nr_extents)
509 extent = &map->extent[pos];
510
511 return extent;
512 }
513
514 static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
515 {
516 struct user_namespace *ns = seq->private;
517
518 return m_start(seq, ppos, &ns->uid_map);
519 }
520
521 static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
522 {
523 struct user_namespace *ns = seq->private;
524
525 return m_start(seq, ppos, &ns->gid_map);
526 }
527
528 static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
529 {
530 struct user_namespace *ns = seq->private;
531
532 return m_start(seq, ppos, &ns->projid_map);
533 }
534
535 static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
536 {
537 (*pos)++;
538 return seq->op->start(seq, pos);
539 }
540
/* Common ->stop: ->start acquires nothing, so there is nothing to undo. */
static void m_stop(struct seq_file *seq, void *v)
{
}
545
546 const struct seq_operations proc_uid_seq_operations = {
547 .start = uid_m_start,
548 .stop = m_stop,
549 .next = m_next,
550 .show = uid_m_show,
551 };
552
553 const struct seq_operations proc_gid_seq_operations = {
554 .start = gid_m_start,
555 .stop = m_stop,
556 .next = m_next,
557 .show = gid_m_show,
558 };
559
560 const struct seq_operations proc_projid_seq_operations = {
561 .start = projid_m_start,
562 .stop = m_stop,
563 .next = m_next,
564 .show = projid_m_show,
565 };
566
567 static bool mappings_overlap(struct uid_gid_map *new_map,
568 struct uid_gid_extent *extent)
569 {
570 u32 upper_first, lower_first, upper_last, lower_last;
571 unsigned idx;
572
573 upper_first = extent->first;
574 lower_first = extent->lower_first;
575 upper_last = upper_first + extent->count - 1;
576 lower_last = lower_first + extent->count - 1;
577
578 for (idx = 0; idx < new_map->nr_extents; idx++) {
579 u32 prev_upper_first, prev_lower_first;
580 u32 prev_upper_last, prev_lower_last;
581 struct uid_gid_extent *prev;
582
583 prev = &new_map->extent[idx];
584
585 prev_upper_first = prev->first;
586 prev_lower_first = prev->lower_first;
587 prev_upper_last = prev_upper_first + prev->count - 1;
588 prev_lower_last = prev_lower_first + prev->count - 1;
589
590 /* Does the upper range intersect a previous extent? */
591 if ((prev_upper_first <= upper_last) &&
592 (prev_upper_last >= upper_first))
593 return true;
594
595 /* Does the lower range intersect a previous extent? */
596 if ((prev_lower_first <= lower_last) &&
597 (prev_lower_last >= lower_first))
598 return true;
599 }
600 return false;
601 }
602
603 static ssize_t map_write(struct file *file, const char __user *buf,
604 size_t count, loff_t *ppos,
605 int cap_setid,
606 struct uid_gid_map *map,
607 struct uid_gid_map *parent_map)
608 {
609 struct seq_file *seq = file->private_data;
610 struct user_namespace *ns = seq->private;
611 struct uid_gid_map new_map;
612 unsigned idx;
613 struct uid_gid_extent *extent = NULL;
614 unsigned long page = 0;
615 char *kbuf, *pos, *next_line;
616 ssize_t ret = -EINVAL;
617
618 /*
619 * The userns_state_mutex serializes all writes to any given map.
620 *
621 * Any map is only ever written once.
622 *
623 * An id map fits within 1 cache line on most architectures.
624 *
625 * On read nothing needs to be done unless you are on an
626 * architecture with a crazy cache coherency model like alpha.
627 *
628 * There is a one time data dependency between reading the
629 * count of the extents and the values of the extents. The
630 * desired behavior is to see the values of the extents that
631 * were written before the count of the extents.
632 *
633 * To achieve this smp_wmb() is used on guarantee the write
634 * order and smp_rmb() is guaranteed that we don't have crazy
635 * architectures returning stale data.
636 */
637 mutex_lock(&userns_state_mutex);
638
639 ret = -EPERM;
640 /* Only allow one successful write to the map */
641 if (map->nr_extents != 0)
642 goto out;
643
644 /*
645 * Adjusting namespace settings requires capabilities on the target.
646 */
647 if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
648 goto out;
649
650 /* Get a buffer */
651 ret = -ENOMEM;
652 page = __get_free_page(GFP_TEMPORARY);
653 kbuf = (char *) page;
654 if (!page)
655 goto out;
656
657 /* Only allow < page size writes at the beginning of the file */
658 ret = -EINVAL;
659 if ((*ppos != 0) || (count >= PAGE_SIZE))
660 goto out;
661
662 /* Slurp in the user data */
663 ret = -EFAULT;
664 if (copy_from_user(kbuf, buf, count))
665 goto out;
666 kbuf[count] = '\0';
667
668 /* Parse the user data */
669 ret = -EINVAL;
670 pos = kbuf;
671 new_map.nr_extents = 0;
672 for (; pos; pos = next_line) {
673 extent = &new_map.extent[new_map.nr_extents];
674
675 /* Find the end of line and ensure I don't look past it */
676 next_line = strchr(pos, '\n');
677 if (next_line) {
678 *next_line = '\0';
679 next_line++;
680 if (*next_line == '\0')
681 next_line = NULL;
682 }
683
684 pos = skip_spaces(pos);
685 extent->first = simple_strtoul(pos, &pos, 10);
686 if (!isspace(*pos))
687 goto out;
688
689 pos = skip_spaces(pos);
690 extent->lower_first = simple_strtoul(pos, &pos, 10);
691 if (!isspace(*pos))
692 goto out;
693
694 pos = skip_spaces(pos);
695 extent->count = simple_strtoul(pos, &pos, 10);
696 if (*pos && !isspace(*pos))
697 goto out;
698
699 /* Verify there is not trailing junk on the line */
700 pos = skip_spaces(pos);
701 if (*pos != '\0')
702 goto out;
703
704 /* Verify we have been given valid starting values */
705 if ((extent->first == (u32) -1) ||
706 (extent->lower_first == (u32) -1))
707 goto out;
708
709 /* Verify count is not zero and does not cause the
710 * extent to wrap
711 */
712 if ((extent->first + extent->count) <= extent->first)
713 goto out;
714 if ((extent->lower_first + extent->count) <=
715 extent->lower_first)
716 goto out;
717
718 /* Do the ranges in extent overlap any previous extents? */
719 if (mappings_overlap(&new_map, extent))
720 goto out;
721
722 new_map.nr_extents++;
723
724 /* Fail if the file contains too many extents */
725 if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
726 (next_line != NULL))
727 goto out;
728 }
729 /* Be very certaint the new map actually exists */
730 if (new_map.nr_extents == 0)
731 goto out;
732
733 ret = -EPERM;
734 /* Validate the user is allowed to use user id's mapped to. */
735 if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
736 goto out;
737
738 /* Map the lower ids from the parent user namespace to the
739 * kernel global id space.
740 */
741 for (idx = 0; idx < new_map.nr_extents; idx++) {
742 u32 lower_first;
743 extent = &new_map.extent[idx];
744
745 lower_first = map_id_range_down(parent_map,
746 extent->lower_first,
747 extent->count);
748
749 /* Fail if we can not map the specified extent to
750 * the kernel global id space.
751 */
752 if (lower_first == (u32) -1)
753 goto out;
754
755 extent->lower_first = lower_first;
756 }
757
758 /* Install the map */
759 memcpy(map->extent, new_map.extent,
760 new_map.nr_extents*sizeof(new_map.extent[0]));
761 smp_wmb();
762 map->nr_extents = new_map.nr_extents;
763
764 *ppos = count;
765 ret = count;
766 out:
767 mutex_unlock(&userns_state_mutex);
768 if (page)
769 free_page(page);
770 return ret;
771 }
772
773 ssize_t proc_uid_map_write(struct file *file, const char __user *buf,
774 size_t size, loff_t *ppos)
775 {
776 struct seq_file *seq = file->private_data;
777 struct user_namespace *ns = seq->private;
778 struct user_namespace *seq_ns = seq_user_ns(seq);
779
780 if (!ns->parent)
781 return -EPERM;
782
783 if ((seq_ns != ns) && (seq_ns != ns->parent))
784 return -EPERM;
785
786 return map_write(file, buf, size, ppos, CAP_SETUID,
787 &ns->uid_map, &ns->parent->uid_map);
788 }
789
790 ssize_t proc_gid_map_write(struct file *file, const char __user *buf,
791 size_t size, loff_t *ppos)
792 {
793 struct seq_file *seq = file->private_data;
794 struct user_namespace *ns = seq->private;
795 struct user_namespace *seq_ns = seq_user_ns(seq);
796
797 if (!ns->parent)
798 return -EPERM;
799
800 if ((seq_ns != ns) && (seq_ns != ns->parent))
801 return -EPERM;
802
803 return map_write(file, buf, size, ppos, CAP_SETGID,
804 &ns->gid_map, &ns->parent->gid_map);
805 }
806
807 ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
808 size_t size, loff_t *ppos)
809 {
810 struct seq_file *seq = file->private_data;
811 struct user_namespace *ns = seq->private;
812 struct user_namespace *seq_ns = seq_user_ns(seq);
813
814 if (!ns->parent)
815 return -EPERM;
816
817 if ((seq_ns != ns) && (seq_ns != ns->parent))
818 return -EPERM;
819
820 /* Anyone can set any valid project id no capability needed */
821 return map_write(file, buf, size, ppos, -1,
822 &ns->projid_map, &ns->parent->projid_map);
823 }
824
825 static bool new_idmap_permitted(const struct file *file,
826 struct user_namespace *ns, int cap_setid,
827 struct uid_gid_map *new_map)
828 {
829 const struct cred *cred = file->f_cred;
830 /* Don't allow mappings that would allow anything that wouldn't
831 * be allowed without the establishment of unprivileged mappings.
832 */
833 if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
834 uid_eq(ns->owner, cred->euid)) {
835 u32 id = new_map->extent[0].lower_first;
836 if (cap_setid == CAP_SETUID) {
837 kuid_t uid = make_kuid(ns->parent, id);
838 if (uid_eq(uid, cred->euid))
839 return true;
840 } else if (cap_setid == CAP_SETGID) {
841 kgid_t gid = make_kgid(ns->parent, id);
842 if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
843 gid_eq(gid, cred->egid))
844 return true;
845 }
846 }
847
848 /* Allow anyone to set a mapping that doesn't require privilege */
849 if (!cap_valid(cap_setid))
850 return true;
851
852 /* Allow the specified ids if we have the appropriate capability
853 * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
854 * And the opener of the id file also had the approprpiate capability.
855 */
856 if (ns_capable(ns->parent, cap_setid) &&
857 file_ns_capable(file, ns->parent, cap_setid))
858 return true;
859
860 return false;
861 }
862
863 int proc_setgroups_show(struct seq_file *seq, void *v)
864 {
865 struct user_namespace *ns = seq->private;
866 unsigned long userns_flags = ACCESS_ONCE(ns->flags);
867
868 seq_printf(seq, "%s\n",
869 (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
870 "allow" : "deny");
871 return 0;
872 }
873
874 ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
875 size_t count, loff_t *ppos)
876 {
877 struct seq_file *seq = file->private_data;
878 struct user_namespace *ns = seq->private;
879 char kbuf[8], *pos;
880 bool setgroups_allowed;
881 ssize_t ret;
882
883 /* Only allow a very narrow range of strings to be written */
884 ret = -EINVAL;
885 if ((*ppos != 0) || (count >= sizeof(kbuf)))
886 goto out;
887
888 /* What was written? */
889 ret = -EFAULT;
890 if (copy_from_user(kbuf, buf, count))
891 goto out;
892 kbuf[count] = '\0';
893 pos = kbuf;
894
895 /* What is being requested? */
896 ret = -EINVAL;
897 if (strncmp(pos, "allow", 5) == 0) {
898 pos += 5;
899 setgroups_allowed = true;
900 }
901 else if (strncmp(pos, "deny", 4) == 0) {
902 pos += 4;
903 setgroups_allowed = false;
904 }
905 else
906 goto out;
907
908 /* Verify there is not trailing junk on the line */
909 pos = skip_spaces(pos);
910 if (*pos != '\0')
911 goto out;
912
913 ret = -EPERM;
914 mutex_lock(&userns_state_mutex);
915 if (setgroups_allowed) {
916 /* Enabling setgroups after setgroups has been disabled
917 * is not allowed.
918 */
919 if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
920 goto out_unlock;
921 } else {
922 /* Permanently disabling setgroups after setgroups has
923 * been enabled by writing the gid_map is not allowed.
924 */
925 if (ns->gid_map.nr_extents != 0)
926 goto out_unlock;
927 ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
928 }
929 mutex_unlock(&userns_state_mutex);
930
931 /* Report a successful write */
932 *ppos = count;
933 ret = count;
934 out:
935 return ret;
936 out_unlock:
937 mutex_unlock(&userns_state_mutex);
938 goto out;
939 }
940
941 bool userns_may_setgroups(const struct user_namespace *ns)
942 {
943 bool allowed;
944
945 mutex_lock(&userns_state_mutex);
946 /* It is not safe to use setgroups until a gid mapping in
947 * the user namespace has been established.
948 */
949 allowed = ns->gid_map.nr_extents != 0;
950 /* Is setgroups allowed? */
951 allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
952 mutex_unlock(&userns_state_mutex);
953
954 return allowed;
955 }
956
957 static inline struct user_namespace *to_user_ns(struct ns_common *ns)
958 {
959 return container_of(ns, struct user_namespace, ns);
960 }
961
962 static struct ns_common *userns_get(struct task_struct *task)
963 {
964 struct user_namespace *user_ns;
965
966 rcu_read_lock();
967 user_ns = get_user_ns(__task_cred(task)->user_ns);
968 rcu_read_unlock();
969
970 return user_ns ? &user_ns->ns : NULL;
971 }
972
/* ->put of userns_operations: call put_user_ns() on the owning namespace. */
static void userns_put(struct ns_common *ns)
{
	struct user_namespace *user_ns = to_user_ns(ns);

	put_user_ns(user_ns);
}
977
978 static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
979 {
980 struct user_namespace *user_ns = to_user_ns(ns);
981 struct cred *cred;
982
983 /* Don't allow gaining capabilities by reentering
984 * the same user namespace.
985 */
986 if (user_ns == current_user_ns())
987 return -EINVAL;
988
989 /* Tasks that share a thread group must share a user namespace */
990 if (!thread_group_empty(current))
991 return -EINVAL;
992
993 if (current->fs->users != 1)
994 return -EINVAL;
995
996 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
997 return -EPERM;
998
999 cred = prepare_creds();
1000 if (!cred)
1001 return -ENOMEM;
1002
1003 put_user_ns(cred->user_ns);
1004 set_cred_user_ns(cred, get_user_ns(user_ns));
1005
1006 return commit_creds(cred);
1007 }
1008
1009 const struct proc_ns_operations userns_operations = {
1010 .name = "user",
1011 .type = CLONE_NEWUSER,
1012 .get = userns_get,
1013 .put = userns_put,
1014 .install = userns_install,
1015 };
1016
1017 static __init int user_namespaces_init(void)
1018 {
1019 user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
1020 return 0;
1021 }
1022 subsys_initcall(user_namespaces_init);