pidns: Stop pid allocation when init dies
authorEric W. Biederman <ebiederm@xmission.com>
Sat, 22 Dec 2012 04:27:12 +0000 (20:27 -0800)
committerEric W. Biederman <ebiederm@xmission.com>
Wed, 26 Dec 2012 00:10:05 +0000 (16:10 -0800)
Oleg pointed out that in a pid namespace the sequence.
- pid 1 becomes a zombie
- setns(thepidns), fork,...
- reaping pid 1.
- The injected processes exiting.

Can lead to processes attempting access their child reaper and
instead following a stale pointer.

That waitpid for init can return before all of the processes in
the pid namespace have exited is also unfortunate.

Avoid these problems by disabling the allocation of new pids in a pid
namespace when init dies, instead of when the last process in a pid
namespace is reaped.

Pointed-out-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
include/linux/pid.h
include/linux/pid_namespace.h
kernel/pid.c
kernel/pid_namespace.c

index b152d44fb18122e0659a7cdabf0d6b782ef63d59..2381c973d897e8ae9147e465f5884ee7481012b2 100644 (file)
@@ -121,6 +121,7 @@ int next_pidmap(struct pid_namespace *pid_ns, unsigned int last);
 
 extern struct pid *alloc_pid(struct pid_namespace *ns);
 extern void free_pid(struct pid *pid);
+extern void disable_pid_allocation(struct pid_namespace *ns);
 
 /*
  * ns_of_pid() returns the pid namespace in which the specified pid was
index bf285999273a6bce0da2db81c007b643883f37ff..215e5e3dda1063e955c93fa566b46de16f6c8dc3 100644 (file)
@@ -21,7 +21,7 @@ struct pid_namespace {
        struct kref kref;
        struct pidmap pidmap[PIDMAP_ENTRIES];
        int last_pid;
-       int nr_hashed;
+       unsigned int nr_hashed;
        struct task_struct *child_reaper;
        struct kmem_cache *pid_cachep;
        unsigned int level;
@@ -42,6 +42,8 @@ struct pid_namespace {
 
 extern struct pid_namespace init_pid_ns;
 
+#define PIDNS_HASH_ADDING (1U << 31)
+
 #ifdef CONFIG_PID_NS
 static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
 {
index 36aa02ff17d6c9cf67dcf39b0434898c700915ae..de9af600006f8cd73020b7d25b3dc4fc28be5eab 100644 (file)
@@ -270,7 +270,6 @@ void free_pid(struct pid *pid)
                        wake_up_process(ns->child_reaper);
                        break;
                case 0:
-                       ns->nr_hashed = -1;
                        schedule_work(&ns->proc_work);
                        break;
                }
@@ -319,7 +318,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
 
        upid = pid->numbers + ns->level;
        spin_lock_irq(&pidmap_lock);
-       if (ns->nr_hashed < 0)
+       if (!(ns->nr_hashed & PIDNS_HASH_ADDING))
                goto out_unlock;
        for ( ; upid >= pid->numbers; --upid) {
                hlist_add_head_rcu(&upid->pid_chain,
@@ -342,6 +341,13 @@ out_free:
        goto out;
 }
 
+void disable_pid_allocation(struct pid_namespace *ns)
+{
+       spin_lock_irq(&pidmap_lock);
+       ns->nr_hashed &= ~PIDNS_HASH_ADDING;
+       spin_unlock_irq(&pidmap_lock);
+}
+
 struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
 {
        struct hlist_node *elem;
@@ -573,6 +579,9 @@ void __init pidhash_init(void)
 
 void __init pidmap_init(void)
 {
+       /* Veryify no one has done anything silly */
+       BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);
+
        /* bump default and minimum pid_max based on number of cpus */
        pid_max = min(pid_max_max, max_t(int, pid_max,
                                PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
@@ -584,7 +593,7 @@ void __init pidmap_init(void)
        /* Reserve PID 0. We never call free_pidmap(0) */
        set_bit(0, init_pid_ns.pidmap[0].page);
        atomic_dec(&init_pid_ns.pidmap[0].nr_free);
-       init_pid_ns.nr_hashed = 1;
+       init_pid_ns.nr_hashed = PIDNS_HASH_ADDING;
 
        init_pid_ns.pid_cachep = KMEM_CACHE(pid,
                        SLAB_HWCACHE_ALIGN | SLAB_PANIC);
index fdbd0cdf271ae4ce6ad016f8b826510865fe3600..c1c3dc1c60233f337a01ff13587f1a5a7f57f1cd 100644 (file)
@@ -115,6 +115,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
        ns->level = level;
        ns->parent = get_pid_ns(parent_pid_ns);
        ns->user_ns = get_user_ns(user_ns);
+       ns->nr_hashed = PIDNS_HASH_ADDING;
        INIT_WORK(&ns->proc_work, proc_cleanup_work);
 
        set_bit(0, ns->pidmap[0].page);
@@ -181,6 +182,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
        int rc;
        struct task_struct *task, *me = current;
 
+       /* Don't allow any more processes into the pid namespace */
+       disable_pid_allocation(pid_ns);
+
        /* Ignore SIGCHLD causing any terminated children to autoreap */
        spin_lock_irq(&me->sighand->siglock);
        me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;