sched/core: Free the stack early if CONFIG_THREAD_INFO_IN_TASK
author Andy Lutomirski <luto@kernel.org>
Fri, 16 Sep 2016 05:45:48 +0000 (22:45 -0700)
committer Ingo Molnar <mingo@kernel.org>
Fri, 16 Sep 2016 07:18:54 +0000 (09:18 +0200)
We currently keep every task's stack around until the task_struct
itself is freed.  This means that we keep the stack allocation alive
for longer than necessary and that, under load, we free stacks in
big batches whenever RCU drops the last task reference.  Neither of
these is good for reuse of cache-hot memory, and freeing in batches
prevents us from usefully caching small numbers of vmalloced stacks.

On architectures that have thread_info on the stack, we can't easily
change this, but on architectures that set THREAD_INFO_IN_TASK, we
can free the stack as soon as the task is dead.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jann Horn <jann@thejh.net>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/08ca06cde00ebed0046c5d26cbbf3fbb7ef5b812.1474003868.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
include/linux/init_task.h
include/linux/sched.h
kernel/fork.c
kernel/sched/core.c

index 9c04d44eeb3c14fd2f144054d207358434b55d0d..325f649d77ff24bb65a26be80964a8bea04671a8 100644 (file)
@@ -186,7 +186,9 @@ extern struct task_group root_task_group;
 #endif
 
 #ifdef CONFIG_THREAD_INFO_IN_TASK
-# define INIT_TASK_TI(tsk) .thread_info = INIT_THREAD_INFO(tsk),
+# define INIT_TASK_TI(tsk)                     \
+       .thread_info = INIT_THREAD_INFO(tsk),   \
+       .stack_refcount = ATOMIC_INIT(1),
 #else
 # define INIT_TASK_TI(tsk)
 #endif
index a95867267e9fd3264a6fb576ac465337139f6bec..abb795afc8237ad5bef0cb8d61f3eafebaf0617d 100644 (file)
@@ -1936,6 +1936,10 @@ struct task_struct {
 #ifdef CONFIG_VMAP_STACK
        struct vm_struct *stack_vm_area;
 #endif
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       /* A live task holds one reference. */
+       atomic_t stack_refcount;
+#endif
 /* CPU-specific state of this task */
        struct thread_struct thread;
 /*
@@ -3143,12 +3147,22 @@ static inline unsigned long *end_of_stack(struct task_struct *p)
 
 #endif
 
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+static inline void *try_get_task_stack(struct task_struct *tsk)
+{
+       return atomic_inc_not_zero(&tsk->stack_refcount) ?
+               task_stack_page(tsk) : NULL;
+}
+
+extern void put_task_stack(struct task_struct *tsk);
+#else
 static inline void *try_get_task_stack(struct task_struct *tsk)
 {
        return task_stack_page(tsk);
 }
 
 static inline void put_task_stack(struct task_struct *tsk) {}
+#endif
 
 #define task_stack_end_corrupted(task) \
                (*(end_of_stack(task)) != STACK_END_MAGIC)
index 0c240fd5beba2c7eefc0df23375ea99291ce76e6..5dd0a516626d9d13ad09c6ad06a01105caa40193 100644 (file)
@@ -269,11 +269,40 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
        }
 }
 
-void free_task(struct task_struct *tsk)
+static void release_task_stack(struct task_struct *tsk)
 {
        account_kernel_stack(tsk, -1);
        arch_release_thread_stack(tsk->stack);
        free_thread_stack(tsk);
+       tsk->stack = NULL;
+#ifdef CONFIG_VMAP_STACK
+       tsk->stack_vm_area = NULL;
+#endif
+}
+
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+void put_task_stack(struct task_struct *tsk)
+{
+       if (atomic_dec_and_test(&tsk->stack_refcount))
+               release_task_stack(tsk);
+}
+#endif
+
+void free_task(struct task_struct *tsk)
+{
+#ifndef CONFIG_THREAD_INFO_IN_TASK
+       /*
+        * The task is finally done with both the stack and thread_info,
+        * so free both.
+        */
+       release_task_stack(tsk);
+#else
+       /*
+        * If the task had a separate stack allocation, it should be gone
+        * by now.
+        */
+       WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0);
+#endif
        rt_mutex_debug_task_free(tsk);
        ftrace_graph_exit_task(tsk);
        put_seccomp_filter(tsk);
@@ -411,6 +440,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 #ifdef CONFIG_VMAP_STACK
        tsk->stack_vm_area = stack_vm_area;
 #endif
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       atomic_set(&tsk->stack_refcount, 1);
+#endif
 
        if (err)
                goto free_stack;
@@ -1771,6 +1803,7 @@ bad_fork_cleanup_count:
        atomic_dec(&p->cred->user->processes);
        exit_creds(p);
 bad_fork_free:
+       put_task_stack(p);
        free_task(p);
 fork_out:
        return ERR_PTR(retval);
index 0b6238f18da28ea5fdab2b535b88264ac1c035cd..23c6037e2d89b8cf93389d0a756b00f5bf668cbe 100644 (file)
@@ -2772,6 +2772,10 @@ static struct rq *finish_task_switch(struct task_struct *prev)
                 * task and put them back on the free list.
                 */
                kprobe_flush_task(prev);
+
+               /* Task is done with its stack. */
+               put_task_stack(prev);
+
                put_task_struct(prev);
        }