do_wait() wakeup optimization: change __wake_up_parent() to use filtered wakeup
author Oleg Nesterov <oleg@redhat.com>
Wed, 23 Sep 2009 22:56:46 +0000 (15:56 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 24 Sep 2009 14:20:59 +0000 (07:20 -0700)
Ratan Nalumasu reported that in a process with many threads doing
different, mutually-exclusive waitpid() calls, there were a lot of
unnecessary wakeups.  Every waiting thread in the process wakes up to loop
through the children and see that the only ones it cares about are still
not ready.

Now that we have struct wait_opts we can change do_wait/__wake_up_parent
to use filtered wakeups.

We can make child_wait_callback() more clever later; right now it only
checks eligible_child().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Roland McGrath <roland@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ratan Nalumasu <rnalumasu@gmail.com>
Cc: Vitaly Mayatskikh <vmayatsk@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Tested-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
kernel/exit.c
security/selinux/hooks.c

index ef2dfa818bf139e214b78833f243782358cb5988..7838b4d687743cb2e927e13d9cefe482e1b662d0 100644 (file)
@@ -1097,6 +1097,7 @@ struct wait_opts {
        int __user              *wo_stat;
        struct rusage __user    *wo_rusage;
 
+       wait_queue_t            child_wait;
        int                     notask_error;
 };
 
@@ -1570,20 +1571,35 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
        return 0;
 }
 
+static int child_wait_callback(wait_queue_t *wait, unsigned mode,
+                               int sync, void *key)
+{
+       struct wait_opts *wo = container_of(wait, struct wait_opts,
+                                               child_wait);
+       struct task_struct *p = key;
+
+       if (!eligible_child(wo, p))
+               return 0;
+
+       return default_wake_function(wait, mode, sync, key);
+}
+
 void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
 {
-       wake_up_interruptible_sync(&parent->signal->wait_chldexit);
+       __wake_up_sync_key(&parent->signal->wait_chldexit,
+                               TASK_INTERRUPTIBLE, 1, p);
 }
 
 static long do_wait(struct wait_opts *wo)
 {
-       DECLARE_WAITQUEUE(wait, current);
        struct task_struct *tsk;
        int retval;
 
        trace_sched_process_wait(wo->wo_pid);
 
-       add_wait_queue(&current->signal->wait_chldexit,&wait);
+       init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);
+       wo->child_wait.private = current;
+       add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
 repeat:
        /*
         * If there is nothing that can match our critiera just get out.
@@ -1624,7 +1640,8 @@ notask:
        }
 end:
        __set_current_state(TASK_RUNNING);
-       remove_wait_queue(&current->signal->wait_chldexit,&wait);
+       remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
+
        if (wo->wo_info) {
                struct siginfo __user *infop = wo->wo_info;
 
index 417f7c9945229175f79842c5a9637af6fd48fc1d..bb230d5d7085a9612f915edbb50124a0077db4be 100644 (file)
@@ -2411,7 +2411,7 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm)
        /* Wake up the parent if it is waiting so that it can recheck
         * wait permission to the new task SID. */
        read_lock(&tasklist_lock);
-       wake_up_interruptible(&current->real_parent->signal->wait_chldexit);
+       __wake_up_parent(current, current->real_parent);
        read_unlock(&tasklist_lock);
 }