[PATCH] sched: consolidate sbe sbf
author Nick Piggin <nickpiggin@yahoo.com.au>
Sat, 25 Jun 2005 21:57:29 +0000 (14:57 -0700)
committer Linus Torvalds <torvalds@ppc970.osdl.org>
Sat, 25 Jun 2005 23:24:44 +0000 (16:24 -0700)
Consolidate balance-on-exec with balance-on-fork.  This is made easy by the
sched-domains RCU patches.

As well as the general goodness of code reduction, this allows the runqueues
to be unlocked during balance-on-fork.

schedstats is a problem.  Maybe just have balance-on-event instead of
distinguishing fork and exec?

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
include/linux/sched.h
kernel/fork.c
kernel/sched.c

index d27be933742510e60669d84fb19a6401862158af..edb2c69a88730e919433953bb0a4f2f8e00f4282 100644 (file)
@@ -930,7 +930,7 @@ extern void FASTCALL(wake_up_new_task(struct task_struct * tsk,
 #else
  static inline void kick_process(struct task_struct *tsk) { }
 #endif
-extern void FASTCALL(sched_fork(task_t * p));
+extern void FASTCALL(sched_fork(task_t * p, int clone_flags));
 extern void FASTCALL(sched_exit(task_t * p));
 
 extern int in_group_p(gid_t);
index a28d11e10877d07ebdd1ccde6006493122873b28..2c7806873bfd1b656b5dc033a6abaf4ff535531f 100644 (file)
@@ -1003,9 +1003,6 @@ static task_t *copy_process(unsigned long clone_flags,
        p->pdeath_signal = 0;
        p->exit_state = 0;
 
-       /* Perform scheduler related setup */
-       sched_fork(p);
-
        /*
         * Ok, make it visible to the rest of the system.
         * We dont wake it up yet.
@@ -1014,18 +1011,24 @@ static task_t *copy_process(unsigned long clone_flags,
        INIT_LIST_HEAD(&p->ptrace_children);
        INIT_LIST_HEAD(&p->ptrace_list);
 
+       /* Perform scheduler related setup. Assign this task to a CPU. */
+       sched_fork(p, clone_flags);
+
        /* Need tasklist lock for parent etc handling! */
        write_lock_irq(&tasklist_lock);
 
        /*
-        * The task hasn't been attached yet, so cpus_allowed mask cannot
-        * have changed. The cpus_allowed mask of the parent may have
-        * changed after it was copied first time, and it may then move to
-        * another CPU - so we re-copy it here and set the child's CPU to
-        * the parent's CPU. This avoids alot of nasty races.
+        * The task hasn't been attached yet, so its cpus_allowed mask will
+        * not be changed, nor will its assigned CPU.
+        *
+        * The cpus_allowed mask of the parent may have changed after it was
+        * copied first time - so re-copy it here, then check the child's CPU
+        * to ensure it is on a valid CPU (and if not, just force it back to
+        * parent's CPU). This avoids alot of nasty races.
         */
        p->cpus_allowed = current->cpus_allowed;
-       set_task_cpu(p, smp_processor_id());
+       if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed)))
+               set_task_cpu(p, smp_processor_id());
 
        /*
         * Check for pending SIGKILL! The new thread should not be allowed
index 54ce787b6207ff8077b1ab68308e2bedf5fcb9e4..579da278e72fd62ba1153dfc1bc0e1ab4a44b32b 100644 (file)
@@ -1021,8 +1021,59 @@ static int find_idlest_cpu(struct sched_group *group, int this_cpu)
        return idlest;
 }
 
+/*
+ * sched_balance_self: balance the current task (running on cpu) in domains
+ * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
+ * SD_BALANCE_EXEC.
+ *
+ * Balance, ie. select the least loaded group.
+ *
+ * Returns the target CPU number, or the same CPU if no balancing is needed.
+ *
+ * preempt must be disabled.
+ */
+static int sched_balance_self(int cpu, int flag)
+{
+       struct task_struct *t = current;
+       struct sched_domain *tmp, *sd = NULL;
 
-#endif
+       for_each_domain(cpu, tmp)
+               if (tmp->flags & flag)
+                       sd = tmp;
+
+       while (sd) {
+               cpumask_t span;
+               struct sched_group *group;
+               int new_cpu;
+               int weight;
+
+               span = sd->span;
+               group = find_idlest_group(sd, t, cpu);
+               if (!group)
+                       goto nextlevel;
+
+               new_cpu = find_idlest_cpu(group, cpu);
+               if (new_cpu == -1 || new_cpu == cpu)
+                       goto nextlevel;
+
+               /* Now try balancing at a lower domain level */
+               cpu = new_cpu;
+nextlevel:
+               sd = NULL;
+               weight = cpus_weight(span);
+               for_each_domain(cpu, tmp) {
+                       if (weight <= cpus_weight(tmp->span))
+                               break;
+                       if (tmp->flags & flag)
+                               sd = tmp;
+               }
+               /* while loop will break here if sd == NULL */
+       }
+
+       return cpu;
+}
+
+#endif /* CONFIG_SMP */
 
 /*
  * wake_idle() will wake a task on an idle cpu if task->cpu is
@@ -1240,8 +1291,15 @@ int fastcall wake_up_state(task_t *p, unsigned int state)
  * Perform scheduler related setup for a newly forked process p.
  * p is forked by current.
  */
-void fastcall sched_fork(task_t *p)
+void fastcall sched_fork(task_t *p, int clone_flags)
 {
+       int cpu = get_cpu();
+
+#ifdef CONFIG_SMP
+       cpu = sched_balance_self(cpu, SD_BALANCE_FORK);
+#endif
+       set_task_cpu(p, cpu);
+
        /*
         * We mark the process as running here, but have not actually
         * inserted it onto the runqueue yet. This guarantees that
@@ -1282,12 +1340,10 @@ void fastcall sched_fork(task_t *p)
                 * runqueue lock is not a problem.
                 */
                current->time_slice = 1;
-               preempt_disable();
                scheduler_tick();
-               local_irq_enable();
-               preempt_enable();
-       } else
-               local_irq_enable();
+       }
+       local_irq_enable();
+       put_cpu();
 }
 
 /*
@@ -1302,64 +1358,12 @@ void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags)
        unsigned long flags;
        int this_cpu, cpu;
        runqueue_t *rq, *this_rq;
-#ifdef CONFIG_SMP
-       struct sched_domain *tmp, *sd = NULL;
-#endif
 
        rq = task_rq_lock(p, &flags);
        BUG_ON(p->state != TASK_RUNNING);
        this_cpu = smp_processor_id();
        cpu = task_cpu(p);
 
-#ifdef CONFIG_SMP
-       for_each_domain(cpu, tmp)
-               if (tmp->flags & SD_BALANCE_FORK)
-                       sd = tmp;
-
-       if (sd) {
-               cpumask_t span;
-               int new_cpu;
-               struct sched_group *group;
-
-again:
-               schedstat_inc(sd, sbf_cnt);
-               span = sd->span;
-               cpu = task_cpu(p);
-               group = find_idlest_group(sd, p, cpu);
-               if (!group) {
-                       schedstat_inc(sd, sbf_balanced);
-                       goto nextlevel;
-               }
-
-               new_cpu = find_idlest_cpu(group, cpu);
-               if (new_cpu == -1 || new_cpu == cpu) {
-                       schedstat_inc(sd, sbf_balanced);
-                       goto nextlevel;
-               }
-
-               if (cpu_isset(new_cpu, p->cpus_allowed)) {
-                       schedstat_inc(sd, sbf_pushed);
-                       set_task_cpu(p, new_cpu);
-                       task_rq_unlock(rq, &flags);
-                       rq = task_rq_lock(p, &flags);
-                       cpu = task_cpu(p);
-               }
-
-               /* Now try balancing at a lower domain level */
-nextlevel:
-               sd = NULL;
-               for_each_domain(cpu, tmp) {
-                       if (cpus_subset(span, tmp->span))
-                               break;
-                       if (tmp->flags & SD_BALANCE_FORK)
-                               sd = tmp;
-               }
-
-               if (sd)
-                       goto again;
-       }
-
-#endif
        /*
         * We decrease the sleep average of forking parents
         * and children as well, to keep max-interactive tasks
@@ -1708,58 +1712,16 @@ out:
 }
 
 /*
- * sched_exec(): find the highest-level, exec-balance-capable
- * domain and try to migrate the task to the least loaded CPU.
- *
- * execve() is a valuable balancing opportunity, because at this point
- * the task has the smallest effective memory and cache footprint.
+ * sched_exec - execve() is a valuable balancing opportunity, because at
+ * this point the task has the smallest effective memory and cache footprint.
  */
 void sched_exec(void)
 {
-       struct sched_domain *tmp, *sd = NULL;
        int new_cpu, this_cpu = get_cpu();
-
-       for_each_domain(this_cpu, tmp)
-               if (tmp->flags & SD_BALANCE_EXEC)
-                       sd = tmp;
-
-       if (sd) {
-               cpumask_t span;
-               struct sched_group *group;
-again:
-               schedstat_inc(sd, sbe_cnt);
-               span = sd->span;
-               group = find_idlest_group(sd, current, this_cpu);
-               if (!group) {
-                       schedstat_inc(sd, sbe_balanced);
-                       goto nextlevel;
-               }
-               new_cpu = find_idlest_cpu(group, this_cpu);
-               if (new_cpu == -1 || new_cpu == this_cpu) {
-                       schedstat_inc(sd, sbe_balanced);
-                       goto nextlevel;
-               }
-
-               schedstat_inc(sd, sbe_pushed);
-               put_cpu();
-               sched_migrate_task(current, new_cpu);
-
-               /* Now try balancing at a lower domain level */
-               this_cpu = get_cpu();
-nextlevel:
-               sd = NULL;
-               for_each_domain(this_cpu, tmp) {
-                       if (cpus_subset(span, tmp->span))
-                               break;
-                       if (tmp->flags & SD_BALANCE_EXEC)
-                               sd = tmp;
-               }
-
-               if (sd)
-                       goto again;
-       }
-
+       new_cpu = sched_balance_self(this_cpu, SD_BALANCE_EXEC);
        put_cpu();
+       if (new_cpu != this_cpu)
+               sched_migrate_task(current, new_cpu);
 }
 
 /*