[CELL] spufs: rework list management and associated locking
author Christoph Hellwig <hch@lst.de>
Fri, 20 Jul 2007 19:39:54 +0000 (21:39 +0200)
committer Arnd Bergmann <arnd@klappe.arndb.de>
Fri, 20 Jul 2007 19:42:28 +0000 (21:42 +0200)
This sorts out the various lists and related locks in the spu code.

In detail:

 - the per-node free_spus and active_list are gone.  Instead struct spu
   gained an alloc_state member telling whether the spu is free or not
 - the per-node spus array is now locked by a per-node mutex, which
   takes over from the global spu_lock and the per-node active_mutex
 - the spu_alloc* and spu_free functions are gone as the state change is
   now done inline in the spufs code.  This allows some more sharing of
   code for the affinity vs normal case and more efficient locking
 - some minor refactoring in the affinity code to fit this locking scheme

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
arch/powerpc/platforms/cell/spu_base.c
arch/powerpc/platforms/cell/spufs/sched.c
include/asm-powerpc/spu.h

index 8617b507af49859f749f82dea0699a9f9ee6f03b..90124228b8f43c37bd02f06e927402d29a5c058c 100644 (file)
@@ -409,7 +409,7 @@ static void spu_free_irqs(struct spu *spu)
                free_irq(spu->irqs[2], spu);
 }
 
-static void spu_init_channels(struct spu *spu)
+void spu_init_channels(struct spu *spu)
 {
        static const struct {
                 unsigned channel;
@@ -442,66 +442,7 @@ static void spu_init_channels(struct spu *spu)
                out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count);
        }
 }
-
-struct spu *spu_alloc_spu(struct spu *req_spu)
-{
-       struct spu *spu, *ret = NULL;
-
-       spin_lock(&spu_lock);
-       list_for_each_entry(spu, &cbe_spu_info[req_spu->node].free_spus, list) {
-               if (spu == req_spu) {
-                       list_del_init(&spu->list);
-                       pr_debug("Got SPU %d %d\n", spu->number, spu->node);
-                       spu_init_channels(spu);
-                       ret = spu;
-                       break;
-               }
-       }
-       spin_unlock(&spu_lock);
-       return ret;
-}
-EXPORT_SYMBOL_GPL(spu_alloc_spu);
-
-struct spu *spu_alloc_node(int node)
-{
-       struct spu *spu = NULL;
-
-       spin_lock(&spu_lock);
-       if (!list_empty(&cbe_spu_info[node].free_spus)) {
-               spu = list_entry(cbe_spu_info[node].free_spus.next, struct spu,
-                                                                       list);
-               list_del_init(&spu->list);
-               pr_debug("Got SPU %d %d\n", spu->number, spu->node);
-       }
-       spin_unlock(&spu_lock);
-
-       if (spu)
-               spu_init_channels(spu);
-       return spu;
-}
-EXPORT_SYMBOL_GPL(spu_alloc_node);
-
-struct spu *spu_alloc(void)
-{
-       struct spu *spu = NULL;
-       int node;
-
-       for (node = 0; node < MAX_NUMNODES; node++) {
-               spu = spu_alloc_node(node);
-               if (spu)
-                       break;
-       }
-
-       return spu;
-}
-
-void spu_free(struct spu *spu)
-{
-       spin_lock(&spu_lock);
-       list_add_tail(&spu->list, &cbe_spu_info[spu->node].free_spus);
-       spin_unlock(&spu_lock);
-}
-EXPORT_SYMBOL_GPL(spu_free);
+EXPORT_SYMBOL_GPL(spu_init_channels);
 
 static int spu_shutdown(struct sys_device *sysdev)
 {
@@ -597,6 +538,8 @@ static int __init create_spu(void *data)
        if (!spu)
                goto out;
 
+       spu->alloc_state = SPU_FREE;
+
        spin_lock_init(&spu->register_lock);
        spin_lock(&spu_lock);
        spu->number = number++;
@@ -617,11 +560,10 @@ static int __init create_spu(void *data)
        if (ret)
                goto out_free_irqs;
 
-       spin_lock(&spu_lock);
-       list_add(&spu->list, &cbe_spu_info[spu->node].free_spus);
+       mutex_lock(&cbe_spu_info[spu->node].list_mutex);
        list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus);
        cbe_spu_info[spu->node].n_spus++;
-       spin_unlock(&spu_lock);
+       mutex_unlock(&cbe_spu_info[spu->node].list_mutex);
 
        mutex_lock(&spu_full_list_mutex);
        spin_lock_irqsave(&spu_full_list_lock, flags);
@@ -831,8 +773,8 @@ static int __init init_spu_base(void)
        int i, ret = 0;
 
        for (i = 0; i < MAX_NUMNODES; i++) {
+               mutex_init(&cbe_spu_info[i].list_mutex);
                INIT_LIST_HEAD(&cbe_spu_info[i].spus);
-               INIT_LIST_HEAD(&cbe_spu_info[i].free_spus);
        }
 
        if (!spu_management_ops)
index 44e2338a05d5cb1e48bca2f99c5c219c592f1c80..227968b4779d3adc531fac2a71ef68dd1f18c386 100644 (file)
@@ -51,9 +51,6 @@ struct spu_prio_array {
        DECLARE_BITMAP(bitmap, MAX_PRIO);
        struct list_head runq[MAX_PRIO];
        spinlock_t runq_lock;
-       struct list_head active_list[MAX_NUMNODES];
-       struct mutex active_mutex[MAX_NUMNODES];
-       int nr_active[MAX_NUMNODES];
        int nr_waiting;
 };
 
@@ -127,7 +124,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
        ctx->policy = current->policy;
 
        /*
-        * A lot of places that don't hold active_mutex poke into
+        * A lot of places that don't hold list_mutex poke into
         * cpus_allowed, including grab_runnable_context which
         * already holds the runq_lock.  So abuse runq_lock
         * to protect this field aswell.
@@ -141,9 +138,9 @@ void spu_update_sched_info(struct spu_context *ctx)
 {
        int node = ctx->spu->node;
 
-       mutex_lock(&spu_prio->active_mutex[node]);
+       mutex_lock(&cbe_spu_info[node].list_mutex);
        __spu_update_sched_info(ctx);
-       mutex_unlock(&spu_prio->active_mutex[node]);
+       mutex_unlock(&cbe_spu_info[node].list_mutex);
 }
 
 static int __node_allowed(struct spu_context *ctx, int node)
@@ -169,39 +166,6 @@ static int node_allowed(struct spu_context *ctx, int node)
        return rval;
 }
 
-/**
- * spu_add_to_active_list - add spu to active list
- * @spu:       spu to add to the active list
- */
-static void spu_add_to_active_list(struct spu *spu)
-{
-       int node = spu->node;
-
-       mutex_lock(&spu_prio->active_mutex[node]);
-       spu_prio->nr_active[node]++;
-       list_add_tail(&spu->list, &spu_prio->active_list[node]);
-       mutex_unlock(&spu_prio->active_mutex[node]);
-}
-
-static void __spu_remove_from_active_list(struct spu *spu)
-{
-       list_del_init(&spu->list);
-       spu_prio->nr_active[spu->node]--;
-}
-
-/**
- * spu_remove_from_active_list - remove spu from active list
- * @spu:       spu to remove from the active list
- */
-static void spu_remove_from_active_list(struct spu *spu)
-{
-       int node = spu->node;
-
-       mutex_lock(&spu_prio->active_mutex[node]);
-       __spu_remove_from_active_list(spu);
-       mutex_unlock(&spu_prio->active_mutex[node]);
-}
-
 static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);
 
 void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
@@ -222,15 +186,18 @@ static void notify_spus_active(void)
         */
        for_each_online_node(node) {
                struct spu *spu;
-               mutex_lock(&spu_prio->active_mutex[node]);
-               list_for_each_entry(spu, &spu_prio->active_list[node], list) {
-                       struct spu_context *ctx = spu->ctx;
-                       set_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags);
-                       mb();   /* make sure any tasks woken up below */
-                               /* can see the bit(s) set above */
-                       wake_up_all(&ctx->stop_wq);
+
+               mutex_lock(&cbe_spu_info[node].list_mutex);
+               list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+                       if (spu->alloc_state != SPU_FREE) {
+                               struct spu_context *ctx = spu->ctx;
+                               set_bit(SPU_SCHED_NOTIFY_ACTIVE,
+                                       &ctx->sched_flags);
+                               mb();
+                               wake_up_all(&ctx->stop_wq);
+                       }
                }
-               mutex_unlock(&spu_prio->active_mutex[node]);
+               mutex_unlock(&cbe_spu_info[node].list_mutex);
        }
 }
 
@@ -293,10 +260,12 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
 }
 
 /*
- * XXX(hch): needs locking.
+ * Must be used with the list_mutex held.
  */
 static inline int sched_spu(struct spu *spu)
 {
+       BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex));
+
        return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED));
 }
 
@@ -349,11 +318,15 @@ static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff,
                node = (node < MAX_NUMNODES) ? node : 0;
                if (!node_allowed(ctx, node))
                        continue;
+               mutex_lock(&cbe_spu_info[node].list_mutex);
                list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
                        if ((!mem_aff || spu->has_mem_affinity) &&
-                                                       sched_spu(spu))
+                                                       sched_spu(spu)) {
+                               mutex_unlock(&cbe_spu_info[node].list_mutex);
                                return spu;
+                       }
                }
+               mutex_unlock(&cbe_spu_info[node].list_mutex);
        }
        return NULL;
 }
@@ -381,13 +354,14 @@ static void aff_set_ref_point_location(struct spu_gang *gang)
        gang->aff_ref_spu = aff_ref_location(ctx, mem_aff, gs, lowest_offset);
 }
 
-static struct spu *ctx_location(struct spu *ref, int offset)
+static struct spu *ctx_location(struct spu *ref, int offset, int node)
 {
        struct spu *spu;
 
        spu = NULL;
        if (offset >= 0) {
                list_for_each_entry(spu, ref->aff_list.prev, aff_list) {
+                       BUG_ON(spu->node != node);
                        if (offset == 0)
                                break;
                        if (sched_spu(spu))
@@ -395,12 +369,14 @@ static struct spu *ctx_location(struct spu *ref, int offset)
                }
        } else {
                list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) {
+                       BUG_ON(spu->node != node);
                        if (offset == 0)
                                break;
                        if (sched_spu(spu))
                                offset++;
                }
        }
+
        return spu;
 }
 
@@ -408,13 +384,13 @@ static struct spu *ctx_location(struct spu *ref, int offset)
  * affinity_check is called each time a context is going to be scheduled.
  * It returns the spu ptr on which the context must run.
  */
-struct spu *affinity_check(struct spu_context *ctx)
+static int has_affinity(struct spu_context *ctx)
 {
-       struct spu_gang *gang;
+       struct spu_gang *gang = ctx->gang;
 
        if (list_empty(&ctx->aff_list))
-               return NULL;
-       gang = ctx->gang;
+               return 0;
+
        mutex_lock(&gang->aff_mutex);
        if (!gang->aff_ref_spu) {
                if (!(gang->aff_flags & AFF_MERGED))
@@ -424,9 +400,8 @@ struct spu *affinity_check(struct spu_context *ctx)
                aff_set_ref_point_location(gang);
        }
        mutex_unlock(&gang->aff_mutex);
-       if (!gang->aff_ref_spu)
-               return NULL;
-       return ctx_location(gang->aff_ref_spu, ctx->aff_offset);
+
+       return gang->aff_ref_spu != NULL;
 }
 
 /**
@@ -535,22 +510,41 @@ static void spu_prio_wait(struct spu_context *ctx)
 
 static struct spu *spu_get_idle(struct spu_context *ctx)
 {
-       struct spu *spu = NULL;
-       int node = cpu_to_node(raw_smp_processor_id());
-       int n;
+       struct spu *spu;
+       int node, n;
+
+       if (has_affinity(ctx)) {
+               node = ctx->gang->aff_ref_spu->node;
 
-       spu = affinity_check(ctx);
-       if (spu)
-               return spu_alloc_spu(spu);
+               mutex_lock(&cbe_spu_info[node].list_mutex);
+               spu = ctx_location(ctx->gang->aff_ref_spu, ctx->aff_offset, node);
+               if (spu && spu->alloc_state == SPU_FREE)
+                       goto found;
+               mutex_unlock(&cbe_spu_info[node].list_mutex);
+               return NULL;
+       }
 
+       node = cpu_to_node(raw_smp_processor_id());
        for (n = 0; n < MAX_NUMNODES; n++, node++) {
                node = (node < MAX_NUMNODES) ? node : 0;
                if (!node_allowed(ctx, node))
                        continue;
-               spu = spu_alloc_node(node);
-               if (spu)
-                       break;
+
+               mutex_lock(&cbe_spu_info[node].list_mutex);
+               list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+                       if (spu->alloc_state == SPU_FREE)
+                               goto found;
+               }
+               mutex_unlock(&cbe_spu_info[node].list_mutex);
        }
+
+       return NULL;
+
+ found:
+       spu->alloc_state = SPU_USED;
+       mutex_unlock(&cbe_spu_info[node].list_mutex);
+       pr_debug("Got SPU %d %d\n", spu->number, spu->node);
+       spu_init_channels(spu);
        return spu;
 }
 
@@ -580,15 +574,15 @@ static struct spu *find_victim(struct spu_context *ctx)
                if (!node_allowed(ctx, node))
                        continue;
 
-               mutex_lock(&spu_prio->active_mutex[node]);
-               list_for_each_entry(spu, &spu_prio->active_list[node], list) {
+               mutex_lock(&cbe_spu_info[node].list_mutex);
+               list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
                        struct spu_context *tmp = spu->ctx;
 
                        if (tmp->prio > ctx->prio &&
                            (!victim || tmp->prio > victim->prio))
                                victim = spu->ctx;
                }
-               mutex_unlock(&spu_prio->active_mutex[node]);
+               mutex_unlock(&cbe_spu_info[node].list_mutex);
 
                if (victim) {
                        /*
@@ -613,7 +607,11 @@ static struct spu *find_victim(struct spu_context *ctx)
                                victim = NULL;
                                goto restart;
                        }
-                       spu_remove_from_active_list(spu);
+
+                       mutex_lock(&cbe_spu_info[node].list_mutex);
+                       cbe_spu_info[node].nr_active--;
+                       mutex_unlock(&cbe_spu_info[node].list_mutex);
+
                        spu_unbind_context(spu, victim);
                        victim->stats.invol_ctx_switch++;
                        spu->stats.invol_ctx_switch++;
@@ -662,8 +660,12 @@ int spu_activate(struct spu_context *ctx, unsigned long flags)
                if (!spu && rt_prio(ctx->prio))
                        spu = find_victim(ctx);
                if (spu) {
+                       int node = spu->node;
+
+                       mutex_lock(&cbe_spu_info[node].list_mutex);
                        spu_bind_context(spu, ctx);
-                       spu_add_to_active_list(spu);
+                       cbe_spu_info[node].nr_active++;
+                       mutex_unlock(&cbe_spu_info[node].list_mutex);
                        return 0;
                }
 
@@ -712,11 +714,17 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
        if (spu) {
                new = grab_runnable_context(max_prio, spu->node);
                if (new || force) {
-                       spu_remove_from_active_list(spu);
+                       int node = spu->node;
+
+                       mutex_lock(&cbe_spu_info[node].list_mutex);
                        spu_unbind_context(spu, ctx);
+                       spu->alloc_state = SPU_FREE;
+                       cbe_spu_info[node].nr_active--;
+                       mutex_unlock(&cbe_spu_info[node].list_mutex);
+
                        ctx->stats.vol_ctx_switch++;
                        spu->stats.vol_ctx_switch++;
-                       spu_free(spu);
+
                        if (new)
                                wake_up(&new->stop_wq);
                }
@@ -755,7 +763,7 @@ void spu_yield(struct spu_context *ctx)
        }
 }
 
-static void spusched_tick(struct spu_context *ctx)
+static noinline void spusched_tick(struct spu_context *ctx)
 {
        if (ctx->flags & SPU_CREATE_NOSCHED)
                return;
@@ -766,7 +774,7 @@ static void spusched_tick(struct spu_context *ctx)
                return;
 
        /*
-        * Unfortunately active_mutex ranks outside of state_mutex, so
+        * Unfortunately list_mutex ranks outside of state_mutex, so
         * we have to trylock here.  If we fail give the context another
         * tick and try again.
         */
@@ -776,12 +784,11 @@ static void spusched_tick(struct spu_context *ctx)
 
                new = grab_runnable_context(ctx->prio + 1, spu->node);
                if (new) {
-
-                       __spu_remove_from_active_list(spu);
                        spu_unbind_context(spu, ctx);
                        ctx->stats.invol_ctx_switch++;
                        spu->stats.invol_ctx_switch++;
-                       spu_free(spu);
+                       spu->alloc_state = SPU_FREE;
+                       cbe_spu_info[spu->node].nr_active--;
                        wake_up(&new->stop_wq);
                        /*
                         * We need to break out of the wait loop in
@@ -802,7 +809,7 @@ static void spusched_tick(struct spu_context *ctx)
  *
  * Return the number of tasks currently running or waiting to run.
  *
- * Note that we don't take runq_lock / active_mutex here.  Reading
+ * Note that we don't take runq_lock / list_mutex here.  Reading
  * a single 32bit value is atomic on powerpc, and we don't care
  * about memory ordering issues here.
  */
@@ -811,7 +818,7 @@ static unsigned long count_active_contexts(void)
        int nr_active = 0, node;
 
        for (node = 0; node < MAX_NUMNODES; node++)
-               nr_active += spu_prio->nr_active[node];
+               nr_active += cbe_spu_info[node].nr_active;
        nr_active += spu_prio->nr_waiting;
 
        return nr_active;
@@ -851,19 +858,18 @@ static void spusched_wake(unsigned long data)
 
 static int spusched_thread(void *unused)
 {
-       struct spu *spu, *next;
+       struct spu *spu;
        int node;
 
        while (!kthread_should_stop()) {
                set_current_state(TASK_INTERRUPTIBLE);
                schedule();
                for (node = 0; node < MAX_NUMNODES; node++) {
-                       mutex_lock(&spu_prio->active_mutex[node]);
-                       list_for_each_entry_safe(spu, next,
-                                                &spu_prio->active_list[node],
-                                                list)
-                               spusched_tick(spu->ctx);
-                       mutex_unlock(&spu_prio->active_mutex[node]);
+                       mutex_lock(&cbe_spu_info[node].list_mutex);
+                       list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
+                               if (spu->ctx)
+                                       spusched_tick(spu->ctx);
+                       mutex_unlock(&cbe_spu_info[node].list_mutex);
                }
        }
 
@@ -922,8 +928,8 @@ int __init spu_sched_init(void)
                __clear_bit(i, spu_prio->bitmap);
        }
        for (i = 0; i < MAX_NUMNODES; i++) {
-               mutex_init(&spu_prio->active_mutex[i]);
-               INIT_LIST_HEAD(&spu_prio->active_list[i]);
+               mutex_init(&cbe_spu_info[i].list_mutex);
+               INIT_LIST_HEAD(&cbe_spu_info[i].spus);
        }
        spin_lock_init(&spu_prio->runq_lock);
 
@@ -954,7 +960,7 @@ int __init spu_sched_init(void)
 
 void spu_sched_exit(void)
 {
-       struct spu *spu, *tmp;
+       struct spu *spu;
        int node;
 
        remove_proc_entry("spu_loadavg", NULL);
@@ -963,13 +969,11 @@ void spu_sched_exit(void)
        kthread_stop(spusched_task);
 
        for (node = 0; node < MAX_NUMNODES; node++) {
-               mutex_lock(&spu_prio->active_mutex[node]);
-               list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node],
-                                        list) {
-                       list_del_init(&spu->list);
-                       spu_free(spu);
-               }
-               mutex_unlock(&spu_prio->active_mutex[node]);
+               mutex_lock(&cbe_spu_info[node].list_mutex);
+               list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
+                       if (spu->alloc_state != SPU_FREE)
+                               spu->alloc_state = SPU_FREE;
+               mutex_unlock(&cbe_spu_info[node].list_mutex);
        }
        kfree(spu_prio);
 }
index a0f7fc8e23bb58962e1b3598cae5b663aa86d652..8836c0f1f2f74c2d266cebbdcc2136250a0a5186 100644 (file)
@@ -121,10 +121,9 @@ struct spu {
        unsigned long problem_phys;
        struct spu_problem __iomem *problem;
        struct spu_priv2 __iomem *priv2;
-       struct list_head list;
        struct list_head cbe_list;
-       struct list_head sched_list;
        struct list_head full_list;
+       enum { SPU_FREE, SPU_USED } alloc_state;
        int number;
        unsigned int irqs[3];
        u32 node;
@@ -187,18 +186,16 @@ struct spu {
 };
 
 struct cbe_spu_info {
+       struct mutex list_mutex;
        struct list_head spus;
-       struct list_head free_spus;
        int n_spus;
+       int nr_active;
        atomic_t reserved_spus;
 };
 
 extern struct cbe_spu_info cbe_spu_info[];
 
-struct spu *spu_alloc(void);
-struct spu *spu_alloc_node(int node);
-struct spu *spu_alloc_spu(struct spu *spu);
-void spu_free(struct spu *spu);
+void spu_init_channels(struct spu *spu);
 int spu_irq_class_0_bottom(struct spu *spu);
 int spu_irq_class_1_bottom(struct spu *spu);
 void spu_irq_setaffinity(struct spu *spu, int cpu);