[PATCH] slab: fix handling of pages from foreign NUMA nodes
author Christoph Lameter <clameter@engr.sgi.com>
Fri, 23 Sep 2005 04:44:02 +0000 (21:44 -0700)
committer Linus Torvalds <torvalds@g5.osdl.org>
Fri, 23 Sep 2005 05:17:35 +0000 (22:17 -0700)
The NUMA slab allocator may place pages from foreign nodes onto the
lists of a particular node if that node runs out of memory.  Inspecting
the slab->nodeid field will then not reflect that the page is now in use
for the slabs of another node.

This patch fixes that issue by adding a node parameter to free_block() so
that the caller can indicate which node currently uses a slab.

It also removes the check for the current node from kmem_cache_alloc_node(),
since the process may later migrate to another node, which could lead to an
allocation on a different node than intended.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
mm/slab.c

index cf19ff2ab5e2bca587a645c241b24b65a7531901..c9adfce004056d2539ffa04a9ab09c1a2773d89d 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -640,7 +640,7 @@ static enum {
 
 static DEFINE_PER_CPU(struct work_struct, reap_work);
 
-static void free_block(kmem_cache_t* cachep, void** objpp, int len);
+static void free_block(kmem_cache_t* cachep, void** objpp, int len, int node);
 static void enable_cpucache (kmem_cache_t *cachep);
 static void cache_reap (void *unused);
 static int __node_shrink(kmem_cache_t *cachep, int node);
@@ -805,7 +805,7 @@ static inline void __drain_alien_cache(kmem_cache_t *cachep, struct array_cache
 
        if (ac->avail) {
                spin_lock(&rl3->list_lock);
-               free_block(cachep, ac->entry, ac->avail);
+               free_block(cachep, ac->entry, ac->avail, node);
                ac->avail = 0;
                spin_unlock(&rl3->list_lock);
        }
@@ -926,7 +926,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
                        /* Free limit for this kmem_list3 */
                        l3->free_limit -= cachep->batchcount;
                        if (nc)
-                               free_block(cachep, nc->entry, nc->avail);
+                               free_block(cachep, nc->entry, nc->avail, node);
 
                        if (!cpus_empty(mask)) {
                                 spin_unlock(&l3->list_lock);
@@ -935,7 +935,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 
                        if (l3->shared) {
                                free_block(cachep, l3->shared->entry,
-                                               l3->shared->avail);
+                                               l3->shared->avail, node);
                                kfree(l3->shared);
                                l3->shared = NULL;
                        }
@@ -1883,12 +1883,13 @@ static void do_drain(void *arg)
 {
        kmem_cache_t *cachep = (kmem_cache_t*)arg;
        struct array_cache *ac;
+       int node = numa_node_id();
 
        check_irq_off();
        ac = ac_data(cachep);
-       spin_lock(&cachep->nodelists[numa_node_id()]->list_lock);
-       free_block(cachep, ac->entry, ac->avail);
-       spin_unlock(&cachep->nodelists[numa_node_id()]->list_lock);
+       spin_lock(&cachep->nodelists[node]->list_lock);
+       free_block(cachep, ac->entry, ac->avail, node);
+       spin_unlock(&cachep->nodelists[node]->list_lock);
        ac->avail = 0;
 }
 
@@ -2609,7 +2610,7 @@ done:
 /*
  * Caller needs to acquire correct kmem_list's list_lock
  */
-static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
+static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int node)
 {
        int i;
        struct kmem_list3 *l3;
@@ -2618,14 +2619,12 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
                void *objp = objpp[i];
                struct slab *slabp;
                unsigned int objnr;
-               int nodeid = 0;
 
                slabp = GET_PAGE_SLAB(virt_to_page(objp));
-               nodeid = slabp->nodeid;
-               l3 = cachep->nodelists[nodeid];
+               l3 = cachep->nodelists[node];
                list_del(&slabp->list);
                objnr = (objp - slabp->s_mem) / cachep->objsize;
-               check_spinlock_acquired_node(cachep, nodeid);
+               check_spinlock_acquired_node(cachep, node);
                check_slabp(cachep, slabp);
 
 
@@ -2665,13 +2664,14 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac)
 {
        int batchcount;
        struct kmem_list3 *l3;
+       int node = numa_node_id();
 
        batchcount = ac->batchcount;
 #if DEBUG
        BUG_ON(!batchcount || batchcount > ac->avail);
 #endif
        check_irq_off();
-       l3 = cachep->nodelists[numa_node_id()];
+       l3 = cachep->nodelists[node];
        spin_lock(&l3->list_lock);
        if (l3->shared) {
                struct array_cache *shared_array = l3->shared;
@@ -2687,7 +2687,7 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac)
                }
        }
 
-       free_block(cachep, ac->entry, batchcount);
+       free_block(cachep, ac->entry, batchcount, node);
 free_done:
 #if STATS
        {
@@ -2752,7 +2752,7 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp)
                        } else {
                                spin_lock(&(cachep->nodelists[nodeid])->
                                                list_lock);
-                               free_block(cachep, &objp, 1);
+                               free_block(cachep, &objp, 1, nodeid);
                                spin_unlock(&(cachep->nodelists[nodeid])->
                                                list_lock);
                        }
@@ -2845,7 +2845,7 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int __nocast flags, i
        unsigned long save_flags;
        void *ptr;
 
-       if (nodeid == numa_node_id() || nodeid == -1)
+       if (nodeid == -1)
                return __cache_alloc(cachep, flags);
 
        if (unlikely(!cachep->nodelists[nodeid])) {
@@ -3080,7 +3080,7 @@ static int alloc_kmemlist(kmem_cache_t *cachep)
 
                        if ((nc = cachep->nodelists[node]->shared))
                                free_block(cachep, nc->entry,
-                                                       nc->avail);
+                                                       nc->avail, node);
 
                        l3->shared = new;
                        if (!cachep->nodelists[node]->alien) {
@@ -3161,7 +3161,7 @@ static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount,
                if (!ccold)
                        continue;
                spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
-               free_block(cachep, ccold->entry, ccold->avail);
+               free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
                spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
                kfree(ccold);
        }
@@ -3241,7 +3241,7 @@ static void drain_array_locked(kmem_cache_t *cachep,
                if (tofree > ac->avail) {
                        tofree = (ac->avail+1)/2;
                }
-               free_block(cachep, ac->entry, tofree);
+               free_block(cachep, ac->entry, tofree, node);
                ac->avail -= tofree;
                memmove(ac->entry, &(ac->entry[tofree]),
                                        sizeof(void*)*ac->avail);