slub: remove one code path and reduce lock contention in __slab_free()
authorJoonsoo Kim <js1304@gmail.com>
Wed, 15 Aug 2012 15:02:40 +0000 (00:02 +0900)
committerPekka Enberg <penberg@kernel.org>
Fri, 19 Oct 2012 07:19:24 +0000 (10:19 +0300)
When we try to free object, there is some of case that we need
to take a node lock. This is the necessary step for preventing a race.
After taking a lock, then we try to cmpxchg_double_slab().
But, there is a possible scenario that cmpxchg_double_slab() is failed
with taking a lock. Following example explains it.

CPU A               CPU B
need lock
...                 need lock
...                 lock!!
lock..but spin      free success
spin...             unlock
lock!!
free fail

In this case, retry with taking a lock is occured in CPU A.
I think that in this case for CPU A,
"release a lock first, and re-take a lock if necessary" is preferable way.

There are two reasons for this.

First, this makes __slab_free()'s logic somehow simple.
With this patch, 'was_frozen = 1' is "always" handled without taking a lock.
So we can remove one code path.

Second, it may reduce lock contention.
When we do retrying, status of slab is already changed,
so we don't need a lock anymore in almost every case.
"release a lock first, and re-take a lock if necessary" policy is
helpful to this.

Signed-off-by: Joonsoo Kim <js1304@gmail.com>
Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
mm/slub.c

index a0d698467f706bd617b960240cbb775cdc9cd034..e7aec2001ae5148aa530be3cc11e41ad1071a499 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2459,7 +2459,6 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
        void *prior;
        void **object = (void *)x;
        int was_frozen;
-       int inuse;
        struct page new;
        unsigned long counters;
        struct kmem_cache_node *n = NULL;
@@ -2472,13 +2471,17 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
                return;
 
        do {
+               if (unlikely(n)) {
+                       spin_unlock_irqrestore(&n->list_lock, flags);
+                       n = NULL;
+               }
                prior = page->freelist;
                counters = page->counters;
                set_freepointer(s, object, prior);
                new.counters = counters;
                was_frozen = new.frozen;
                new.inuse--;
-               if ((!new.inuse || !prior) && !was_frozen && !n) {
+               if ((!new.inuse || !prior) && !was_frozen) {
 
                        if (!kmem_cache_debug(s) && !prior)
 
@@ -2503,7 +2506,6 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 
                        }
                }
-               inuse = new.inuse;
 
        } while (!cmpxchg_double_slab(s, page,
                prior, counters,
@@ -2529,25 +2531,17 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
                 return;
         }
 
+       if (unlikely(!new.inuse && n->nr_partial > s->min_partial))
+               goto slab_empty;
+
        /*
-        * was_frozen may have been set after we acquired the list_lock in
-        * an earlier loop. So we need to check it here again.
+        * Objects left in the slab. If it was not on the partial list before
+        * then add it.
         */
-       if (was_frozen)
-               stat(s, FREE_FROZEN);
-       else {
-               if (unlikely(!inuse && n->nr_partial > s->min_partial))
-                        goto slab_empty;
-
-               /*
-                * Objects left in the slab. If it was not on the partial list before
-                * then add it.
-                */
-               if (unlikely(!prior)) {
-                       remove_full(s, page);
-                       add_partial(n, page, DEACTIVATE_TO_TAIL);
-                       stat(s, FREE_ADD_PARTIAL);
-               }
+       if (kmem_cache_debug(s) && unlikely(!prior)) {
+               remove_full(s, page);
+               add_partial(n, page, DEACTIVATE_TO_TAIL);
+               stat(s, FREE_ADD_PARTIAL);
        }
        spin_unlock_irqrestore(&n->list_lock, flags);
        return;