mm/vmscan.c: set correct defer count for shrinker
authorShaohua Li <shli@fb.com>
Tue, 13 Dec 2016 00:41:50 +0000 (16:41 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 13 Dec 2016 02:55:07 +0000 (18:55 -0800)
Our system uses significantly more slab memory with memcg enabled with
the latest kernel.  With 3.10 kernel, slab uses 2G memory, while with
4.6 kernel, 6G memory is used.  The shrinker has problem.  Let's see we
have two memcg for one shrinker.  In do_shrink_slab:

1. Check cg1.  nr_deferred = 0, assume total_scan = 700.  batch size
   is 1024, then no memory is freed.  nr_deferred = 700

2. Check cg2.  nr_deferred = 700.  Assume freeable = 20, then
   total_scan = 10 or 40.  Let's assume it's 10.  No memory is freed.
   nr_deferred = 10.

The deferred share of cg1 is lost in this case.  kswapd will free no
memory even run above steps again and again.

The fix makes sure one memcg's deferred share isn't lost.

Link: http://lkml.kernel.org/r/2414be961b5d25892060315fbb56bb19d81d0c07.1476227351.git.shli@fb.com
Signed-off-by: Shaohua Li <shli@fb.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: <stable@vger.kernel.org> [4.0+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/vmscan.c

index d75cdf360730183e2f3690795b6bac8bcc594d36..c4abf08861d2520677bc43d5e0d628e36566f6a2 100644 (file)
@@ -291,6 +291,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
        int nid = shrinkctl->nid;
        long batch_size = shrinker->batch ? shrinker->batch
                                          : SHRINK_BATCH;
+       long scanned = 0, next_deferred;
 
        freeable = shrinker->count_objects(shrinker, shrinkctl);
        if (freeable == 0)
@@ -312,7 +313,9 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
                pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n",
                       shrinker->scan_objects, total_scan);
                total_scan = freeable;
-       }
+               next_deferred = nr;
+       } else
+               next_deferred = total_scan;
 
        /*
         * We need to avoid excessive windup on filesystem shrinkers
@@ -369,17 +372,22 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
 
                count_vm_events(SLABS_SCANNED, nr_to_scan);
                total_scan -= nr_to_scan;
+               scanned += nr_to_scan;
 
                cond_resched();
        }
 
+       if (next_deferred >= scanned)
+               next_deferred -= scanned;
+       else
+               next_deferred = 0;
        /*
         * move the unused scan count back into the shrinker in a
         * manner that handles concurrent updates. If we exhausted the
         * scan, there is no need to do an update.
         */
-       if (total_scan > 0)
-               new_nr = atomic_long_add_return(total_scan,
+       if (next_deferred > 0)
+               new_nr = atomic_long_add_return(next_deferred,
                                                &shrinker->nr_deferred[nid]);
        else
                new_nr = atomic_long_read(&shrinker->nr_deferred[nid]);