Found this issue by kmemleak: the 'sg' and 'sgc' pointers from
__sdt_alloc() might be leaked as each domain holds many groups' ref,
but in destroy_sched_domain(), it only declined the first group ref.
Onlining and offlining a CPU can trigger this leak, and cause OOM.
Reproducer for my 6 CPUs machine:
while true
do
echo 0 > /sys/devices/system/cpu/cpu5/online;
echo 1 > /sys/devices/system/cpu/cpu5/online;
done
unreferenced object 0xffff88007d772a80 (size 64):
comm "cpuhp/5", pid 39, jiffies
4294719962 (age 35.251s)
hex dump (first 32 bytes):
c0 22 77 7d 00 88 ff ff 02 00 00 00 01 00 00 00 ."w}............
40 2a 77 7d 00 88 ff ff 00 00 00 00 00 00 00 00 @*w}............
backtrace:
[<
ffffffff8176525a>] kmemleak_alloc+0x4a/0xa0
[<
ffffffff8121efe1>] __kmalloc_node+0xf1/0x280
[<
ffffffff810d94a8>] build_sched_domains+0x1e8/0xf20
[<
ffffffff810da674>] partition_sched_domains+0x304/0x360
[<
ffffffff81139557>] cpuset_update_active_cpus+0x17/0x40
[<
ffffffff810bdb2e>] sched_cpu_activate+0xae/0xc0
[<
ffffffff810900e0>] cpuhp_invoke_callback+0x90/0x400
[<
ffffffff81090597>] cpuhp_up_callbacks+0x37/0xb0
[<
ffffffff81090887>] cpuhp_thread_fun+0xd7/0xf0
[<
ffffffff810b37e0>] smpboot_thread_fn+0x110/0x160
[<
ffffffff810af5d9>] kthread+0x109/0x140
[<
ffffffff81770e45>] ret_from_fork+0x25/0x30
[<
ffffffffffffffff>] 0xffffffffffffffff
unreferenced object 0xffff88007d772a40 (size 64):
comm "cpuhp/5", pid 39, jiffies
4294719962 (age 35.251s)
hex dump (first 32 bytes):
03 00 00 00 00 00 00 00 00 04 00 00 00 00 00 00 ................
00 04 00 00 00 00 00 00 4f 3c fc ff 00 00 00 00 ........O<......
backtrace:
[<
ffffffff8176525a>] kmemleak_alloc+0x4a/0xa0
[<
ffffffff8121efe1>] __kmalloc_node+0xf1/0x280
[<
ffffffff810da16d>] build_sched_domains+0xead/0xf20
[<
ffffffff810da674>] partition_sched_domains+0x304/0x360
[<
ffffffff81139557>] cpuset_update_active_cpus+0x17/0x40
[<
ffffffff810bdb2e>] sched_cpu_activate+0xae/0xc0
[<
ffffffff810900e0>] cpuhp_invoke_callback+0x90/0x400
[<
ffffffff81090597>] cpuhp_up_callbacks+0x37/0xb0
[<
ffffffff81090887>] cpuhp_thread_fun+0xd7/0xf0
[<
ffffffff810b37e0>] smpboot_thread_fn+0x110/0x160
[<
ffffffff810af5d9>] kthread+0x109/0x140
[<
ffffffff81770e45>] ret_from_fork+0x25/0x30
[<
ffffffffffffffff>] 0xffffffffffffffff
Reported-by: Chunyu Hu <chuhu@redhat.com>
Signed-off-by: Shu Wang <shuwang@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Chunyu Hu <chuhu@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: liwang@redhat.com
Link: http://lkml.kernel.org/r/1502351536-9108-1-git-send-email-shuwang@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
if (free_sgc && atomic_dec_and_test(&sg->sgc->ref))
kfree(sg->sgc);
- kfree(sg);
+ if (atomic_dec_and_test(&sg->ref))
+ kfree(sg);
sg = tmp;
} while (sg != first);
}
static void destroy_sched_domain(struct sched_domain *sd)
{
/*
- * If its an overlapping domain it has private groups, iterate and
- * nuke them all.
+ * A sched domain has many groups' reference, and an overlapping
+ * domain has private groups, iterate and nuke them all.
*/
- if (sd->flags & SD_OVERLAP) {
- free_sched_groups(sd->groups, 1);
- } else if (atomic_dec_and_test(&sd->groups->ref)) {
- kfree(sd->groups->sgc);
- kfree(sd->groups);
- }
+ free_sched_groups(sd->groups, 1);
+
if (sd->shared && atomic_dec_and_test(&sd->shared->ref))
kfree(sd->shared);
kfree(sd);
else
cpumask_copy(sg_span, sched_domain_span(sd));
+ atomic_inc(&sg->ref);
return sg;
}