mem hotunplug: fix kfree() of bootmem memory
author Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Mon, 29 Apr 2013 22:08:56 +0000 (15:08 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 29 Apr 2013 22:54:40 +0000 (15:54 -0700)
When hot-removing memory that was present at boot time, the following messages are shown:

  kernel BUG at mm/slub.c:3409!
  invalid opcode: 0000 [#1] SMP
  Modules linked in: ebtable_nat ebtables xt_CHECKSUM iptable_mangle bridge stp llc ipmi_devintf ipmi_msghandler sunrpc ipt_REJECT nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables binfmt_misc vfat fat dm_mirror dm_region_hash dm_log dm_mod vhost_net macvtap macvlan tun uinput iTCO_wdt iTCO_vendor_support coretemp kvm_intel kvm crc32c_intel ghash_clmulni_intel microcode pcspkr sg i2c_i801 lpc_ich mfd_core igb i2c_algo_bit i2c_core e1000e ptp pps_core tpm_infineon ioatdma dca sr_mod cdrom sd_mod crc_t10dif usb_storage megaraid_sas lpfc scsi_transport_fc scsi_tgt scsi_mod
  CPU 0
  Pid: 5091, comm: kworker/0:2 Tainted: G        W    3.9.0-rc6+ #15
  RIP: kfree+0x232/0x240
  Process kworker/0:2 (pid: 5091, threadinfo ffff88084678c000, task ffff88083928ca80)
  Call Trace:
    __release_region+0xd4/0xe0
    __remove_pages+0x52/0x110
    arch_remove_memory+0x89/0xd0
    remove_memory+0xc4/0x100
    acpi_memory_device_remove+0x6d/0xb1
    acpi_device_remove+0x89/0xab
    __device_release_driver+0x7c/0xf0
    device_release_driver+0x2f/0x50
    acpi_bus_device_detach+0x6c/0x70
    acpi_ns_walk_namespace+0x11a/0x250
    acpi_walk_namespace+0xee/0x137
    acpi_bus_trim+0x33/0x7a
    acpi_bus_hot_remove_device+0xc4/0x1a1
    acpi_os_execute_deferred+0x27/0x34
    process_one_work+0x1f7/0x590
    worker_thread+0x11a/0x370
    kthread+0xee/0x100
    ret_from_fork+0x7c/0xb0
  RIP  [<ffffffff811c41d2>] kfree+0x232/0x240
   RSP <ffff88084678d968>

These messages are shown because a resource structure that was allocated
by bootmem is released with kfree().  So when we release a resource
structure, we should check whether it was allocated by bootmem or not.

But even if we know a resource structure was allocated by bootmem, we
cannot release it, since SLxB cannot handle it.  So, in order to reuse
such a resource structure, this patch remembers it on a bootmem_resource
list (bootmem_resource_free in the code) as follows:

When releasing a resource structure by free_resource(), free_resource()
checks whether the resource structure was allocated by bootmem or not.
If it was allocated by bootmem, free_resource() adds it to the
bootmem_resource list.  If it was not allocated by bootmem,
free_resource() releases it with kfree().

And when getting a new resource structure by get_resource(),
get_resource() checks whether the bootmem_resource list holds any
released resource structures.  If there is a released resource
structure, get_resource() returns it.  If there is no released resource
structure, get_resource() returns a new resource structure allocated by
kzalloc().

[akpm@linux-foundation.org: s/get_resource/alloc_resource/]
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Reviewed-by: Toshi Kani <toshi.kani@hp.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Ram Pai <linuxram@us.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
kernel/resource.c

index 4aef8867fd4ba3105452046e3d1ada3ff45bdc4d..d7386986e10e31edda59fcb15c4b83b236d52993 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/seq_file.h>
 #include <linux/device.h>
 #include <linux/pfn.h>
+#include <linux/mm.h>
 #include <asm/io.h>
 
 
@@ -50,6 +51,14 @@ struct resource_constraint {
 
 static DEFINE_RWLOCK(resource_lock);
 
+/*
+ * For memory hotplug, there is no way to free resource entries allocated
+ * by boot mem after the system is up. So for reusing the resource entry
+ * we need to remember the resource.
+ */
+static struct resource *bootmem_resource_free;
+static DEFINE_SPINLOCK(bootmem_resource_lock);
+
 static void *r_next(struct seq_file *m, void *v, loff_t *pos)
 {
        struct resource *p = v;
@@ -151,6 +160,40 @@ __initcall(ioresources_init);
 
 #endif /* CONFIG_PROC_FS */
 
+static void free_resource(struct resource *res) /* release @res: kfree() it, or park it for later reuse */
+{
+       if (!res) /* NULL is accepted and ignored */
+               return;
+
+       if (!PageSlab(virt_to_head_page(res))) { /* not on a slab page: must not be passed to kfree() */
+               spin_lock(&bootmem_resource_lock);
+               res->sibling = bootmem_resource_free; /* push onto the list head, linked via ->sibling */
+               bootmem_resource_free = res;
+               spin_unlock(&bootmem_resource_lock);
+       } else {
+               kfree(res);
+       }
+}
+
+static struct resource *alloc_resource(gfp_t flags) /* zeroed resource: a parked entry if any, else kzalloc(@flags) */
+{
+       struct resource *res = NULL;
+
+       spin_lock(&bootmem_resource_lock);
+       if (bootmem_resource_free) { /* pop the head of the parked-entry list */
+               res = bootmem_resource_free;
+               bootmem_resource_free = res->sibling;
+       }
+       spin_unlock(&bootmem_resource_lock);
+
+       if (res)
+               memset(res, 0, sizeof(struct resource)); /* wipe stale fields, incl. ->sibling; done after unlock */
+       else
+               res = kzalloc(sizeof(struct resource), flags); /* list was empty; may yield NULL, callers check */
+
+       return res;
+}
+
 /* Return the conflict entry if you can't request it */
 static struct resource * __request_resource(struct resource *root, struct resource *new)
 {
@@ -771,7 +814,7 @@ static void __init __reserve_region_with_split(struct resource *root,
 {
        struct resource *parent = root;
        struct resource *conflict;
-       struct resource *res = kzalloc(sizeof(*res), GFP_ATOMIC);
+       struct resource *res = alloc_resource(GFP_ATOMIC);
        struct resource *next_res = NULL;
 
        if (!res)
@@ -796,7 +839,7 @@ static void __init __reserve_region_with_split(struct resource *root,
                /* conflict covered whole area */
                if (conflict->start <= res->start &&
                                conflict->end >= res->end) {
-                       kfree(res);
+                       free_resource(res);
                        WARN_ON(next_res);
                        break;
                }
@@ -806,10 +849,9 @@ static void __init __reserve_region_with_split(struct resource *root,
                        end = res->end;
                        res->end = conflict->start - 1;
                        if (conflict->end < end) {
-                               next_res = kzalloc(sizeof(*next_res),
-                                               GFP_ATOMIC);
+                               next_res = alloc_resource(GFP_ATOMIC);
                                if (!next_res) {
-                                       kfree(res);
+                                       free_resource(res);
                                        break;
                                }
                                next_res->name = name;
@@ -899,7 +941,7 @@ struct resource * __request_region(struct resource *parent,
                                   const char *name, int flags)
 {
        DECLARE_WAITQUEUE(wait, current);
-       struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);
+       struct resource *res = alloc_resource(GFP_KERNEL);
 
        if (!res)
                return NULL;
@@ -933,7 +975,7 @@ struct resource * __request_region(struct resource *parent,
                        continue;
                }
                /* Uhhuh, that didn't work out.. */
-               kfree(res);
+               free_resource(res);
                res = NULL;
                break;
        }
@@ -967,7 +1009,7 @@ int __check_region(struct resource *parent, resource_size_t start,
                return -EBUSY;
 
        release_resource(res);
-       kfree(res);
+       free_resource(res);
        return 0;
 }
 EXPORT_SYMBOL(__check_region);
@@ -1007,7 +1049,7 @@ void __release_region(struct resource *parent, resource_size_t start,
                        write_unlock(&resource_lock);
                        if (res->flags & IORESOURCE_MUXED)
                                wake_up(&muxed_resource_wait);
-                       kfree(res);
+                       free_resource(res);
                        return;
                }
                p = &res->sibling;
@@ -1055,8 +1097,8 @@ int release_mem_region_adjustable(struct resource *parent,
        if ((start < parent->start) || (end > parent->end))
                return ret;
 
-       /* The kzalloc() result gets checked later */
-       new_res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+       /* The alloc_resource() result gets checked later */
+       new_res = alloc_resource(GFP_KERNEL);
 
        p = &parent->child;
        write_lock(&resource_lock);
@@ -1083,7 +1125,7 @@ int release_mem_region_adjustable(struct resource *parent,
                if (res->start == start && res->end == end) {
                        /* free the whole entry */
                        *p = res->sibling;
-                       kfree(res);
+                       free_resource(res);
                        ret = 0;
                } else if (res->start == start && res->end != end) {
                        /* adjust the start */
@@ -1119,7 +1161,7 @@ int release_mem_region_adjustable(struct resource *parent,
        }
 
        write_unlock(&resource_lock);
-       kfree(new_res);
+       free_resource(new_res);
        return ret;
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */