mm/kasan: don't vfree() nonexistent vm_area
authorAndrey Ryabinin <aryabinin@virtuozzo.com>
Fri, 25 May 2018 21:47:38 +0000 (14:47 -0700)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 30 May 2018 05:51:49 +0000 (07:51 +0200)
commit 0f901dcbc31f88ae41a2aaa365f7802b5d520a28 upstream.

KASAN uses different routines to map shadow for hot added memory and
memory obtained in boot process.  Attempt to offline memory onlined by
normal boot process leads to this:

    Trying to vfree() nonexistent vm area (000000005d3b34b9)
    WARNING: CPU: 2 PID: 13215 at mm/vmalloc.c:1525 __vunmap+0x147/0x190

    Call Trace:
     kasan_mem_notifier+0xad/0xb9
     notifier_call_chain+0x166/0x260
     __blocking_notifier_call_chain+0xdb/0x140
     __offline_pages+0x96a/0xb10
     memory_subsys_offline+0x76/0xc0
     device_offline+0xb8/0x120
     store_mem_state+0xfa/0x120
     kernfs_fop_write+0x1d5/0x320
     __vfs_write+0xd4/0x530
     vfs_write+0x105/0x340
     SyS_write+0xb0/0x140

Obviously we can't call vfree() to free memory that wasn't allocated via
vmalloc().  Use find_vm_area() to see if we can call vfree().

Unfortunately it's a bit tricky to properly unmap and free shadow
allocated during boot, so we'll have to keep it.  If memory will come
online again that shadow will be reused.

Matthew asked: how can you call vfree() on something that isn't a
vmalloc address?

  vfree() is able to free any address returned by
  __vmalloc_node_range().  And __vmalloc_node_range() gives you any
  address you ask.  It doesn't have to be an address in [VMALLOC_START,
  VMALLOC_END] range.

  That's also how the module_alloc()/module_memfree() works on
  architectures that have designated area for modules.

[aryabinin@virtuozzo.com: improve comments]
Link: http://lkml.kernel.org/r/dabee6ab-3a7a-51cd-3b86-5468718e0390@virtuozzo.com
[akpm@linux-foundation.org: fix typos, reflow comment]
Link: http://lkml.kernel.org/r/20180201163349.8700-1-aryabinin@virtuozzo.com
Fixes: fa69b5989bb0 ("mm/kasan: add support for memory hotplug")
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Reported-by: Paul Menzel <pmenzel+linux-kasan-dev@molgen.mpg.de>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
mm/kasan/kasan.c

index 6f319fb817186e4b5c9ef62ff8a715bc28a0e292..858b96057e0409dd983404fa656c4ce551551c15 100644 (file)
@@ -737,6 +737,40 @@ void __asan_unpoison_stack_memory(const void *addr, size_t size)
 EXPORT_SYMBOL(__asan_unpoison_stack_memory);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
+static bool shadow_mapped(unsigned long addr)
+{
+       pgd_t *pgd = pgd_offset_k(addr);
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+       if (pgd_none(*pgd))
+               return false;
+       p4d = p4d_offset(pgd, addr);
+       if (p4d_none(*p4d))
+               return false;
+       pud = pud_offset(p4d, addr);
+       if (pud_none(*pud))
+               return false;
+
+       /*
+        * We can't use pud_large() or pud_huge(), the first one is
+        * arch-specific, the last one depends on HUGETLB_PAGE.  So let's abuse
+        * pud_bad(), if pud is bad then it's bad because it's huge.
+        */
+       if (pud_bad(*pud))
+               return true;
+       pmd = pmd_offset(pud, addr);
+       if (pmd_none(*pmd))
+               return false;
+
+       if (pmd_bad(*pmd))
+               return true;
+       pte = pte_offset_kernel(pmd, addr);
+       return !pte_none(*pte);
+}
+
 static int __meminit kasan_mem_notifier(struct notifier_block *nb,
                        unsigned long action, void *data)
 {
@@ -758,6 +792,14 @@ static int __meminit kasan_mem_notifier(struct notifier_block *nb,
        case MEM_GOING_ONLINE: {
                void *ret;
 
+               /*
+                * If shadow is mapped already than it must have been mapped
+                * during the boot. This could happen if we onlining previously
+                * offlined memory.
+                */
+               if (shadow_mapped(shadow_start))
+                       return NOTIFY_OK;
+
                ret = __vmalloc_node_range(shadow_size, PAGE_SIZE, shadow_start,
                                        shadow_end, GFP_KERNEL,
                                        PAGE_KERNEL, VM_NO_GUARD,
@@ -769,8 +811,25 @@ static int __meminit kasan_mem_notifier(struct notifier_block *nb,
                kmemleak_ignore(ret);
                return NOTIFY_OK;
        }
-       case MEM_OFFLINE:
-               vfree((void *)shadow_start);
+       case MEM_OFFLINE: {
+               struct vm_struct *vm;
+
+               /*
+                * shadow_start was either mapped during boot by kasan_init()
+                * or during memory online by __vmalloc_node_range().
+                * In the latter case we can use vfree() to free shadow.
+                * Non-NULL result of the find_vm_area() will tell us if
+                * that was the second case.
+                *
+                * Currently it's not possible to free shadow mapped
+                * during boot by kasan_init(). It's because the code
+                * to do that hasn't been written yet. So we'll just
+                * leak the memory.
+                */
+               vm = find_vm_area((void *)shadow_start);
+               if (vm)
+                       vfree((void *)shadow_start);
+       }
        }
 
        return NOTIFY_OK;