From dfec072ecd35ba6ecad2d51dde325253ac9a2936 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 4 Apr 2008 00:51:41 +0200 Subject: [PATCH] kmemcheck: add the kmemcheck core General description: kmemcheck is a patch to the linux kernel that detects use of uninitialized memory. It does this by trapping every read and write to memory that was allocated dynamically (e.g. using kmalloc()). If a memory address is read that has not previously been written to, a message is printed to the kernel log. Thanks to Andi Kleen for the set_memory_4k() solution. Andrew Morton suggested documenting the shadow member of struct page. Signed-off-by: Vegard Nossum Signed-off-by: Pekka Enberg [export kmemcheck_mark_initialized] [build fix for setup_max_cpus] Signed-off-by: Ingo Molnar [rebased for mainline inclusion] Signed-off-by: Vegard Nossum --- arch/x86/Makefile | 5 + arch/x86/include/asm/kmemcheck.h | 42 ++ arch/x86/include/asm/pgtable.h | 9 + arch/x86/include/asm/pgtable_types.h | 4 +- arch/x86/mm/Makefile | 2 + arch/x86/mm/kmemcheck/Makefile | 1 + arch/x86/mm/kmemcheck/error.c | 229 ++++++++++ arch/x86/mm/kmemcheck/error.h | 15 + arch/x86/mm/kmemcheck/kmemcheck.c | 650 +++++++++++++++++++++++++++ arch/x86/mm/kmemcheck/opcode.c | 101 +++++ arch/x86/mm/kmemcheck/opcode.h | 9 + arch/x86/mm/kmemcheck/pte.c | 22 + arch/x86/mm/kmemcheck/pte.h | 10 + arch/x86/mm/kmemcheck/shadow.c | 153 +++++++ arch/x86/mm/kmemcheck/shadow.h | 16 + include/linux/kmemcheck.h | 17 + include/linux/mm_types.h | 8 + init/main.c | 1 + kernel/sysctl.c | 12 + 19 files changed, 1304 insertions(+), 2 deletions(-) create mode 100644 arch/x86/include/asm/kmemcheck.h create mode 100644 arch/x86/mm/kmemcheck/Makefile create mode 100644 arch/x86/mm/kmemcheck/error.c create mode 100644 arch/x86/mm/kmemcheck/error.h create mode 100644 arch/x86/mm/kmemcheck/kmemcheck.c create mode 100644 arch/x86/mm/kmemcheck/opcode.c create mode 100644 arch/x86/mm/kmemcheck/opcode.h create mode 100644 arch/x86/mm/kmemcheck/pte.c create mode 100644 arch/x86/mm/kmemcheck/pte.h create mode 100644 arch/x86/mm/kmemcheck/shadow.c create mode 100644 arch/x86/mm/kmemcheck/shadow.h create mode 100644 include/linux/kmemcheck.h diff --git a/arch/x86/Makefile b/arch/x86/Makefile index edbd0ca62067..1b68659c41b4 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -81,6 +81,11 @@ ifdef CONFIG_CC_STACKPROTECTOR endif endif +# Don't unroll struct assignments with kmemcheck enabled +ifeq ($(CONFIG_KMEMCHECK),y) + KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy) +endif + # Stackpointer is addressed different for 32 bit and 64 bit x86 sp-$(CONFIG_X86_32) := esp sp-$(CONFIG_X86_64) := rsp diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h new file mode 100644 index 000000000000..ed01518f297e --- /dev/null +++ b/arch/x86/include/asm/kmemcheck.h @@ -0,0 +1,42 @@ +#ifndef ASM_X86_KMEMCHECK_H +#define ASM_X86_KMEMCHECK_H + +#include +#include + +#ifdef CONFIG_KMEMCHECK +bool kmemcheck_active(struct pt_regs *regs); + +void kmemcheck_show(struct pt_regs *regs); +void kmemcheck_hide(struct pt_regs *regs); + +bool kmemcheck_fault(struct pt_regs *regs, + unsigned long address, unsigned long error_code); +bool kmemcheck_trap(struct pt_regs *regs); +#else +static inline bool kmemcheck_active(struct pt_regs *regs) +{ + return false; +} + +static inline void kmemcheck_show(struct pt_regs *regs) +{ +} + +static inline void kmemcheck_hide(struct pt_regs *regs) +{ +} + +static inline bool kmemcheck_fault(struct pt_regs *regs, + unsigned long address, unsigned long error_code) +{ + return false; +} + +static inline bool kmemcheck_trap(struct pt_regs *regs) +{ + return false; +} +#endif /* CONFIG_KMEMCHECK */ + +#endif diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 18ef7ebf2631..c5a08079ad5e 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -317,6 +317,15 @@ static inline int pte_present(pte_t a) return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); } +static inline int pte_hidden(pte_t x) +{ +#ifdef CONFIG_KMEMCHECK + return pte_flags(x) & _PAGE_HIDDEN; +#else + return 0; +#endif +} + static inline int pmd_present(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_PRESENT; diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 4d258ad76a0f..9b5c92140aab 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -18,7 +18,7 @@ #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ #define _PAGE_BIT_UNUSED1 9 /* available for programmer */ #define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ -#define _PAGE_BIT_UNUSED3 11 +#define _PAGE_BIT_HIDDEN 11 /* hidden by kmemcheck */ #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ #define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 @@ -41,7 +41,7 @@ #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) #define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) #define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) -#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) +#define _PAGE_HIDDEN (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index fdd30d08ab52..eefdeee8a871 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -10,6 +10,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o +obj-$(CONFIG_KMEMCHECK) += kmemcheck/ + obj-$(CONFIG_MMIOTRACE) += mmiotrace.o mmiotrace-y := kmmio.o pf_in.o mmio-mod.o obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o diff --git a/arch/x86/mm/kmemcheck/Makefile b/arch/x86/mm/kmemcheck/Makefile new file mode 100644 index 000000000000..4666b7a778be --- /dev/null +++ b/arch/x86/mm/kmemcheck/Makefile @@ -0,0 +1 @@ +obj-y := error.o kmemcheck.o opcode.o pte.o shadow.o diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c new file mode 100644 index 000000000000..5ec9f5a93f47 --- /dev/null +++ b/arch/x86/mm/kmemcheck/error.c @@ -0,0 +1,229 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "error.h" +#include "shadow.h" + +enum kmemcheck_error_type { + KMEMCHECK_ERROR_INVALID_ACCESS, + KMEMCHECK_ERROR_BUG, +}; + +#define SHADOW_COPY_SIZE (1 << CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT) + +struct kmemcheck_error { + enum kmemcheck_error_type type; + + union { + /* KMEMCHECK_ERROR_INVALID_ACCESS */ + struct { + /* Kind of access that caused the error */ + enum kmemcheck_shadow state; + /* Address and size of the erroneous read */ + unsigned long address; + unsigned int size; + }; + }; + + struct pt_regs regs; + struct stack_trace trace; + unsigned long trace_entries[32]; + + /* We compress it to a char. */ + unsigned char shadow_copy[SHADOW_COPY_SIZE]; + unsigned char memory_copy[SHADOW_COPY_SIZE]; +}; + +/* + * Create a ring queue of errors to output. We can't call printk() directly + * from the kmemcheck traps, since this may call the console drivers and + * result in a recursive fault. + */ +static struct kmemcheck_error error_fifo[CONFIG_KMEMCHECK_QUEUE_SIZE]; +static unsigned int error_count; +static unsigned int error_rd; +static unsigned int error_wr; +static unsigned int error_missed_count; + +static struct kmemcheck_error *error_next_wr(void) +{ + struct kmemcheck_error *e; + + if (error_count == ARRAY_SIZE(error_fifo)) { + ++error_missed_count; + return NULL; + } + + e = &error_fifo[error_wr]; + if (++error_wr == ARRAY_SIZE(error_fifo)) + error_wr = 0; + ++error_count; + return e; +} + +static struct kmemcheck_error *error_next_rd(void) +{ + struct kmemcheck_error *e; + + if (error_count == 0) + return NULL; + + e = &error_fifo[error_rd]; + if (++error_rd == ARRAY_SIZE(error_fifo)) + error_rd = 0; + --error_count; + return e; +} + +static void do_wakeup(unsigned long); +static DECLARE_TASKLET(kmemcheck_tasklet, &do_wakeup, 0); + +/* + * Save the context of an error report. + */ +void kmemcheck_error_save(enum kmemcheck_shadow state, + unsigned long address, unsigned int size, struct pt_regs *regs) +{ + static unsigned long prev_ip; + + struct kmemcheck_error *e; + void *shadow_copy; + void *memory_copy; + + /* Don't report several adjacent errors from the same EIP. */ + if (regs->ip == prev_ip) + return; + prev_ip = regs->ip; + + e = error_next_wr(); + if (!e) + return; + + e->type = KMEMCHECK_ERROR_INVALID_ACCESS; + + e->state = state; + e->address = address; + e->size = size; + + /* Save regs */ + memcpy(&e->regs, regs, sizeof(*regs)); + + /* Save stack trace */ + e->trace.nr_entries = 0; + e->trace.entries = e->trace_entries; + e->trace.max_entries = ARRAY_SIZE(e->trace_entries); + e->trace.skip = 0; + save_stack_trace_bp(&e->trace, regs->bp); + + /* Round address down to nearest 16 bytes */ + shadow_copy = kmemcheck_shadow_lookup(address + & ~(SHADOW_COPY_SIZE - 1)); + BUG_ON(!shadow_copy); + + memcpy(e->shadow_copy, shadow_copy, SHADOW_COPY_SIZE); + + kmemcheck_show_addr(address); + memory_copy = (void *) (address & ~(SHADOW_COPY_SIZE - 1)); + memcpy(e->memory_copy, memory_copy, SHADOW_COPY_SIZE); + kmemcheck_hide_addr(address); + + tasklet_hi_schedule_first(&kmemcheck_tasklet); +} + +/* + * Save the context of a kmemcheck bug. + */ +void kmemcheck_error_save_bug(struct pt_regs *regs) +{ + struct kmemcheck_error *e; + + e = error_next_wr(); + if (!e) + return; + + e->type = KMEMCHECK_ERROR_BUG; + + memcpy(&e->regs, regs, sizeof(*regs)); + + e->trace.nr_entries = 0; + e->trace.entries = e->trace_entries; + e->trace.max_entries = ARRAY_SIZE(e->trace_entries); + e->trace.skip = 1; + save_stack_trace(&e->trace); + + tasklet_hi_schedule_first(&kmemcheck_tasklet); +} + +void kmemcheck_error_recall(void) +{ + static const char *desc[] = { + [KMEMCHECK_SHADOW_UNALLOCATED] = "unallocated", + [KMEMCHECK_SHADOW_UNINITIALIZED] = "uninitialized", + [KMEMCHECK_SHADOW_INITIALIZED] = "initialized", + [KMEMCHECK_SHADOW_FREED] = "freed", + }; + + static const char short_desc[] = { + [KMEMCHECK_SHADOW_UNALLOCATED] = 'a', + [KMEMCHECK_SHADOW_UNINITIALIZED] = 'u', + [KMEMCHECK_SHADOW_INITIALIZED] = 'i', + [KMEMCHECK_SHADOW_FREED] = 'f', + }; + + struct kmemcheck_error *e; + unsigned int i; + + e = error_next_rd(); + if (!e) + return; + + switch (e->type) { + case KMEMCHECK_ERROR_INVALID_ACCESS: + printk(KERN_ERR "WARNING: kmemcheck: Caught %d-bit read " + "from %s memory (%p)\n", + 8 * e->size, e->state < ARRAY_SIZE(desc) ? + desc[e->state] : "(invalid shadow state)", + (void *) e->address); + + printk(KERN_INFO); + for (i = 0; i < SHADOW_COPY_SIZE; ++i) + printk("%02x", e->memory_copy[i]); + printk("\n"); + + printk(KERN_INFO); + for (i = 0; i < SHADOW_COPY_SIZE; ++i) { + if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) + printk(" %c", short_desc[e->shadow_copy[i]]); + else + printk(" ?"); + } + printk("\n"); + printk(KERN_INFO "%*c\n", 2 + 2 + * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); + break; + case KMEMCHECK_ERROR_BUG: + printk(KERN_EMERG "ERROR: kmemcheck: Fatal error\n"); + break; + } + + __show_regs(&e->regs, 1); + print_stack_trace(&e->trace, 0); +} + +static void do_wakeup(unsigned long data) +{ + while (error_count > 0) + kmemcheck_error_recall(); + + if (error_missed_count > 0) { + printk(KERN_WARNING "kmemcheck: Lost %d error reports because " + "the queue was too small\n", error_missed_count); + error_missed_count = 0; + } +} diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h new file mode 100644 index 000000000000..0efc2e8d0a20 --- /dev/null +++ b/arch/x86/mm/kmemcheck/error.h @@ -0,0 +1,15 @@ +#ifndef ARCH__X86__MM__KMEMCHECK__ERROR_H +#define ARCH__X86__MM__KMEMCHECK__ERROR_H + +#include + +#include "shadow.h" + +void kmemcheck_error_save(enum kmemcheck_shadow state, + unsigned long address, unsigned int size, struct pt_regs *regs); + +void kmemcheck_error_save_bug(struct pt_regs *regs); + +void kmemcheck_error_recall(void); + +#endif diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c new file mode 100644 index 000000000000..9de7d8f6b6e1 --- /dev/null +++ b/arch/x86/mm/kmemcheck/kmemcheck.c @@ -0,0 +1,650 @@ +/** + * kmemcheck - a heavyweight memory checker for the linux kernel + * Copyright (C) 2007, 2008 Vegard Nossum + * (With a lot of help from Ingo Molnar and Pekka Enberg.) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2) as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "error.h" +#include "opcode.h" +#include "pte.h" +#include "shadow.h" + +#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT +# define KMEMCHECK_ENABLED 0 +#endif + +#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT +# define KMEMCHECK_ENABLED 1 +#endif + +#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT +# define KMEMCHECK_ENABLED 2 +#endif + +int kmemcheck_enabled = KMEMCHECK_ENABLED; + +int __init kmemcheck_init(void) +{ + printk(KERN_INFO "kmemcheck: \"Bugs, beware!\"\n"); + +#ifdef CONFIG_SMP + /* + * Limit SMP to use a single CPU. We rely on the fact that this code + * runs before SMP is set up. + */ + if (setup_max_cpus > 1) { + printk(KERN_INFO + "kmemcheck: Limiting number of CPUs to 1.\n"); + setup_max_cpus = 1; + } +#endif + + return 0; +} + +early_initcall(kmemcheck_init); + +#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT +int kmemcheck_enabled = 0; +#endif + +#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT +int kmemcheck_enabled = 1; +#endif + +#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT +int kmemcheck_enabled = 2; +#endif + +/* + * We need to parse the kmemcheck= option before any memory is allocated. + */ +static int __init param_kmemcheck(char *str) +{ + if (!str) + return -EINVAL; + + sscanf(str, "%d", &kmemcheck_enabled); + return 0; +} + +early_param("kmemcheck", param_kmemcheck); + +int kmemcheck_show_addr(unsigned long address) +{ + pte_t *pte; + + pte = kmemcheck_pte_lookup(address); + if (!pte) + return 0; + + set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); + __flush_tlb_one(address); + return 1; +} + +int kmemcheck_hide_addr(unsigned long address) +{ + pte_t *pte; + + pte = kmemcheck_pte_lookup(address); + if (!pte) + return 0; + + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); + __flush_tlb_one(address); + return 1; +} + +struct kmemcheck_context { + bool busy; + int balance; + + /* + * There can be at most two memory operands to an instruction, but + * each address can cross a page boundary -- so we may need up to + * four addresses that must be hidden/revealed for each fault. + */ + unsigned long addr[4]; + unsigned long n_addrs; + unsigned long flags; + + /* Data size of the instruction that caused a fault. */ + unsigned int size; +}; + +static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context); + +bool kmemcheck_active(struct pt_regs *regs) +{ + struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + + return data->balance > 0; +} + +/* Save an address that needs to be shown/hidden */ +static void kmemcheck_save_addr(unsigned long addr) +{ + struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + + BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr)); + data->addr[data->n_addrs++] = addr; +} + +static unsigned int kmemcheck_show_all(void) +{ + struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + unsigned int i; + unsigned int n; + + n = 0; + for (i = 0; i < data->n_addrs; ++i) + n += kmemcheck_show_addr(data->addr[i]); + + return n; +} + +static unsigned int kmemcheck_hide_all(void) +{ + struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + unsigned int i; + unsigned int n; + + n = 0; + for (i = 0; i < data->n_addrs; ++i) + n += kmemcheck_hide_addr(data->addr[i]); + + return n; +} + +/* + * Called from the #PF handler. + */ +void kmemcheck_show(struct pt_regs *regs) +{ + struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + + BUG_ON(!irqs_disabled()); + + if (unlikely(data->balance != 0)) { + kmemcheck_show_all(); + kmemcheck_error_save_bug(regs); + data->balance = 0; + return; + } + + /* + * None of the addresses actually belonged to kmemcheck. Note that + * this is not an error. + */ + if (kmemcheck_show_all() == 0) + return; + + ++data->balance; + + /* + * The IF needs to be cleared as well, so that the faulting + * instruction can run "uninterrupted". Otherwise, we might take + * an interrupt and start executing that before we've had a chance + * to hide the page again. + * + * NOTE: In the rare case of multiple faults, we must not override + * the original flags: + */ + if (!(regs->flags & X86_EFLAGS_TF)) + data->flags = regs->flags; + + regs->flags |= X86_EFLAGS_TF; + regs->flags &= ~X86_EFLAGS_IF; +} + +/* + * Called from the #DB handler. + */ +void kmemcheck_hide(struct pt_regs *regs) +{ + struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + int n; + + BUG_ON(!irqs_disabled()); + + if (data->balance == 0) + return; + + if (unlikely(data->balance != 1)) { + kmemcheck_show_all(); + kmemcheck_error_save_bug(regs); + data->n_addrs = 0; + data->balance = 0; + + if (!(data->flags & X86_EFLAGS_TF)) + regs->flags &= ~X86_EFLAGS_TF; + if (data->flags & X86_EFLAGS_IF) + regs->flags |= X86_EFLAGS_IF; + return; + } + + if (kmemcheck_enabled) + n = kmemcheck_hide_all(); + else + n = kmemcheck_show_all(); + + if (n == 0) + return; + + --data->balance; + + data->n_addrs = 0; + + if (!(data->flags & X86_EFLAGS_TF)) + regs->flags &= ~X86_EFLAGS_TF; + if (data->flags & X86_EFLAGS_IF) + regs->flags |= X86_EFLAGS_IF; +} + +void kmemcheck_show_pages(struct page *p, unsigned int n) +{ + unsigned int i; + + for (i = 0; i < n; ++i) { + unsigned long address; + pte_t *pte; + unsigned int level; + + address = (unsigned long) page_address(&p[i]); + pte = lookup_address(address, &level); + BUG_ON(!pte); + BUG_ON(level != PG_LEVEL_4K); + + set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN)); + __flush_tlb_one(address); + } +} + +bool kmemcheck_page_is_tracked(struct page *p) +{ + /* This will also check the "hidden" flag of the PTE. */ + return kmemcheck_pte_lookup((unsigned long) page_address(p)); +} + +void kmemcheck_hide_pages(struct page *p, unsigned int n) +{ + unsigned int i; + + for (i = 0; i < n; ++i) { + unsigned long address; + pte_t *pte; + unsigned int level; + + address = (unsigned long) page_address(&p[i]); + pte = lookup_address(address, &level); + BUG_ON(!pte); + BUG_ON(level != PG_LEVEL_4K); + + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); + set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN)); + __flush_tlb_one(address); + } +} + +/* Access may NOT cross page boundary */ +static void kmemcheck_read_strict(struct pt_regs *regs, + unsigned long addr, unsigned int size) +{ + void *shadow; + enum kmemcheck_shadow status; + + shadow = kmemcheck_shadow_lookup(addr); + if (!shadow) + return; + + kmemcheck_save_addr(addr); + status = kmemcheck_shadow_test(shadow, size); + if (status == KMEMCHECK_SHADOW_INITIALIZED) + return; + + if (kmemcheck_enabled) + kmemcheck_error_save(status, addr, size, regs); + + if (kmemcheck_enabled == 2) + kmemcheck_enabled = 0; + + /* Don't warn about it again. */ + kmemcheck_shadow_set(shadow, size); +} + +/* Access may cross page boundary */ +static void kmemcheck_read(struct pt_regs *regs, + unsigned long addr, unsigned int size) +{ + unsigned long page = addr & PAGE_MASK; + unsigned long next_addr = addr + size - 1; + unsigned long next_page = next_addr & PAGE_MASK; + + if (likely(page == next_page)) { + kmemcheck_read_strict(regs, addr, size); + return; + } + + /* + * What we do is basically to split the access across the + * two pages and handle each part separately. Yes, this means + * that we may now see reads that are 3 + 5 bytes, for + * example (and if both are uninitialized, there will be two + * reports), but it makes the code a lot simpler. + */ + kmemcheck_read_strict(regs, addr, next_page - addr); + kmemcheck_read_strict(regs, next_page, next_addr - next_page); +} + +static void kmemcheck_write_strict(struct pt_regs *regs, + unsigned long addr, unsigned int size) +{ + void *shadow; + + shadow = kmemcheck_shadow_lookup(addr); + if (!shadow) + return; + + kmemcheck_save_addr(addr); + kmemcheck_shadow_set(shadow, size); +} + +static void kmemcheck_write(struct pt_regs *regs, + unsigned long addr, unsigned int size) +{ + unsigned long page = addr & PAGE_MASK; + unsigned long next_addr = addr + size - 1; + unsigned long next_page = next_addr & PAGE_MASK; + + if (likely(page == next_page)) { + kmemcheck_write_strict(regs, addr, size); + return; + } + + /* See comment in kmemcheck_read(). */ + kmemcheck_write_strict(regs, addr, next_page - addr); + kmemcheck_write_strict(regs, next_page, next_addr - next_page); +} + +/* + * Copying is hard. We have two addresses, each of which may be split across + * a page (and each page will have different shadow addresses). + */ +static void kmemcheck_copy(struct pt_regs *regs, + unsigned long src_addr, unsigned long dst_addr, unsigned int size) +{ + uint8_t shadow[8]; + enum kmemcheck_shadow status; + + unsigned long page; + unsigned long next_addr; + unsigned long next_page; + + uint8_t *x; + unsigned int i; + unsigned int n; + + BUG_ON(size > sizeof(shadow)); + + page = src_addr & PAGE_MASK; + next_addr = src_addr + size - 1; + next_page = next_addr & PAGE_MASK; + + if (likely(page == next_page)) { + /* Same page */ + x = kmemcheck_shadow_lookup(src_addr); + if (x) { + kmemcheck_save_addr(src_addr); + for (i = 0; i < size; ++i) + shadow[i] = x[i]; + } else { + for (i = 0; i < size; ++i) + shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; + } + } else { + n = next_page - src_addr; + BUG_ON(n > sizeof(shadow)); + + /* First page */ + x = kmemcheck_shadow_lookup(src_addr); + if (x) { + kmemcheck_save_addr(src_addr); + for (i = 0; i < n; ++i) + shadow[i] = x[i]; + } else { + /* Not tracked */ + for (i = 0; i < n; ++i) + shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; + } + + /* Second page */ + x = kmemcheck_shadow_lookup(next_page); + if (x) { + kmemcheck_save_addr(next_page); + for (i = n; i < size; ++i) + shadow[i] = x[i - n]; + } else { + /* Not tracked */ + for (i = n; i < size; ++i) + shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; + } + } + + page = dst_addr & PAGE_MASK; + next_addr = dst_addr + size - 1; + next_page = next_addr & PAGE_MASK; + + if (likely(page == next_page)) { + /* Same page */ + x = kmemcheck_shadow_lookup(dst_addr); + if (x) { + kmemcheck_save_addr(dst_addr); + for (i = 0; i < size; ++i) { + x[i] = shadow[i]; + shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; + } + } + } else { + n = next_page - dst_addr; + BUG_ON(n > sizeof(shadow)); + + /* First page */ + x = kmemcheck_shadow_lookup(dst_addr); + if (x) { + kmemcheck_save_addr(dst_addr); + for (i = 0; i < n; ++i) { + x[i] = shadow[i]; + shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; + } + } + + /* Second page */ + x = kmemcheck_shadow_lookup(next_page); + if (x) { + kmemcheck_save_addr(next_page); + for (i = n; i < size; ++i) { + x[i - n] = shadow[i]; + shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; + } + } + } + + status = kmemcheck_shadow_test(shadow, size); + if (status == KMEMCHECK_SHADOW_INITIALIZED) + return; + + if (kmemcheck_enabled) + kmemcheck_error_save(status, src_addr, size, regs); + + if (kmemcheck_enabled == 2) + kmemcheck_enabled = 0; +} + +enum kmemcheck_method { + KMEMCHECK_READ, + KMEMCHECK_WRITE, +}; + +static void kmemcheck_access(struct pt_regs *regs, + unsigned long fallback_address, enum kmemcheck_method fallback_method) +{ + const uint8_t *insn; + const uint8_t *insn_primary; + unsigned int size; + + struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + + /* Recursive fault -- ouch. */ + if (data->busy) { + kmemcheck_show_addr(fallback_address); + kmemcheck_error_save_bug(regs); + return; + } + + data->busy = true; + + insn = (const uint8_t *) regs->ip; + insn_primary = kmemcheck_opcode_get_primary(insn); + + kmemcheck_opcode_decode(insn, &size); + + switch (insn_primary[0]) { +#ifdef CONFIG_KMEMCHECK_BITOPS_OK + /* AND, OR, XOR */ + /* + * Unfortunately, these instructions have to be excluded from + * our regular checking since they access only some (and not + * all) bits. This clears out "bogus" bitfield-access warnings. + */ + case 0x80: + case 0x81: + case 0x82: + case 0x83: + switch ((insn_primary[1] >> 3) & 7) { + /* OR */ + case 1: + /* AND */ + case 4: + /* XOR */ + case 6: + kmemcheck_write(regs, fallback_address, size); + goto out; + + /* ADD */ + case 0: + /* ADC */ + case 2: + /* SBB */ + case 3: + /* SUB */ + case 5: + /* CMP */ + case 7: + break; + } + break; +#endif + + /* MOVS, MOVSB, MOVSW, MOVSD */ + case 0xa4: + case 0xa5: + /* + * These instructions are special because they take two + * addresses, but we only get one page fault. + */ + kmemcheck_copy(regs, regs->si, regs->di, size); + goto out; + + /* CMPS, CMPSB, CMPSW, CMPSD */ + case 0xa6: + case 0xa7: + kmemcheck_read(regs, regs->si, size); + kmemcheck_read(regs, regs->di, size); + goto out; + } + + /* + * If the opcode isn't special in any way, we use the data from the + * page fault handler to determine the address and type of memory + * access. + */ + switch (fallback_method) { + case KMEMCHECK_READ: + kmemcheck_read(regs, fallback_address, size); + goto out; + case KMEMCHECK_WRITE: + kmemcheck_write(regs, fallback_address, size); + goto out; + } + +out: + data->busy = false; +} + +bool kmemcheck_fault(struct pt_regs *regs, unsigned long address, + unsigned long error_code) +{ + pte_t *pte; + unsigned int level; + + /* + * XXX: Is it safe to assume that memory accesses from virtual 86 + * mode or non-kernel code segments will _never_ access kernel + * memory (e.g. tracked pages)? For now, we need this to avoid + * invoking kmemcheck for PnP BIOS calls. + */ + if (regs->flags & X86_VM_MASK) + return false; + if (regs->cs != __KERNEL_CS) + return false; + + pte = lookup_address(address, &level); + if (!pte) + return false; + if (level != PG_LEVEL_4K) + return false; + if (!pte_hidden(*pte)) + return false; + + if (error_code & 2) + kmemcheck_access(regs, address, KMEMCHECK_WRITE); + else + kmemcheck_access(regs, address, KMEMCHECK_READ); + + kmemcheck_show(regs); + return true; +} + +bool kmemcheck_trap(struct pt_regs *regs) +{ + if (!kmemcheck_active(regs)) + return false; + + /* We're done. */ + kmemcheck_hide(regs); + return true; +} diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c new file mode 100644 index 000000000000..a4100b6e783a --- /dev/null +++ b/arch/x86/mm/kmemcheck/opcode.c @@ -0,0 +1,101 @@ +#include + +#include "opcode.h" + +static bool opcode_is_prefix(uint8_t b) +{ + return + /* Group 1 */ + b == 0xf0 || b == 0xf2 || b == 0xf3 + /* Group 2 */ + || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26 + || b == 0x64 || b == 0x65 || b == 0x2e || b == 0x3e + /* Group 3 */ + || b == 0x66 + /* Group 4 */ + || b == 0x67; +} + +static bool opcode_is_rex_prefix(uint8_t b) +{ + return (b & 0xf0) == 0x40; +} + +#define REX_W (1 << 3) + +/* + * This is a VERY crude opcode decoder. We only need to find the size of the + * load/store that caused our #PF and this should work for all the opcodes + * that we care about. Moreover, the ones who invented this instruction set + * should be shot. + */ +void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size) +{ + /* Default operand size */ + int operand_size_override = 4; + + /* prefixes */ + for (; opcode_is_prefix(*op); ++op) { + if (*op == 0x66) + operand_size_override = 2; + } + +#ifdef CONFIG_X86_64 + /* REX prefix */ + if (opcode_is_rex_prefix(*op)) { + uint8_t rex = *op; + + ++op; + if (rex & REX_W) { + switch (*op) { + case 0x63: + *size = 4; + return; + case 0x0f: + ++op; + + switch (*op) { + case 0xb6: + case 0xbe: + *size = 1; + return; + case 0xb7: + case 0xbf: + *size = 2; + return; + } + + break; + } + + *size = 8; + return; + } + } +#endif + + /* escape opcode */ + if (*op == 0x0f) { + ++op; + + /* + * This is move with zero-extend and sign-extend, respectively; + * we don't have to think about 0xb6/0xbe, because this is + * already handled in the conditional below. + */ + if (*op == 0xb7 || *op == 0xbf) + operand_size_override = 2; + } + + *size = (*op & 1) ? operand_size_override : 1; +} + +const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op) +{ + /* skip prefixes */ + while (opcode_is_prefix(*op)) + ++op; + if (opcode_is_rex_prefix(*op)) + ++op; + return op; +} diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h new file mode 100644 index 000000000000..6956aad66b5b --- /dev/null +++ b/arch/x86/mm/kmemcheck/opcode.h @@ -0,0 +1,9 @@ +#ifndef ARCH__X86__MM__KMEMCHECK__OPCODE_H +#define ARCH__X86__MM__KMEMCHECK__OPCODE_H + +#include + +void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size); +const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op); + +#endif diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c new file mode 100644 index 000000000000..4ead26eeaf96 --- /dev/null +++ b/arch/x86/mm/kmemcheck/pte.c @@ -0,0 +1,22 @@ +#include + +#include + +#include "pte.h" + +pte_t *kmemcheck_pte_lookup(unsigned long address) +{ + pte_t *pte; + unsigned int level; + + pte = lookup_address(address, &level); + if (!pte) + return NULL; + if (level != PG_LEVEL_4K) + return NULL; + if (!pte_hidden(*pte)) + return NULL; + + return pte; +} + diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h new file mode 100644 index 000000000000..9f5966456492 --- /dev/null +++ b/arch/x86/mm/kmemcheck/pte.h @@ -0,0 +1,10 @@ +#ifndef ARCH__X86__MM__KMEMCHECK__PTE_H +#define ARCH__X86__MM__KMEMCHECK__PTE_H + +#include + +#include + +pte_t *kmemcheck_pte_lookup(unsigned long address); + +#endif diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c new file mode 100644 index 000000000000..5544d3600877 --- /dev/null +++ b/arch/x86/mm/kmemcheck/shadow.c @@ -0,0 +1,153 @@ +#include +#include +#include + +#include +#include + +#include "pte.h" +#include "shadow.h" + +/* + * Return the shadow address for the given address. Returns NULL if the + * address is not tracked. + * + * We need to be extremely careful not to follow any invalid pointers, + * because this function can be called for *any* possible address. + */ +void *kmemcheck_shadow_lookup(unsigned long address) +{ + pte_t *pte; + struct page *page; + + if (!virt_addr_valid(address)) + return NULL; + + pte = kmemcheck_pte_lookup(address); + if (!pte) + return NULL; + + page = virt_to_page(address); + if (!page->shadow) + return NULL; + return page->shadow + (address & (PAGE_SIZE - 1)); +} + +static void mark_shadow(void *address, unsigned int n, + enum kmemcheck_shadow status) +{ + unsigned long addr = (unsigned long) address; + unsigned long last_addr = addr + n - 1; + unsigned long page = addr & PAGE_MASK; + unsigned long last_page = last_addr & PAGE_MASK; + unsigned int first_n; + void *shadow; + + /* If the memory range crosses a page boundary, stop there. */ + if (page == last_page) + first_n = n; + else + first_n = page + PAGE_SIZE - addr; + + shadow = kmemcheck_shadow_lookup(addr); + if (shadow) + memset(shadow, status, first_n); + + addr += first_n; + n -= first_n; + + /* Do full-page memset()s. */ + while (n >= PAGE_SIZE) { + shadow = kmemcheck_shadow_lookup(addr); + if (shadow) + memset(shadow, status, PAGE_SIZE); + + addr += PAGE_SIZE; + n -= PAGE_SIZE; + } + + /* Do the remaining page, if any. */ + if (n > 0) { + shadow = kmemcheck_shadow_lookup(addr); + if (shadow) + memset(shadow, status, n); + } +} + +void kmemcheck_mark_unallocated(void *address, unsigned int n) +{ + mark_shadow(address, n, KMEMCHECK_SHADOW_UNALLOCATED); +} + +void kmemcheck_mark_uninitialized(void *address, unsigned int n) +{ + mark_shadow(address, n, KMEMCHECK_SHADOW_UNINITIALIZED); +} + +/* + * Fill the shadow memory of the given address such that the memory at that + * address is marked as being initialized. + */ +void kmemcheck_mark_initialized(void *address, unsigned int n) +{ + mark_shadow(address, n, KMEMCHECK_SHADOW_INITIALIZED); +} +EXPORT_SYMBOL_GPL(kmemcheck_mark_initialized); + +void kmemcheck_mark_freed(void *address, unsigned int n) +{ + mark_shadow(address, n, KMEMCHECK_SHADOW_FREED); +} + +void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n) +{ + unsigned int i; + + for (i = 0; i < n; ++i) + kmemcheck_mark_unallocated(page_address(&p[i]), PAGE_SIZE); +} + +void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n) +{ + unsigned int i; + + for (i = 0; i < n; ++i) + kmemcheck_mark_uninitialized(page_address(&p[i]), PAGE_SIZE); +} + +enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) +{ + uint8_t *x; + unsigned int i; + + x = shadow; + +#ifdef CONFIG_KMEMCHECK_PARTIAL_OK + /* + * Make sure _some_ bytes are initialized. Gcc frequently generates + * code to access neighboring bytes. + */ + for (i = 0; i < size; ++i) { + if (x[i] == KMEMCHECK_SHADOW_INITIALIZED) + return x[i]; + } +#else + /* All bytes must be initialized. */ + for (i = 0; i < size; ++i) { + if (x[i] != KMEMCHECK_SHADOW_INITIALIZED) + return x[i]; + } +#endif + + return x[0]; +} + +void kmemcheck_shadow_set(void *shadow, unsigned int size) +{ + uint8_t *x; + unsigned int i; + + x = shadow; + for (i = 0; i < size; ++i) + x[i] = KMEMCHECK_SHADOW_INITIALIZED; +} diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h new file mode 100644 index 000000000000..af46d9ab9d86 --- /dev/null +++ b/arch/x86/mm/kmemcheck/shadow.h @@ -0,0 +1,16 @@ +#ifndef ARCH__X86__MM__KMEMCHECK__SHADOW_H +#define ARCH__X86__MM__KMEMCHECK__SHADOW_H + +enum kmemcheck_shadow { + KMEMCHECK_SHADOW_UNALLOCATED, + KMEMCHECK_SHADOW_UNINITIALIZED, + KMEMCHECK_SHADOW_INITIALIZED, + KMEMCHECK_SHADOW_FREED, +}; + +void *kmemcheck_shadow_lookup(unsigned long address); + +enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size); +void kmemcheck_shadow_set(void *shadow, unsigned int size); + +#endif diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h new file mode 100644 index 000000000000..39480c91b2f9 --- /dev/null +++ b/include/linux/kmemcheck.h @@ -0,0 +1,17 @@ +#ifndef LINUX_KMEMCHECK_H +#define LINUX_KMEMCHECK_H + +#include +#include + +#ifdef CONFIG_KMEMCHECK +extern int kmemcheck_enabled; + +int kmemcheck_show_addr(unsigned long address); +int kmemcheck_hide_addr(unsigned long address); +#else +#define kmemcheck_enabled 0 + +#endif /* CONFIG_KMEMCHECK */ + +#endif /* LINUX_KMEMCHECK_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0e80e26ecf21..0042090a4d70 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -98,6 +98,14 @@ struct page { #ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS unsigned long debug_flags; /* Use atomic bitops on this */ #endif + +#ifdef CONFIG_KMEMCHECK + /* + * kmemcheck wants to track the status of each byte in a page; this + * is a pointer to such a status block. NULL if not tracked. + */ + void *shadow; +#endif }; /* diff --git a/init/main.c b/init/main.c index 5616661eac01..e3c335e47cd2 100644 --- a/init/main.c +++ b/init/main.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ce664f98e3fb..9ef80bba3509 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -959,6 +960,17 @@ static struct ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif +#ifdef CONFIG_KMEMCHECK + { + .ctl_name = CTL_UNNUMBERED, + .procname = "kmemcheck", + .data = &kmemcheck_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif + /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt -- 2.20.1