x86: Fix alternatives and kprobes to remap write-protected kernel text
authorAndi Kleen <ak@suse.de>
Sun, 22 Jul 2007 09:12:31 +0000 (11:12 +0200)
committerLinus Torvalds <torvalds@woody.linux-foundation.org>
Sun, 22 Jul 2007 18:03:37 +0000 (11:03 -0700)
Reenable kprobes and alternative patching when the kernel text is write
protected by DEBUG_RODATA

Add a general utility function to change write protected text.  The new
function remaps the code using vmap to write it and takes care of CPU
synchronization.  It also does CLFLUSH to make icache recovery faster.

There are some limitations on when the function can be used, see the
comment.

This is a newer version that also changes the paravirt_ops code.
text_poke also supports multi byte patching now.

Contains bug fixes from Zach Amsden and suggestions from Mathieu
Desnoyers.

Cc: Jan Beulich <jbeulich@novell.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Mathieu Desnoyers <compudj@krystal.dyndns.org>
Cc: Zach Amsden <zach@vmware.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
arch/i386/kernel/alternative.c
arch/i386/kernel/kprobes.c
arch/i386/kernel/paravirt.c
arch/i386/mm/init.c
arch/x86_64/kernel/kprobes.c
arch/x86_64/mm/init.c
arch/x86_64/mm/pageattr.c
include/asm-i386/alternative.h
include/asm-x86_64/alternative.h
include/asm-x86_64/pgtable.h

index 0695be538de53cb88f56e6d068feabbe733d16ae..206ea2ca63cc5beb221c238ff81172244de262b9 100644 (file)
@@ -2,8 +2,12 @@
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
+#include <linux/kprobes.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
 #include <asm/alternative.h>
 #include <asm/sections.h>
+#include <asm/pgtable.h>
 
 #ifdef CONFIG_HOTPLUG_CPU
 static int smp_alt_once;
@@ -150,7 +154,7 @@ static void nop_out(void *insns, unsigned int len)
                unsigned int noplen = len;
                if (noplen > ASM_NOP_MAX)
                        noplen = ASM_NOP_MAX;
-               memcpy(insns, noptable[noplen], noplen);
+               text_poke(insns, noptable[noplen], noplen);
                insns += noplen;
                len -= noplen;
        }
@@ -202,7 +206,7 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
                        continue;
                if (*ptr > text_end)
                        continue;
-               **ptr = 0xf0; /* lock prefix */
+               text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */
        };
 }
 
@@ -360,10 +364,6 @@ void apply_paravirt(struct paravirt_patch_site *start,
                /* Pad the rest with nops */
                nop_out(p->instr + used, p->len - used);
        }
-
-       /* Sync to be conservative, in case we patched following
-        * instructions */
-       sync_core();
 }
 extern struct paravirt_patch_site __start_parainstructions[],
        __stop_parainstructions[];
@@ -406,3 +406,31 @@ void __init alternative_instructions(void)
        apply_paravirt(__parainstructions, __parainstructions_end);
        local_irq_restore(flags);
 }
+
+/*
+ * Warning:
+ * When you use this code to patch more than one byte of an instruction
+ * you need to make sure that other CPUs cannot execute this code in parallel.
+ * Also no thread must be currently preempted in the middle of these instructions.
+ * And on the local CPU you need to be protected again NMI or MCE handlers
+ * seeing an inconsistent instruction while you patch.
+ */
+void __kprobes text_poke(void *oaddr, unsigned char *opcode, int len)
+{
+        u8 *addr = oaddr;
+       if (!pte_write(*lookup_address((unsigned long)addr))) {
+               struct page *p[2] = { virt_to_page(addr), virt_to_page(addr+PAGE_SIZE) };
+               addr = vmap(p, 2, VM_MAP, PAGE_KERNEL);
+               if (!addr)
+                       return;
+               addr += ((unsigned long)oaddr) % PAGE_SIZE;
+       }
+       memcpy(addr, opcode, len);
+       sync_core();
+       /* Not strictly needed, but can speed CPU recovery up. Ignore cross cacheline
+          case. */
+       if (cpu_has_clflush)
+               asm("clflush (%0) " :: "r" (oaddr) : "memory");
+       if (addr != oaddr)
+               vunmap(addr);
+}
index dde828a333c3e8355e49d0a3ed8d562b3071d4f9..448a50b1324c94636d1a2b71f672f901991eedc6 100644 (file)
@@ -35,6 +35,7 @@
 #include <asm/cacheflush.h>
 #include <asm/desc.h>
 #include <asm/uaccess.h>
+#include <asm/alternative.h>
 
 void jprobe_return_end(void);
 
@@ -169,16 +170,12 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
-       *p->addr = BREAKPOINT_INSTRUCTION;
-       flush_icache_range((unsigned long) p->addr,
-                          (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+       text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);
 }
 
 void __kprobes arch_disarm_kprobe(struct kprobe *p)
 {
-       *p->addr = p->opcode;
-       flush_icache_range((unsigned long) p->addr,
-                          (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+       text_poke(p->addr, &p->opcode, 1);
 }
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
index 53f07a8275e3a9b39ba4581e8f8a03bb75e336ce..79c167fcaee9d97558c173bde5647bdd83445ebb 100644 (file)
@@ -124,20 +124,28 @@ unsigned paravirt_patch_ignore(unsigned len)
        return len;
 }
 
+struct branch {
+       unsigned char opcode;
+       u32 delta;
+} __attribute__((packed));
+
 unsigned paravirt_patch_call(void *target, u16 tgt_clobbers,
                             void *site, u16 site_clobbers,
                             unsigned len)
 {
        unsigned char *call = site;
        unsigned long delta = (unsigned long)target - (unsigned long)(call+5);
+       struct branch b;
 
        if (tgt_clobbers & ~site_clobbers)
                return len;     /* target would clobber too much for this site */
        if (len < 5)
                return len;     /* call too long for patch site */
 
-       *call++ = 0xe8;         /* call */
-       *(unsigned long *)call = delta;
+       b.opcode = 0xe8; /* call */
+       b.delta = delta;
+       BUILD_BUG_ON(sizeof(b) != 5);
+       text_poke(call, (unsigned char *)&b, 5);
 
        return 5;
 }
@@ -150,8 +158,9 @@ unsigned paravirt_patch_jmp(void *target, void *site, unsigned len)
        if (len < 5)
                return len;     /* call too long for patch site */
 
-       *jmp++ = 0xe9;          /* jmp */
-       *(unsigned long *)jmp = delta;
+       b.opcode = 0xe9;        /* jmp */
+       b.delta = delta;
+       text_poke(call, (unsigned char *)&b, 5);
 
        return 5;
 }
index e1a9a805c445c58573e5e77dd1be682d52d9b5ee..c3b9905af2d5024b71451e796c052a2ad1813718 100644 (file)
@@ -800,17 +800,9 @@ void mark_rodata_ro(void)
        unsigned long start = PFN_ALIGN(_text);
        unsigned long size = PFN_ALIGN(_etext) - start;
 
-#ifndef CONFIG_KPROBES
-#ifdef CONFIG_HOTPLUG_CPU
-       /* It must still be possible to apply SMP alternatives. */
-       if (num_possible_cpus() <= 1)
-#endif
-       {
-               change_page_attr(virt_to_page(start),
-                                size >> PAGE_SHIFT, PAGE_KERNEL_RX);
-               printk("Write protecting the kernel text: %luk\n", size >> 10);
-       }
-#endif
+       change_page_attr(virt_to_page(start),
+                        size >> PAGE_SHIFT, PAGE_KERNEL_RX);
+       printk("Write protecting the kernel text: %luk\n", size >> 10);
        start += size;
        size = (unsigned long)__end_rodata - start;
        change_page_attr(virt_to_page(start),
index d4a0d0ac99351a8fd1b67ca9f65fa6aa01861b9b..a30e004682e2dc60fe81a7e1caf007fbbcf314a7 100644 (file)
@@ -39,9 +39,9 @@
 #include <linux/module.h>
 #include <linux/kdebug.h>
 
-#include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
+#include <asm/alternative.h>
 
 void jprobe_return_end(void);
 static void __kprobes arch_copy_kprobe(struct kprobe *p);
@@ -209,16 +209,12 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
 
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
-       *p->addr = BREAKPOINT_INSTRUCTION;
-       flush_icache_range((unsigned long) p->addr,
-                          (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+       text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1);
 }
 
 void __kprobes arch_disarm_kprobe(struct kprobe *p)
 {
-       *p->addr = p->opcode;
-       flush_icache_range((unsigned long) p->addr,
-                          (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+       text_poke(p->addr, &p->opcode, 1);
 }
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
index 2044fa961c0764a31e7f1b9587aa54d49ddad9b5..314e12b2209f111464dd499949bc91d806e0b095 100644 (file)
@@ -600,16 +600,6 @@ void mark_rodata_ro(void)
 {
        unsigned long start = (unsigned long)_stext, end;
 
-#ifdef CONFIG_HOTPLUG_CPU
-       /* It must still be possible to apply SMP alternatives. */
-       if (num_possible_cpus() > 1)
-               start = (unsigned long)_etext;
-#endif
-
-#ifdef CONFIG_KPROBES
-       start = (unsigned long)__start_rodata;
-#endif
-       
        end = (unsigned long)__end_rodata;
        start = (start + PAGE_SIZE - 1) & PAGE_MASK;
        end &= PAGE_MASK;
index 36377b6b8efe9af1039104b753665edf96a80821..7e161c698af47cb98a766c8253c8596cfeeb4b74 100644 (file)
@@ -13,7 +13,7 @@
 #include <asm/tlbflush.h>
 #include <asm/io.h>
 
-static inline pte_t *lookup_address(unsigned long address) 
+pte_t *lookup_address(unsigned long address)
 { 
        pgd_t *pgd = pgd_offset_k(address);
        pud_t *pud;
index eb7da5402bfaa17623bb523bed36c171adc0c776..bda6c810c0f4e3ac48973f3af52fdd478862f05b 100644 (file)
@@ -149,4 +149,6 @@ apply_paravirt(struct paravirt_patch_site *start,
 #define __parainstructions_end NULL
 #endif
 
+extern void text_poke(void *addr, unsigned char *opcode, int len);
+
 #endif /* _I386_ALTERNATIVE_H */
index eea7aecfac787cef93d9ae09b66745cb313dda06..ab161e810151aa2fbe9bed5f08fef5194b0f28c2 100644 (file)
@@ -154,4 +154,6 @@ apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
 #define __parainstructions_end NULL
 #endif
 
+extern void text_poke(void *addr, unsigned char *opcode, int len);
+
 #endif /* _X86_64_ALTERNATIVE_H */
index 60cff1e4f7a34b9797dcb8eb4b6e39e6ff839867..c9d8764c89d182920dc1b50b4938f63521292c70 100644 (file)
@@ -403,6 +403,8 @@ extern struct list_head pgd_list;
 
 extern int kern_addr_valid(unsigned long addr); 
 
+pte_t *lookup_address(unsigned long addr);
+
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)                \
                remap_pfn_range(vma, vaddr, pfn, size, prot)