x86-32: Fix possible incomplete TLB invalidate with PAE pagetables
authorDave Hansen <dave@sr71.net>
Fri, 12 Apr 2013 23:23:54 +0000 (16:23 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 12 Apr 2013 23:56:47 +0000 (16:56 -0700)
This patch attempts to fix:

https://bugzilla.kernel.org/show_bug.cgi?id=56461

The symptom is a crash and messages like this:

chrome: Corrupted page table at address 34a03000
*pdpt = 0000000000000000 *pde = 0000000000000000
Bad pagetable: 000f [#1] PREEMPT SMP

Ingo guesses this got introduced by commit 611ae8e3f520 ("x86/tlb:
enable tlb flush range support for x86") since that code started to free
unused pagetables.

On x86-32 PAE kernels, that new code has the potential to free an entire
PMD page and will clear one of the four page-directory-pointer-table
(aka pgd_t entries).

The hardware aggressively "caches" these top-level entries and invlpg
does not actually affect the CPU's copy.  If we clear one we *HAVE* to
do a full TLB flush, otherwise we might continue using a freed pmd page.
(note, we do this properly on the population side in pud_populate()).

This patch tracks whenever we clear one of these entries in the 'struct
mmu_gather', and ensures that we follow up with a full tlb flush.

BTW, I disassembled and checked that:

if (tlb->fullmm == 0)
and
if (!tlb->fullmm && !tlb->need_flush_all)

generate essentially the same code, so there should be zero impact there
to the !PAE case.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Artem S Tashkinov <t.artem@mailcity.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
arch/x86/include/asm/tlb.h
arch/x86/mm/pgtable.c
include/asm-generic/tlb.h
mm/memory.c

index 4fef20773b8f9fcf104702ac2d8ef0c29a05c235..c7797307fc2ba5b24adacced83c9587bc5d2a8de 100644 (file)
@@ -7,7 +7,7 @@
 
 #define tlb_flush(tlb)                                                 \
 {                                                                      \
-       if (tlb->fullmm == 0)                                           \
+       if (!tlb->fullmm && !tlb->need_flush_all)                       \
                flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, 0UL); \
        else                                                            \
                flush_tlb_mm_range(tlb->mm, 0UL, TLB_FLUSH_ALL, 0UL);   \
index 193350b51f90af74508a8ec97217085bf3acdbab..17fda6a8b3c2df13adaee439b0ad6ab9f50796cb 100644 (file)
@@ -58,6 +58,13 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 {
        paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
+       /*
+        * NOTE! For PAE, any changes to the top page-directory-pointer-table
+        * entries need a full cr3 reload to flush.
+        */
+#ifdef CONFIG_X86_PAE
+       tlb->need_flush_all = 1;
+#endif
        tlb_remove_page(tlb, virt_to_page(pmd));
 }
 
index 25f01d0bc149cc388ce5a8e66d556669809e2b34..b1b1fa6ffffecce461a44945524d60d9b46dd54c 100644 (file)
@@ -99,7 +99,12 @@ struct mmu_gather {
        unsigned int            need_flush : 1, /* Did free PTEs */
                                fast_mode  : 1; /* No batching   */
 
-       unsigned int            fullmm;
+       /* we are in the middle of an operation to clear
+        * a full mm and can make some optimizations */
+       unsigned int            fullmm : 1,
+       /* we have performed an operation which
+        * requires a complete flush of the tlb */
+                               need_flush_all : 1;
 
        struct mmu_gather_batch *active;
        struct mmu_gather_batch local;
index 494526ae024a3173fc200d47e36087f6252852c2..13cbc420fead06d96f82b437a67d03b6ff9d5fca 100644 (file)
@@ -216,6 +216,7 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
        tlb->mm = mm;
 
        tlb->fullmm     = fullmm;
+       tlb->need_flush_all = 0;
        tlb->start      = -1UL;
        tlb->end        = 0;
        tlb->need_flush = 0;