x86, mm: Fix page table early allocation offset checking
author Yinghai Lu <yinghai@kernel.org>
Thu, 24 Jan 2013 20:19:42 +0000 (12:19 -0800)
committer H. Peter Anvin <hpa@linux.intel.com>
Tue, 29 Jan 2013 23:12:23 +0000 (15:12 -0800)
While debugging loading the kernel above 4G, we found that one page in the
pre-allocated BRK area for early page table allocation is never used.
pgt_buf_top is itself an address that cannot be used, so we should check
whether the new end is above that top; otherwise the last page is never used.

Fix that check and also print out allocations from the pre-allocated
BRK area to help catch possible bugs later.

But once we get that page back for pgt, it triggers a bug in pgt allocation
with Xen: we must avoid using a page as a pgt to map a range that
overlaps with that pgt page.

Add a check for such overlap; when it happens, use memblock allocation
instead.  That fixes the crash on a Xen PV guest with 2G that Stefan found.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/1359058816-7615-2-git-send-email-yinghai@kernel.org
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Tested-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
arch/x86/mm/init.c

index 6f85de8a1f281a63b5c2d632b473279bb7c639fe..78d1ef3eab6624bc763cc5edd368bad204adc730 100644 (file)
@@ -25,6 +25,8 @@ static unsigned long __initdata pgt_buf_top;
 
 static unsigned long min_pfn_mapped;
 
+static bool __initdata can_use_brk_pgt = true;
+
 /*
  * Pages returned are already directly mapped.
  *
@@ -47,7 +49,7 @@ __ref void *alloc_low_pages(unsigned int num)
                                                __GFP_ZERO, order);
        }
 
-       if ((pgt_buf_end + num) >= pgt_buf_top) {
+       if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) {
                unsigned long ret;
                if (min_pfn_mapped >= max_pfn_mapped)
                        panic("alloc_low_page: ran out of memory");
@@ -61,6 +63,8 @@ __ref void *alloc_low_pages(unsigned int num)
        } else {
                pfn = pgt_buf_end;
                pgt_buf_end += num;
+               printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n",
+                       pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1);
        }
 
        for (i = 0; i < num; i++) {
@@ -370,8 +374,15 @@ static unsigned long __init init_range_memory_mapping(
                if (start >= end)
                        continue;
 
+               /*
+                * if it is overlapping with brk pgt, we need to
+                * alloc pgt buf from memblock instead.
+                */
+               can_use_brk_pgt = max(start, (u64)pgt_buf_end<<PAGE_SHIFT) >=
+                                   min(end, (u64)pgt_buf_top<<PAGE_SHIFT);
                init_memory_mapping(start, end);
                mapped_ram_size += end - start;
+               can_use_brk_pgt = true;
        }
 
        return mapped_ram_size;