* and we must append to the existing area instead of entirely
* overwriting it.
*/
- level4p = read_cr3();
+ level4p = read_cr3_pa();
if (level4p == (unsigned long)_pgtable) {
debug_putstr("booted via startup_32()\n");
pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
__kernel_fpu_begin(); \
\
if (efi_scratch.use_pgd) { \
- efi_scratch.prev_cr3 = read_cr3(); \
+ efi_scratch.prev_cr3 = __read_cr3(); \
write_cr3((unsigned long)efi_scratch.efi_pgt); \
__flush_tlb_all(); \
} \
/*
* This can be used from process context to figure out what the value of
- * CR3 is without needing to do a (slow) read_cr3().
+ * CR3 is without needing to do a (slow) __read_cr3().
*
* It's intended to be used for code like KVM that sneakily changes CR3
* and needs to restore it. It needs to be used very carefully.
/* For now, be very restrictive about when this can be called. */
VM_WARN_ON(in_nmi() || !in_atomic());
- VM_BUG_ON(cr3 != read_cr3());
+ VM_BUG_ON(cr3 != __read_cr3());
return cr3;
}
PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
}
-static inline unsigned long read_cr3(void)
+static inline unsigned long __read_cr3(void)
{
return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
}
#else
#define X86_VM_MASK 0 /* No VM86 support */
#endif
+
+/*
+ * CR3's layout varies depending on several things.
+ *
+ * If CR4.PCIDE is set (64-bit only), then CR3[11:0] is the address space ID.
+ * If PAE is enabled, then CR3[11:5] is part of the PDPT address
+ * (i.e. it's 32-byte aligned, not page-aligned) and CR3[4:0] is ignored.
+ * Otherwise (non-PAE, non-PCID), CR3[3] is PWT, CR3[4] is PCD, and
+ * CR3[2:0] and CR3[11:5] are ignored.
+ *
+ * In all cases, Linux puts zeros in the low ignored bits and in PWT and PCD.
+ *
+ * CR3[63] is always read as zero. If CR4.PCIDE is set, then CR3[63] may be
+ * written as 1 to prevent the write to CR3 from flushing the TLB.
+ *
+ * On systems with SME, one bit (in a variable position!) is stolen to indicate
+ * that the top-level paging structure is encrypted.
+ *
+ * All of the remaining bits indicate the physical address of the top-level
+ * paging structure.
+ *
+ * CR3_ADDR_MASK is the mask used by read_cr3_pa().
+ */
+#ifdef CONFIG_X86_64
+/* Mask off the address space ID bits. */
+#define CR3_ADDR_MASK 0x7FFFFFFFFFFFF000ull
+#define CR3_PCID_MASK 0xFFFull
+#else
+/*
+ * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
+ * a tiny bit of code size by setting all the bits.
+ */
+#define CR3_ADDR_MASK 0xFFFFFFFFull
+#define CR3_PCID_MASK 0ull
+#endif
+
#endif /* _ASM_X86_PROCESSOR_FLAGS_H */
native_cpuid_reg(ecx)
native_cpuid_reg(edx)
+/*
+ * Friendlier CR3 helpers.
+ */
+static inline unsigned long read_cr3_pa(void)
+{
+ return __read_cr3() & CR3_ADDR_MASK;
+}
+
static inline void load_cr3(pgd_t *pgdir)
{
write_cr3(__pa(pgdir));
asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order));
}
-static inline unsigned long native_read_cr3(void)
+static inline unsigned long __native_read_cr3(void)
{
unsigned long val;
asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
native_write_cr2(x);
}
-static inline unsigned long read_cr3(void)
+/*
+ * Careful! CR3 contains more than just an address. You probably want
+ * read_cr3_pa() instead.
+ */
+static inline unsigned long __read_cr3(void)
{
- return native_read_cr3();
+ return __native_read_cr3();
}
static inline void write_cr3(unsigned long x)
* back:
*/
preempt_disable();
- native_write_cr3(native_read_cr3());
+ native_write_cr3(__native_read_cr3());
preempt_enable();
}
this_cpu_write(cpu_tlbstate.state, 0);
this_cpu_write(cpu_tlbstate.loaded_mm, &init_mm);
- WARN_ON(read_cr3() != __pa_symbol(swapper_pg_dir));
+ WARN_ON(read_cr3_pa() != __pa_symbol(swapper_pg_dir));
}
static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
pmdval_t pmd, *pmd_p;
/* Invalid address or early pgt is done ? */
- if (physaddr >= MAXMEM || read_cr3() != __pa_nodebug(early_level4_pgt))
+ if (physaddr >= MAXMEM ||
+ read_cr3_pa() != __pa_nodebug(early_level4_pgt))
return -1;
again:
.read_cr2 = native_read_cr2,
.write_cr2 = native_write_cr2,
- .read_cr3 = native_read_cr3,
+ .read_cr3 = __native_read_cr3,
.write_cr3 = native_write_cr3,
.flush_tlb_user = native_flush_tlb,
cr0 = read_cr0();
cr2 = read_cr2();
- cr3 = read_cr3();
+ cr3 = __read_cr3();
cr4 = __read_cr4();
printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
cr0, cr2, cr3, cr4);
cr0 = read_cr0();
cr2 = read_cr2();
- cr3 = read_cr3();
+ cr3 = __read_cr3();
cr4 = __read_cr4();
printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
* Save the most likely value for this task's CR3 in the VMCS.
* We can't use __get_current_cr3_fast() because we're not atomic.
*/
- cr3 = read_cr3();
+ cr3 = __read_cr3();
vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */
vmx->host_state.vmcs_host_cr3 = cr3;
* Do _not_ use "current" here. We might be inside
* an interrupt in the middle of a task switch..
*/
- pgd_paddr = read_cr3();
+ pgd_paddr = read_cr3_pa();
pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
if (!pmd_k)
return -1;
static void dump_pagetable(unsigned long address)
{
- pgd_t *base = __va(read_cr3());
+ pgd_t *base = __va(read_cr3_pa());
pgd_t *pgd = &base[pgd_index(address)];
p4d_t *p4d;
pud_t *pud;
* happen within a race in page table update. In the later
* case just flush:
*/
- pgd = (pgd_t *)__va(read_cr3()) + pgd_index(address);
+ pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address);
pgd_ref = pgd_offset_k(address);
if (pgd_none(*pgd_ref))
return -1;
static void dump_pagetable(unsigned long address)
{
- pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+ pgd_t *base = __va(read_cr3_pa());
pgd_t *pgd = base + pgd_index(address);
p4d_t *p4d;
pud_t *pud;
pgd_t *pgd;
pte_t *pte;
- pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+ pgd = __va(read_cr3_pa());
pgd += pgd_index(address);
pte = lookup_address_in_pgd(pgd, address, &level);
static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
/* Don't assume we're using swapper_pg_dir at this point */
- pgd_t *base = __va(read_cr3());
+ pgd_t *base = __va(read_cr3_pa());
pgd_t *pgd = &base[pgd_index(addr)];
p4d_t *p4d = p4d_offset(pgd, addr);
pud_t *pud = pud_offset(p4d, addr);
int n_pgds, i, j;
if (!efi_enabled(EFI_OLD_MEMMAP)) {
- save_pgd = (pgd_t *)read_cr3();
+ save_pgd = (pgd_t *)__read_cr3();
write_cr3((unsigned long)efi_scratch.efi_pgt);
goto out;
}
efi_sync_low_kernel_mappings();
local_irq_save(flags);
- efi_scratch.prev_cr3 = read_cr3();
+ efi_scratch.prev_cr3 = __read_cr3();
write_cr3((unsigned long)efi_scratch.efi_pgt);
__flush_tlb_all();
asmlinkage __visible int xo1_do_sleep(u8 sleep_state)
{
- void *pgd_addr = __va(read_cr3());
+ void *pgd_addr = __va(read_cr3_pa());
/* Program wakeup mask (using dword access to CS5536_PM1_EN) */
outl(wakeup_mask << 16, acpi_base + CS5536_PM1_STS);
*/
ctxt->cr0 = read_cr0();
ctxt->cr2 = read_cr2();
- ctxt->cr3 = read_cr3();
+ ctxt->cr3 = __read_cr3();
ctxt->cr4 = __read_cr4();
#ifdef CONFIG_X86_64
ctxt->cr8 = read_cr8();
memcpy((void *)relocated_restore_code, &core_restore_code, PAGE_SIZE);
/* Make the page containing the relocated code executable */
- pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code);
+ pgd = (pgd_t *)__va(read_cr3_pa()) +
+ pgd_index(relocated_restore_code);
p4d = p4d_offset(pgd, relocated_restore_code);
if (p4d_large(*p4d)) {
set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX));
pmd_t pmd;
pte_t pte;
- pa = read_cr3();
+ pa = read_cr3_pa();
pgd = native_make_pgd(xen_read_phys_ulong(pa + pgd_index(vaddr) *
sizeof(pgd)));
if (!pgd_present(pgd))
pt_phys = pmd_phys + PFN_PHYS(n_pmd);
p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
- pgd = __va(read_cr3());
+ pgd = __va(read_cr3_pa());
new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
idx_p4d = 0;
save_pud = n_pud;
{
unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
- BUG_ON(read_cr3() != __pa(initial_page_table));
+ BUG_ON(read_cr3_pa() != __pa(initial_page_table));
BUG_ON(cr3 != __pa(swapper_pg_dir));
/*