x86: clean up pte_exec
arch/x86/mm/fault_32.c
/*
 *  Copyright (C) 1995  Linus Torvalds
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/tty.h>
#include <linux/vt_kern.h>		/* For unblank_screen() */
#include <linux/highmem.h>
#include <linux/bootmem.h>		/* for max_low_pfn */
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/kdebug.h>

#include <asm/system.h>
#include <asm/desc.h>
#include <asm/segment.h>

/*
 * Page fault error code bits
 *	bit 0 == 0 means no page found, 1 means protection fault
 *	bit 1 == 0 means read, 1 means write
 *	bit 2 == 0 means kernel, 1 means user-mode
 *	bit 3 == 1 means use of reserved bit detected
 *	bit 4 == 1 means fault was an instruction fetch
 */
#define PF_PROT		(1<<0)
#define PF_WRITE	(1<<1)
#define PF_USER		(1<<2)
#define PF_RSVD		(1<<3)
#define PF_INSTR	(1<<4)

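/*
 * Informal examples: error_code == (PF_USER|PF_WRITE) is a user-mode
 * write to a not-present page; (PF_PROT|PF_INSTR) is an instruction
 * fetch that hit a present page it is not allowed to execute (e.g. NX).
 */
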
static inline int notify_page_fault(struct pt_regs *regs)
{
#ifdef CONFIG_KPROBES
	int ret = 0;

	/* kprobe_running() needs smp_processor_id() */
	if (!user_mode_vm(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, 14))
			ret = 1;
		preempt_enable();
	}

	return ret;
#else
	return 0;
#endif
}

/*
 * X86_32
 * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
 * Check that here and ignore it.
 *
 * X86_64
 * Sometimes the CPU reports invalid exceptions on prefetch.
 * Check that here and ignore it.
 *
 * Opcode checker based on code by Richard Brunner
 */
static int is_prefetch(struct pt_regs *regs, unsigned long addr,
		       unsigned long error_code)
{
	unsigned char *instr;
	int scan_more = 1;
	int prefetch = 0;
	unsigned char *max_instr;

#ifdef CONFIG_X86_32
	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		     boot_cpu_data.x86 >= 6)) {
		/* Catch an obscure case of prefetch inside an NX page. */
		if (nx_enabled && (error_code & PF_INSTR))
			return 0;
	} else {
		return 0;
	}
#else
	/* If it was an exec fault, ignore it. */
	if (error_code & PF_INSTR)
		return 0;
#endif

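	/*
	 * Scan at most 15 bytes (the x86 maximum instruction length) at
	 * the faulting instruction pointer, skipping over known prefix
	 * bytes, looking for a prefetch opcode: 0x0F 0x0D (3DNow!
	 * PREFETCH/PREFETCHW) or 0x0F 0x18 (SSE PREFETCHh).
	 */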
	instr = (unsigned char *)convert_ip_to_linear(current, regs);
	max_instr = instr + 15;

	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
		return 0;

	while (scan_more && instr < max_instr) {
		unsigned char opcode;
		unsigned char instr_hi;
		unsigned char instr_lo;

		if (probe_kernel_address(instr, opcode))
			break;

		instr_hi = opcode & 0xf0;
		instr_lo = opcode & 0x0f;
		instr++;

		switch (instr_hi) {
		case 0x20:
		case 0x30:
			/*
			 * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
			 * In X86_64 long mode, the CPU will signal invalid
			 * opcode if some of these prefixes are present so
			 * X86_64 will never get here anyway
			 */
			scan_more = ((instr_lo & 7) == 0x6);
			break;
#ifdef CONFIG_X86_64
		case 0x40:
			/*
			 * In AMD64 long mode 0x40..0x4F are valid REX prefixes
			 * Need to figure out under what instruction mode the
			 * instruction was issued. Could check the LDT for lm,
			 * but for now it's good enough to assume that long
			 * mode only uses well known segments or kernel.
			 */
			scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
			break;
#endif
		case 0x60:
			/* 0x64 thru 0x67 are valid prefixes in all modes. */
			scan_more = (instr_lo & 0xC) == 0x4;
			break;
		case 0xF0:
			/* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
			scan_more = !instr_lo || (instr_lo>>1) == 1;
			break;
		case 0x00:
			/* Prefetch instruction is 0x0F0D or 0x0F18 */
			scan_more = 0;

			if (probe_kernel_address(instr, opcode))
				break;
			prefetch = (instr_lo == 0xF) &&
				(opcode == 0x0D || opcode == 0x18);
			break;
		default:
			scan_more = 0;
			break;
		}
	}
	return prefetch;
}

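/*
 * Fill in a siginfo describing the faulting address and deliver the
 * signal (SIGSEGV or SIGBUS here) to the given task.
 */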
static void force_sig_info_fault(int si_signo, int si_code,
			unsigned long address, struct task_struct *tsk)
{
	siginfo_t info;

	info.si_signo = si_signo;
	info.si_errno = 0;
	info.si_code = si_code;
	info.si_addr = (void __user *)address;
	force_sig_info(si_signo, &info, tsk);
}

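/*
 * Walk the page tables for 'address' by hand, starting from CR3, and
 * print each level that is present and reachable through lowmem: the
 * top-level entry, the pde (PAE only), and the pte unless the mapping
 * is a large page.
 */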
void dump_pagetable(unsigned long address)
{
	__typeof__(pte_val(__pte(0))) page;

	page = read_cr3();
	page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
#ifdef CONFIG_X86_PAE
	printk("*pdpt = %016Lx ", page);
	if ((page >> PAGE_SHIFT) < max_low_pfn
	    && page & _PAGE_PRESENT) {
		page &= PAGE_MASK;
		page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
							 & (PTRS_PER_PMD - 1)];
		printk(KERN_CONT "*pde = %016Lx ", page);
		page &= ~_PAGE_NX;
	}
#else
	printk("*pde = %08lx ", page);
#endif

	/*
	 * We must not directly access the pte in the highpte
	 * case if the page table is located in highmem.
	 * And let's rather not kmap-atomic the pte, just in case
	 * it's allocated already.
	 */
	if ((page >> PAGE_SHIFT) < max_low_pfn
	    && (page & _PAGE_PRESENT)
	    && !(page & _PAGE_PSE)) {
		page &= PAGE_MASK;
		page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
							 & (PTRS_PER_PTE - 1)];
		printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
	}

	printk("\n");
}

void do_invalid_op(struct pt_regs *, unsigned long);

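/*
 * Copy the kernel pmd entry covering 'address' from the reference page
 * table (init_mm.pgd) into the given pgd, making kernel vmalloc
 * mappings visible to that page table. Returns the reference pmd, or
 * NULL if the reference entry itself is not present.
 */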
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
	unsigned index = pgd_index(address);
	pgd_t *pgd_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

	pgd += index;
	pgd_k = init_mm.pgd + index;

	if (!pgd_present(*pgd_k))
		return NULL;

	/*
	 * set_pgd(pgd, *pgd_k); here would be useless on PAE
	 * and redundant with the set_pmd() on non-PAE. As would
	 * set_pud.
	 */

	pud = pud_offset(pgd, address);
	pud_k = pud_offset(pgd_k, address);
	if (!pud_present(*pud_k))
		return NULL;

	pmd = pmd_offset(pud, address);
	pmd_k = pmd_offset(pud_k, address);
	if (!pmd_present(*pmd_k))
		return NULL;
	if (!pmd_present(*pmd)) {
		set_pmd(pmd, *pmd_k);
		arch_flush_lazy_mmu_mode();
	} else
		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
	return pmd_k;
}

#ifdef CONFIG_X86_64
static const char errata93_warning[] =
KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
KERN_ERR "******* Please consider a BIOS update.\n"
KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
#endif

/* Workaround for K8 erratum #93 & buggy BIOS.
   BIOS SMM functions are required to use a specific workaround
   to avoid corruption of the 64bit RIP register on C stepping K8.
   A lot of BIOS that didn't get tested properly miss this.
   The OS sees this as a page fault with the upper 32bits of RIP cleared.
   Try to work around it here.
   Note we only handle faults in kernel here.
   Does nothing for X86_32
 */
static int is_errata93(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_64
	static int warned;
	if (address != regs->ip)
		return 0;
	if ((address >> 32) != 0)
		return 0;
	address |= 0xffffffffUL << 32;
	if ((address >= (u64)_stext && address <= (u64)_etext) ||
	    (address >= MODULES_VADDR && address <= MODULES_END)) {
		if (!warned) {
			printk(errata93_warning);
			warned = 1;
		}
		regs->ip = address;
		return 1;
	}
#endif
	return 0;
}

/*
 * Handle a fault on the vmalloc or module mapping area
 *
 * This assumes no large pages in there.
 */
static inline int vmalloc_fault(unsigned long address)
{
#ifdef CONFIG_X86_32
	unsigned long pgd_paddr;
	pmd_t *pmd_k;
	pte_t *pte_k;
	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "current" here. We might be inside
	 * an interrupt in the middle of a task switch..
	 */
	pgd_paddr = read_cr3();
	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
	if (!pmd_k)
		return -1;
	pte_k = pte_offset_kernel(pmd_k, address);
	if (!pte_present(*pte_k))
		return -1;
	return 0;
#else
	pgd_t *pgd, *pgd_ref;
	pud_t *pud, *pud_ref;
	pmd_t *pmd, *pmd_ref;
	pte_t *pte, *pte_ref;

	/* Copy kernel mappings over when needed. This can also
	   happen within a race in page table update. In the latter
	   case just flush. */

	pgd = pgd_offset(current->mm ?: &init_mm, address);
	pgd_ref = pgd_offset_k(address);
	if (pgd_none(*pgd_ref))
		return -1;
	if (pgd_none(*pgd))
		set_pgd(pgd, *pgd_ref);
	else
		BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));

	/* Below here mismatches are bugs because these lower tables
	   are shared */

	pud = pud_offset(pgd, address);
	pud_ref = pud_offset(pgd_ref, address);
	if (pud_none(*pud_ref))
		return -1;
	if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
		BUG();
	pmd = pmd_offset(pud, address);
	pmd_ref = pmd_offset(pud_ref, address);
	if (pmd_none(*pmd_ref))
		return -1;
	if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
		BUG();
	pte_ref = pte_offset_kernel(pmd_ref, address);
	if (!pte_present(*pte_ref))
		return -1;
	pte = pte_offset_kernel(pmd, address);
	/* Don't use pte_page here, because the mappings can point
	   outside mem_map, and the NUMA hash lookup cannot handle
	   that. */
	if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
		BUG();
	return 0;
#endif
}

int show_unhandled_signals = 1;

/*
 * This routine handles page faults. It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
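/*
 * 'error_code' is the hardware page fault error code (decoded with the
 * PF_* bits above); the faulting linear address is read from CR2 below.
 */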
void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long address;
	int write, si_code;
	int fault;

	/*
	 * We can fault from pretty much anywhere, with unknown IRQ state.
	 */
	trace_hardirqs_fixup();

	tsk = current;
	mm = tsk->mm;
	prefetchw(&mm->mmap_sem);

	/* get the address */
	address = read_cr2();

	si_code = SEGV_MAPERR;

	if (notify_page_fault(regs))
		return;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * This verifies that the fault happened in kernel space
	 * ((error_code & PF_USER) == 0) and that it was not a
	 * protection or reserved-bit fault
	 * ((error_code & (PF_PROT|PF_RSVD)) == 0).
	 */
	if (unlikely(address >= TASK_SIZE)) {
		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
		    vmalloc_fault(address) >= 0)
			return;
		/*
		 * Don't take the mm semaphore here. If we fixup a prefetch
		 * fault we could otherwise deadlock.
		 */
		goto bad_area_nosemaphore;
	}

	/* It's safe to allow irq's after cr2 has been saved and the vmalloc
	   fault has been handled. */
	if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
		local_irq_enable();

	/*
	 * If we're in an interrupt, have no user context or are running in an
	 * atomic region then we must not take the fault.
	 */
	if (in_atomic() || !mm)
		goto bad_area_nosemaphore;

	/* When running in the kernel we expect faults to occur only to
	 * addresses in user space. All other faults represent errors in the
	 * kernel and should generate an OOPS. Unfortunately, in the case of an
	 * erroneous fault occurring in a code path which already holds mmap_sem
	 * we will deadlock attempting to validate the fault against the
	 * address space. Luckily the kernel only validly references user
	 * space from well defined areas of code, which are listed in the
	 * exceptions table.
	 *
	 * As the vast majority of faults will be valid we will only perform
	 * the source reference check when there is a possibility of a deadlock.
	 * Attempt to lock the address space, if we cannot we then validate the
	 * source. If this is invalid we can skip the address space check,
	 * thus avoiding the deadlock.
	 */
	if (!down_read_trylock(&mm->mmap_sem)) {
		if ((error_code & PF_USER) == 0 &&
		    !search_exception_tables(regs->ip))
			goto bad_area_nosemaphore;
		down_read(&mm->mmap_sem);
	}

	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (error_code & PF_USER) {
		/*
		 * Accessing the stack below %sp is always a bug.
		 * The large cushion allows instructions like enter
		 * and pusha to work. ("enter $65535,$31" pushes
		 * 32 pointers and then decrements %sp by 65535.)
		 */
		if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;
/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	si_code = SEGV_ACCERR;
	write = 0;
	switch (error_code & (PF_PROT|PF_WRITE)) {
	default:	/* 3: write, present */
		/* fall through */
	case PF_WRITE:		/* write, not present */
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		write++;
		break;
	case PF_PROT:		/* read, present */
		goto bad_area;
	case 0:			/* read, not present */
		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
			goto bad_area;
	}

 survive:
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, write);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
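	/*
	 * A major fault is one that needed I/O (for example reading the
	 * page back in from disk or swap); minor faults were satisfied
	 * without I/O. Only the accounting below differs.
	 */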
	if (fault & VM_FAULT_MAJOR)
		tsk->maj_flt++;
	else
		tsk->min_flt++;

#ifdef CONFIG_X86_32
	/*
	 * Did it hit the DOS screen memory VA from vm86 mode?
	 */
	if (v8086_mode(regs)) {
		unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
		if (bit < 32)
			tsk->thread.screen_bitmap |= 1 << bit;
	}
#endif
	up_read(&mm->mmap_sem);
	return;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (error_code & PF_USER) {
		/*
		 * It's possible to have interrupts off here.
		 */
		local_irq_enable();

		/*
		 * Valid to do another page fault here because this one came
		 * from user space.
		 */
		if (is_prefetch(regs, address, error_code))
			return;

		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
		    printk_ratelimit()) {
			printk(
#ifdef CONFIG_X86_32
			"%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx",
#else
			"%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx",
#endif
			task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
			tsk->comm, task_pid_nr(tsk), address, regs->ip,
			regs->sp, error_code);
			print_vma_addr(" in ", regs->ip);
			printk("\n");
		}
		tsk->thread.cr2 = address;
		/* Kernel addresses are always protection faults */
		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
		tsk->thread.trap_no = 14;
		force_sig_info_fault(SIGSEGV, si_code, address, tsk);
		return;
	}

#ifdef CONFIG_X86_F00F_BUG
	/*
	 * Pentium F0 0F C7 C8 bug workaround.
	 */
	if (boot_cpu_data.f00f_bug) {
		unsigned long nr;

		nr = (address - idt_descr.address) >> 3;

		if (nr == 6) {
			do_invalid_op(regs, 0);
			return;
		}
	}
#endif

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	/*
	 * Valid to do another page fault here, because if this fault
	 * had been triggered by is_prefetch fixup_exception would have
	 * handled it.
	 */
	if (is_prefetch(regs, address, error_code))
		return;

	if (is_errata93(regs, address))
		return;

/*
 * Oops. The kernel tried to access some bad page. We'll have to
 * terminate things with extreme prejudice.
 */

	bust_spinlocks(1);

	if (oops_may_print()) {

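		/*
		 * An instruction fetch fault on a present kernel page means
		 * the kernel jumped into a mapping whose pte has the NX bit
		 * set (only possible with PAE on 32-bit). That is usually an
		 * exploit attempt, so say so in the oops below.
		 */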
#ifdef CONFIG_X86_PAE
		if (error_code & PF_INSTR) {
			pte_t *pte = lookup_address(address);

			if (pte && pte_present(*pte) && !pte_exec(*pte))
				printk(KERN_CRIT "kernel tried to execute "
					"NX-protected page - exploit attempt? "
					"(uid: %d)\n", current->uid);
		}
#endif
		if (address < PAGE_SIZE)
			printk(KERN_ALERT "BUG: unable to handle kernel NULL "
					"pointer dereference");
		else
			printk(KERN_ALERT "BUG: unable to handle kernel paging"
					" request");
		printk(" at virtual address %08lx\n", address);
		printk(KERN_ALERT "printing ip: %08lx ", regs->ip);

		dump_pagetable(address);
	}

	tsk->thread.cr2 = address;
	tsk->thread.trap_no = 14;
	tsk->thread.error_code = error_code;
	die("Oops", regs, error_code);
	bust_spinlocks(0);
	do_exit(SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_global_init(tsk)) {
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	printk("VM: killing process %s\n", tsk->comm);
	if (error_code & PF_USER)
		do_group_exit(SIGKILL);
	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (!(error_code & PF_USER))
		goto no_context;

	/* User space => ok to do another page fault */
	if (is_prefetch(regs, address, error_code))
		return;

	tsk->thread.cr2 = address;
	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = 14;
	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
}

void vmalloc_sync_all(void)
{
	/*
	 * Note that races in the updates of insync and start aren't
	 * problematic: insync can only get set bits added, and updates to
	 * start are only improving performance (without affecting correctness
	 * if undone).
	 */
	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
	static unsigned long start = TASK_SIZE;
	unsigned long address;

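	/*
	 * When the kernel pmd is shared across all page tables (typically
	 * the PAE case), kernel mappings are already visible everywhere
	 * and there is nothing to synchronize.
	 */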
	if (SHARED_KERNEL_PMD)
		return;

	BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
	for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
		if (!test_bit(pgd_index(address), insync)) {
			unsigned long flags;
			struct page *page;

			spin_lock_irqsave(&pgd_lock, flags);
			for (page = pgd_list; page; page =
					(struct page *)page->index)
				if (!vmalloc_sync_one(page_address(page),
						address)) {
					BUG_ON(page != pgd_list);
					break;
				}
			spin_unlock_irqrestore(&pgd_lock, flags);
			if (!page)
				set_bit(pgd_index(address), insync);
		}
		if (address == start && test_bit(pgd_index(address), insync))
			start = address + PGDIR_SIZE;
	}
}