ftrace: add basic support for gcc profiler instrumentation
arch/x86/kernel/entry_64.S
1 /*
2 * linux/arch/x86_64/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 */
8
9 /*
10 * entry.S contains the system-call and fault low-level handling routines.
11 *
12 * NOTE: This code handles signal-recognition, which happens every time
13 * after an interrupt and after each system call.
14 *
15 * Normal syscalls and interrupts don't save a full stack frame; this is
16 * only done for syscall tracing, signals or fork/exec et al.
17 *
18 * A note on terminology:
19 * - top of stack: Architecture defined interrupt frame from SS to RIP
20 * at the top of the kernel process stack.
21 * - partial stack frame: partially saved registers up to R11.
22 * - full stack frame: Like partial stack frame, but all registers saved.
23 *
24 * Some macro usage:
25 * - CFI macros are used to generate dwarf2 unwind information for better
26 * backtraces. They don't change any code.
27 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
28 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
29 * There are unfortunately lots of special cases where some registers are
30 * not touched. The macro is a big mess that should be cleaned up.
31 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
32 * Gives a full stack frame.
33 * - ENTRY/END Define functions in the symbol table.
34 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
35 * frame that is otherwise undefined after a SYSCALL
36 * - TRACE_IRQS_* - Trace hard interrupt state for lock debugging.
37 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
38 */
39
40 #include <linux/linkage.h>
41 #include <asm/segment.h>
42 #include <asm/cache.h>
43 #include <asm/errno.h>
44 #include <asm/dwarf2.h>
45 #include <asm/calling.h>
46 #include <asm/asm-offsets.h>
47 #include <asm/msr.h>
48 #include <asm/unistd.h>
49 #include <asm/thread_info.h>
50 #include <asm/hw_irq.h>
51 #include <asm/page.h>
52 #include <asm/irqflags.h>
53 #include <asm/paravirt.h>
54
55 .code64
56
57 #ifdef CONFIG_FTRACE
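/*
 * With CONFIG_FTRACE the kernel is compiled with -pg, so gcc emits a call
 * to mcount at the start of every traceable function.  ftrace_trace_function
 * is a function pointer that normally points at ftrace_stub below; only when
 * a tracer registers itself is it switched to the tracer's callback, so the
 * common case falls straight through to the retq.
 */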
58 ENTRY(mcount)
59 cmpq $ftrace_stub, ftrace_trace_function
60 jnz trace
61 .globl ftrace_stub
62 ftrace_stub:
63 retq
64
65 trace:
66 /* taken from glibc */
67 subq $0x38, %rsp
68 movq %rax, (%rsp)
69 movq %rcx, 8(%rsp)
70 movq %rdx, 16(%rsp)
71 movq %rsi, 24(%rsp)
72 movq %rdi, 32(%rsp)
73 movq %r8, 40(%rsp)
74 movq %r9, 48(%rsp)
75
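/*
 * Build the tracer's two arguments: %rdi = the address mcount was called
 * from (a location inside the instrumented function) and %rsi = that
 * function's own return address, read through the frame pointer its
 * prologue set up -- i.e. the (ip, parent_ip) pair.
 */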
76 movq 0x38(%rsp), %rdi
77 movq 8(%rbp), %rsi
78
79 call *ftrace_trace_function
80
81 movq 48(%rsp), %r9
82 movq 40(%rsp), %r8
83 movq 32(%rsp), %rdi
84 movq 24(%rsp), %rsi
85 movq 16(%rsp), %rdx
86 movq 8(%rsp), %rcx
87 movq (%rsp), %rax
88 addq $0x38, %rsp
89
90 jmp ftrace_stub
91 END(mcount)
92 #endif
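/*
 * For reference, a function compiled with -pg and frame pointers looks
 * roughly like this (an illustrative sketch, not part of this file):
 *
 *	some_function:
 *		pushq	%rbp
 *		movq	%rsp, %rbp
 *		call	mcount
 *		...
 *
 * so mcount runs with %rbp already pointing at some_function's frame,
 * which is what the 8(%rbp) load above relies on.
 */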
93
94 #ifndef CONFIG_PREEMPT
95 #define retint_kernel retint_restore_args
96 #endif
97
98 #ifdef CONFIG_PARAVIRT
99 ENTRY(native_irq_enable_syscall_ret)
100 movq %gs:pda_oldrsp,%rsp
101 swapgs
102 sysretq
103 #endif /* CONFIG_PARAVIRT */
104
105
106 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
107 #ifdef CONFIG_TRACE_IRQFLAGS
108 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
109 jnc 1f
110 TRACE_IRQS_ON
111 1:
112 #endif
113 .endm
114
115 /*
116 * C code is not supposed to know about undefined top of stack. Every time
117 * a C function with a pt_regs argument is called from the SYSCALL-based
118 * fast path, FIXUP_TOP_OF_STACK is needed.
119 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
120 * manipulation.
121 */
122
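/*
 * Background for the two macros below: SYSCALL does not build an interrupt
 * frame.  It stashes the user RIP in %rcx and the user RFLAGS in %r11 and
 * leaves the SS/RSP/EFLAGS/CS/RIP slots unsaved, so FIXUP_TOP_OF_STACK fills
 * them in by hand.  RCX is set to -1 because the user's original %rcx was
 * overwritten by SYSCALL and cannot be recovered.
 */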
123 /* %rsp:at FRAMEEND */
124 .macro FIXUP_TOP_OF_STACK tmp
125 movq %gs:pda_oldrsp,\tmp
126 movq \tmp,RSP(%rsp)
127 movq $__USER_DS,SS(%rsp)
128 movq $__USER_CS,CS(%rsp)
129 movq $-1,RCX(%rsp)
130 movq R11(%rsp),\tmp /* get eflags */
131 movq \tmp,EFLAGS(%rsp)
132 .endm
133
134 .macro RESTORE_TOP_OF_STACK tmp,offset=0
135 movq RSP-\offset(%rsp),\tmp
136 movq \tmp,%gs:pda_oldrsp
137 movq EFLAGS-\offset(%rsp),\tmp
138 movq \tmp,R11-\offset(%rsp)
139 .endm
140
141 .macro FAKE_STACK_FRAME child_rip
142 /* push in order ss, rsp, eflags, cs, rip */
143 xorl %eax, %eax
144 pushq %rax /* ss */
145 CFI_ADJUST_CFA_OFFSET 8
146 /*CFI_REL_OFFSET ss,0*/
147 pushq %rax /* rsp */
148 CFI_ADJUST_CFA_OFFSET 8
149 CFI_REL_OFFSET rsp,0
150 pushq $(1<<9) /* eflags - interrupts on */
151 CFI_ADJUST_CFA_OFFSET 8
152 /*CFI_REL_OFFSET rflags,0*/
153 pushq $__KERNEL_CS /* cs */
154 CFI_ADJUST_CFA_OFFSET 8
155 /*CFI_REL_OFFSET cs,0*/
156 pushq \child_rip /* rip */
157 CFI_ADJUST_CFA_OFFSET 8
158 CFI_REL_OFFSET rip,0
159 pushq %rax /* orig rax */
160 CFI_ADJUST_CFA_OFFSET 8
161 .endm
162
163 .macro UNFAKE_STACK_FRAME
164 addq $8*6, %rsp
165 CFI_ADJUST_CFA_OFFSET -(6*8)
166 .endm
167
168 .macro CFI_DEFAULT_STACK start=1
169 .if \start
170 CFI_STARTPROC simple
171 CFI_SIGNAL_FRAME
172 CFI_DEF_CFA rsp,SS+8
173 .else
174 CFI_DEF_CFA_OFFSET SS+8
175 .endif
176 CFI_REL_OFFSET r15,R15
177 CFI_REL_OFFSET r14,R14
178 CFI_REL_OFFSET r13,R13
179 CFI_REL_OFFSET r12,R12
180 CFI_REL_OFFSET rbp,RBP
181 CFI_REL_OFFSET rbx,RBX
182 CFI_REL_OFFSET r11,R11
183 CFI_REL_OFFSET r10,R10
184 CFI_REL_OFFSET r9,R9
185 CFI_REL_OFFSET r8,R8
186 CFI_REL_OFFSET rax,RAX
187 CFI_REL_OFFSET rcx,RCX
188 CFI_REL_OFFSET rdx,RDX
189 CFI_REL_OFFSET rsi,RSI
190 CFI_REL_OFFSET rdi,RDI
191 CFI_REL_OFFSET rip,RIP
192 /*CFI_REL_OFFSET cs,CS*/
193 /*CFI_REL_OFFSET rflags,EFLAGS*/
194 CFI_REL_OFFSET rsp,RSP
195 /*CFI_REL_OFFSET ss,SS*/
196 .endm
197 /*
198 * A newly forked process directly context switches into this.
199 */
200 /* rdi: prev */
201 ENTRY(ret_from_fork)
202 CFI_DEFAULT_STACK
203 push kernel_eflags(%rip)
204 CFI_ADJUST_CFA_OFFSET 8
205 popf # reset kernel eflags
206 CFI_ADJUST_CFA_OFFSET -8
207 call schedule_tail
208 GET_THREAD_INFO(%rcx)
209 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
210 jnz rff_trace
211 rff_action:
212 RESTORE_REST
213 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
214 je int_ret_from_sys_call
215 testl $_TIF_IA32,threadinfo_flags(%rcx)
216 jnz int_ret_from_sys_call
217 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
218 jmp ret_from_sys_call
219 rff_trace:
220 movq %rsp,%rdi
221 call syscall_trace_leave
222 GET_THREAD_INFO(%rcx)
223 jmp rff_action
224 CFI_ENDPROC
225 END(ret_from_fork)
226
227 /*
228 * System call entry. Up to 6 arguments in registers are supported.
229 *
230 * SYSCALL does not save anything on the stack and does not change the
231 * stack pointer.
232 */
233
234 /*
235 * Register setup:
236 * rax system call number
237 * rdi arg0
238 * rcx return address for syscall/sysret, C arg3
239 * rsi arg1
240 * rdx arg2
241 * r10 arg3 (--> moved to rcx for C)
242 * r8 arg4
243 * r9 arg5
244 * r11 eflags for syscall/sysret, temporary for C
245 * r12-r15,rbp,rbx saved by C code, not touched.
246 *
247 * Interrupts are off on entry.
248 * Only called from user space.
249 *
250 * XXX if we had a free scratch register we could save the RSP into the stack frame
251 * and report it properly in ps. Unfortunately we don't have one.
252 *
253 * When user space can change the frame, always force IRET. That is because
254 * IRET deals with non-canonical addresses better; SYSRET has trouble
255 * with them due to bugs in both AMD and Intel CPUs.
256 */
257
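/*
 * Illustrative example only: for a user-space write(fd, buf, count), the
 * libc wrapper executes SYSCALL with %rax = __NR_write, %rdi = fd,
 * %rsi = buf and %rdx = count, and the CPU itself has put the return
 * address in %rcx and the saved RFLAGS in %r11 by the time control
 * arrives here.
 */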
258 ENTRY(system_call)
259 CFI_STARTPROC simple
260 CFI_SIGNAL_FRAME
261 CFI_DEF_CFA rsp,PDA_STACKOFFSET
262 CFI_REGISTER rip,rcx
263 /*CFI_REGISTER rflags,r11*/
264 SWAPGS_UNSAFE_STACK
265 /*
266 * A hypervisor implementation might want to use a label
267 * after the swapgs, so that it can do the swapgs
268 * for the guest and jump here on syscall.
269 */
270 ENTRY(system_call_after_swapgs)
271
272 movq %rsp,%gs:pda_oldrsp
273 movq %gs:pda_kernelstack,%rsp
274 /*
275 * No need to follow this irqs off/on section - it's straight
276 * and short:
277 */
278 ENABLE_INTERRUPTS(CLBR_NONE)
279 SAVE_ARGS 8,1
280 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
281 movq %rcx,RIP-ARGOFFSET(%rsp)
282 CFI_REL_OFFSET rip,RIP-ARGOFFSET
283 GET_THREAD_INFO(%rcx)
284 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
285 jnz tracesys
286 cmpq $__NR_syscall_max,%rax
287 ja badsys
288 movq %r10,%rcx
289 call *sys_call_table(,%rax,8) # XXX: rip relative
290 movq %rax,RAX-ARGOFFSET(%rsp)
291 /*
292 * Syscall return path ending with SYSRET (fast path)
293 * Has incomplete stack frame and undefined top of stack.
294 */
295 ret_from_sys_call:
296 movl $_TIF_ALLWORK_MASK,%edi
297 /* edi: flagmask */
298 sysret_check:
299 LOCKDEP_SYS_EXIT
300 GET_THREAD_INFO(%rcx)
301 DISABLE_INTERRUPTS(CLBR_NONE)
302 TRACE_IRQS_OFF
303 movl threadinfo_flags(%rcx),%edx
304 andl %edi,%edx
305 jnz sysret_careful
306 CFI_REMEMBER_STATE
307 /*
308 * sysretq will re-enable interrupts:
309 */
310 TRACE_IRQS_ON
311 movq RIP-ARGOFFSET(%rsp),%rcx
312 CFI_REGISTER rip,rcx
313 RESTORE_ARGS 0,-ARG_SKIP,1
314 /*CFI_REGISTER rflags,r11*/
315 ENABLE_INTERRUPTS_SYSCALL_RET
316
317 CFI_RESTORE_STATE
318 /* Handle reschedules */
319 /* edx: work, edi: workmask */
320 sysret_careful:
321 bt $TIF_NEED_RESCHED,%edx
322 jnc sysret_signal
323 TRACE_IRQS_ON
324 ENABLE_INTERRUPTS(CLBR_NONE)
325 pushq %rdi
326 CFI_ADJUST_CFA_OFFSET 8
327 call schedule
328 popq %rdi
329 CFI_ADJUST_CFA_OFFSET -8
330 jmp sysret_check
331
332 /* Handle a signal */
333 sysret_signal:
334 TRACE_IRQS_ON
335 ENABLE_INTERRUPTS(CLBR_NONE)
336 testl $_TIF_DO_NOTIFY_MASK,%edx
337 jz 1f
338
339 /* Really a signal */
340 /* edx: work flags (arg3) */
341 leaq do_notify_resume(%rip),%rax
342 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
343 xorl %esi,%esi # oldset -> arg2
344 call ptregscall_common
345 1: movl $_TIF_NEED_RESCHED,%edi
346 /* Use IRET because user could have changed frame. This
347 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
348 DISABLE_INTERRUPTS(CLBR_NONE)
349 TRACE_IRQS_OFF
350 jmp int_with_check
351
352 badsys:
353 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
354 jmp ret_from_sys_call
355
356 /* Do syscall tracing */
357 tracesys:
358 SAVE_REST
359 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
360 FIXUP_TOP_OF_STACK %rdi
361 movq %rsp,%rdi
362 call syscall_trace_enter
363 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed them */
364 RESTORE_REST
365 cmpq $__NR_syscall_max,%rax
366 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
367 movq %r10,%rcx /* fixup for C */
368 call *sys_call_table(,%rax,8)
369 movq %rax,RAX-ARGOFFSET(%rsp)
370 /* Use IRET because user could have changed frame */
371
372 /*
373 * Syscall return path ending with IRET.
374 * Has correct top of stack, but partial stack frame.
375 */
376 .globl int_ret_from_sys_call
377 int_ret_from_sys_call:
378 DISABLE_INTERRUPTS(CLBR_NONE)
379 TRACE_IRQS_OFF
380 testl $3,CS-ARGOFFSET(%rsp)
381 je retint_restore_args
382 movl $_TIF_ALLWORK_MASK,%edi
383 /* edi: mask to check */
384 int_with_check:
385 LOCKDEP_SYS_EXIT_IRQ
386 GET_THREAD_INFO(%rcx)
387 movl threadinfo_flags(%rcx),%edx
388 andl %edi,%edx
389 jnz int_careful
390 andl $~TS_COMPAT,threadinfo_status(%rcx)
391 jmp retint_swapgs
392
393 /* Either reschedule or signal or syscall exit tracking needed. */
394 /* First do a reschedule test. */
395 /* edx: work, edi: workmask */
396 int_careful:
397 bt $TIF_NEED_RESCHED,%edx
398 jnc int_very_careful
399 TRACE_IRQS_ON
400 ENABLE_INTERRUPTS(CLBR_NONE)
401 pushq %rdi
402 CFI_ADJUST_CFA_OFFSET 8
403 call schedule
404 popq %rdi
405 CFI_ADJUST_CFA_OFFSET -8
406 DISABLE_INTERRUPTS(CLBR_NONE)
407 TRACE_IRQS_OFF
408 jmp int_with_check
409
410 /* handle signals and tracing -- both require a full stack frame */
411 int_very_careful:
412 TRACE_IRQS_ON
413 ENABLE_INTERRUPTS(CLBR_NONE)
414 SAVE_REST
415 /* Check for syscall exit trace */
416 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
417 jz int_signal
418 pushq %rdi
419 CFI_ADJUST_CFA_OFFSET 8
420 leaq 8(%rsp),%rdi # &ptregs -> arg1
421 call syscall_trace_leave
422 popq %rdi
423 CFI_ADJUST_CFA_OFFSET -8
424 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
425 jmp int_restore_rest
426
427 int_signal:
428 testl $_TIF_DO_NOTIFY_MASK,%edx
429 jz 1f
430 movq %rsp,%rdi # &ptregs -> arg1
431 xorl %esi,%esi # oldset -> arg2
432 call do_notify_resume
433 1: movl $_TIF_NEED_RESCHED,%edi
434 int_restore_rest:
435 RESTORE_REST
436 DISABLE_INTERRUPTS(CLBR_NONE)
437 TRACE_IRQS_OFF
438 jmp int_with_check
439 CFI_ENDPROC
440 END(system_call)
441
442 /*
443 * Certain special system calls that need to save a full stack frame.
444 */
445
446 .macro PTREGSCALL label,func,arg
447 .globl \label
448 \label:
449 leaq \func(%rip),%rax
450 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
451 jmp ptregscall_common
452 END(\label)
453 .endm
454
455 CFI_STARTPROC
456
457 PTREGSCALL stub_clone, sys_clone, %r8
458 PTREGSCALL stub_fork, sys_fork, %rdi
459 PTREGSCALL stub_vfork, sys_vfork, %rdi
460 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
461 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
462 PTREGSCALL stub_iopl, sys_iopl, %rsi
463
464 ENTRY(ptregscall_common)
465 popq %r11
466 CFI_ADJUST_CFA_OFFSET -8
467 CFI_REGISTER rip, r11
468 SAVE_REST
469 movq %r11, %r15
470 CFI_REGISTER rip, r15
471 FIXUP_TOP_OF_STACK %r11
472 call *%rax
473 RESTORE_TOP_OF_STACK %r11
474 movq %r15, %r11
475 CFI_REGISTER rip, r11
476 RESTORE_REST
477 pushq %r11
478 CFI_ADJUST_CFA_OFFSET 8
479 CFI_REL_OFFSET rip, 0
480 ret
481 CFI_ENDPROC
482 END(ptregscall_common)
483
484 ENTRY(stub_execve)
485 CFI_STARTPROC
486 popq %r11
487 CFI_ADJUST_CFA_OFFSET -8
488 CFI_REGISTER rip, r11
489 SAVE_REST
490 FIXUP_TOP_OF_STACK %r11
491 movq %rsp, %rcx
492 call sys_execve
493 RESTORE_TOP_OF_STACK %r11
494 movq %rax,RAX(%rsp)
495 RESTORE_REST
496 jmp int_ret_from_sys_call
497 CFI_ENDPROC
498 END(stub_execve)
499
500 /*
501 * sigreturn is special because it needs to restore all registers on return.
502 * This cannot be done with SYSRET, so use the IRET return path instead.
503 */
504 ENTRY(stub_rt_sigreturn)
505 CFI_STARTPROC
506 addq $8, %rsp
507 CFI_ADJUST_CFA_OFFSET -8
508 SAVE_REST
509 movq %rsp,%rdi
510 FIXUP_TOP_OF_STACK %r11
511 call sys_rt_sigreturn
512 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
513 RESTORE_REST
514 jmp int_ret_from_sys_call
515 CFI_ENDPROC
516 END(stub_rt_sigreturn)
517
518 /*
519 * initial frame state for interrupts and exceptions
520 */
521 .macro _frame ref
522 CFI_STARTPROC simple
523 CFI_SIGNAL_FRAME
524 CFI_DEF_CFA rsp,SS+8-\ref
525 /*CFI_REL_OFFSET ss,SS-\ref*/
526 CFI_REL_OFFSET rsp,RSP-\ref
527 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
528 /*CFI_REL_OFFSET cs,CS-\ref*/
529 CFI_REL_OFFSET rip,RIP-\ref
530 .endm
531
532 /* initial frame state for interrupts (and exceptions without error code) */
533 #define INTR_FRAME _frame RIP
534 /* initial frame state for exceptions with error code (and interrupts with
535 vector already pushed) */
536 #define XCPT_FRAME _frame ORIG_RAX
537
538 /*
539 * Interrupt entry/exit.
540 *
541 * Interrupt entry points save only callee-clobbered registers in the fast path.
542 *
543 * Entry runs with interrupts off.
544 */
545
546 /* 0(%rsp): interrupt number */
547 .macro interrupt func
548 cld
549 SAVE_ARGS
550 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
551 pushq %rbp
552 CFI_ADJUST_CFA_OFFSET 8
553 CFI_REL_OFFSET rbp, 0
554 movq %rsp,%rbp
555 CFI_DEF_CFA_REGISTER rbp
556 testl $3,CS(%rdi)
557 je 1f
558 SWAPGS
559 /* irqcount is used to check if a CPU is already on an interrupt
560 stack or not. While this is essentially redundant with preempt_count
561 it is a little cheaper to use a separate counter in the PDA
562 (short of moving irq_enter into assembly, which would be too
563 much work) */
564 1: incl %gs:pda_irqcount
565 cmoveq %gs:pda_irqstackptr,%rsp
566 push %rbp # backlink for old unwinder
567 /*
568 * We entered an interrupt context - irqs are off:
569 */
570 TRACE_IRQS_OFF
571 call \func
572 .endm
573
574 ENTRY(common_interrupt)
575 XCPT_FRAME
576 interrupt do_IRQ
577 /* 0(%rsp): oldrsp-ARGOFFSET */
578 ret_from_intr:
579 DISABLE_INTERRUPTS(CLBR_NONE)
580 TRACE_IRQS_OFF
581 decl %gs:pda_irqcount
582 leaveq
583 CFI_DEF_CFA_REGISTER rsp
584 CFI_ADJUST_CFA_OFFSET -8
585 exit_intr:
586 GET_THREAD_INFO(%rcx)
587 testl $3,CS-ARGOFFSET(%rsp)
588 je retint_kernel
589
590 /* Interrupt came from user space */
591 /*
592 * Has a correct top of stack, but a partial stack frame
593 * %rcx: thread info. Interrupts off.
594 */
595 retint_with_reschedule:
596 movl $_TIF_WORK_MASK,%edi
597 retint_check:
598 LOCKDEP_SYS_EXIT_IRQ
599 movl threadinfo_flags(%rcx),%edx
600 andl %edi,%edx
601 CFI_REMEMBER_STATE
602 jnz retint_careful
603
604 retint_swapgs: /* return to user-space */
605 /*
606 * The iretq could re-enable interrupts:
607 */
608 DISABLE_INTERRUPTS(CLBR_ANY)
609 TRACE_IRQS_IRETQ
610 SWAPGS
611 jmp restore_args
612
613 retint_restore_args: /* return to kernel space */
614 DISABLE_INTERRUPTS(CLBR_ANY)
615 /*
616 * The iretq could re-enable interrupts:
617 */
618 TRACE_IRQS_IRETQ
619 restore_args:
620 RESTORE_ARGS 0,8,0
621
622 irq_return:
623 INTERRUPT_RETURN
624
625 .section __ex_table, "a"
626 .quad irq_return, bad_iret
627 .previous
628
629 #ifdef CONFIG_PARAVIRT
630 ENTRY(native_iret)
631 iretq
632
633 .section __ex_table,"a"
634 .quad native_iret, bad_iret
635 .previous
636 #endif
637
638 .section .fixup,"ax"
639 bad_iret:
640 /*
641 * The iret traps when the %cs or %ss being restored is bogus.
642 * We've lost the original trap vector and error code.
643 * #GPF is the most likely one to get for an invalid selector.
644 * So pretend we completed the iret and took the #GPF in user mode.
645 *
646 * We are now running with the kernel GS after exception recovery.
647 * But error_entry expects us to have user GS to match the user %cs,
648 * so swap back.
649 */
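/* push a zero error code in place of the one a real #GP would have pushed */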
650 pushq $0
651
652 SWAPGS
653 jmp general_protection
654
655 .previous
656
657 /* edi: workmask, edx: work */
658 retint_careful:
659 CFI_RESTORE_STATE
660 bt $TIF_NEED_RESCHED,%edx
661 jnc retint_signal
662 TRACE_IRQS_ON
663 ENABLE_INTERRUPTS(CLBR_NONE)
664 pushq %rdi
665 CFI_ADJUST_CFA_OFFSET 8
666 call schedule
667 popq %rdi
668 CFI_ADJUST_CFA_OFFSET -8
669 GET_THREAD_INFO(%rcx)
670 DISABLE_INTERRUPTS(CLBR_NONE)
671 TRACE_IRQS_OFF
672 jmp retint_check
673
674 retint_signal:
675 testl $_TIF_DO_NOTIFY_MASK,%edx
676 jz retint_swapgs
677 TRACE_IRQS_ON
678 ENABLE_INTERRUPTS(CLBR_NONE)
679 SAVE_REST
680 movq $-1,ORIG_RAX(%rsp)
681 xorl %esi,%esi # oldset
682 movq %rsp,%rdi # &pt_regs
683 call do_notify_resume
684 RESTORE_REST
685 DISABLE_INTERRUPTS(CLBR_NONE)
686 TRACE_IRQS_OFF
687 movl $_TIF_NEED_RESCHED,%edi
688 GET_THREAD_INFO(%rcx)
689 jmp retint_check
690
691 #ifdef CONFIG_PREEMPT
692 /* Returning to kernel space. Check if we need preemption */
693 /* rcx: threadinfo. interrupts off. */
694 ENTRY(retint_kernel)
695 cmpl $0,threadinfo_preempt_count(%rcx)
696 jnz retint_restore_args
697 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
698 jnc retint_restore_args
699 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
700 jnc retint_restore_args
701 call preempt_schedule_irq
702 jmp exit_intr
703 #endif
704
705 CFI_ENDPROC
706 END(common_interrupt)
707
708 /*
709 * APIC interrupts.
710 */
711 .macro apicinterrupt num,func
712 INTR_FRAME
713 pushq $~(\num)
714 CFI_ADJUST_CFA_OFFSET 8
715 interrupt \func
716 jmp ret_from_intr
717 CFI_ENDPROC
718 .endm
719
720 ENTRY(thermal_interrupt)
721 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
722 END(thermal_interrupt)
723
724 ENTRY(threshold_interrupt)
725 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
726 END(threshold_interrupt)
727
728 #ifdef CONFIG_SMP
729 ENTRY(reschedule_interrupt)
730 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
731 END(reschedule_interrupt)
732
733 .macro INVALIDATE_ENTRY num
734 ENTRY(invalidate_interrupt\num)
735 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
736 END(invalidate_interrupt\num)
737 .endm
738
739 INVALIDATE_ENTRY 0
740 INVALIDATE_ENTRY 1
741 INVALIDATE_ENTRY 2
742 INVALIDATE_ENTRY 3
743 INVALIDATE_ENTRY 4
744 INVALIDATE_ENTRY 5
745 INVALIDATE_ENTRY 6
746 INVALIDATE_ENTRY 7
747
748 ENTRY(call_function_interrupt)
749 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
750 END(call_function_interrupt)
751 ENTRY(irq_move_cleanup_interrupt)
752 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
753 END(irq_move_cleanup_interrupt)
754 #endif
755
756 ENTRY(apic_timer_interrupt)
757 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
758 END(apic_timer_interrupt)
759
760 ENTRY(error_interrupt)
761 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
762 END(error_interrupt)
763
764 ENTRY(spurious_interrupt)
765 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
766 END(spurious_interrupt)
767
768 /*
769 * Exception entry points.
770 */
771 .macro zeroentry sym
772 INTR_FRAME
773 pushq $0 /* push error code/oldrax */
774 CFI_ADJUST_CFA_OFFSET 8
775 pushq %rax /* push real oldrax to the rdi slot */
776 CFI_ADJUST_CFA_OFFSET 8
777 CFI_REL_OFFSET rax,0
778 leaq \sym(%rip),%rax
779 jmp error_entry
780 CFI_ENDPROC
781 .endm
782
783 .macro errorentry sym
784 XCPT_FRAME
785 pushq %rax
786 CFI_ADJUST_CFA_OFFSET 8
787 CFI_REL_OFFSET rax,0
788 leaq \sym(%rip),%rax
789 jmp error_entry
790 CFI_ENDPROC
791 .endm
792
793 /* error code is on the stack already */
794 /* handle NMI-like exceptions that can happen everywhere */
795 .macro paranoidentry sym, ist=0, irqtrace=1
796 SAVE_ALL
797 cld
798 movl $1,%ebx
799 movl $MSR_GS_BASE,%ecx
800 rdmsr
801 testl %edx,%edx
802 js 1f
803 SWAPGS
804 xorl %ebx,%ebx
805 1:
806 .if \ist
807 movq %gs:pda_data_offset, %rbp
808 .endif
809 movq %rsp,%rdi
810 movq ORIG_RAX(%rsp),%rsi
811 movq $-1,ORIG_RAX(%rsp)
812 .if \ist
813 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
814 .endif
815 call \sym
816 .if \ist
817 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
818 .endif
819 DISABLE_INTERRUPTS(CLBR_NONE)
820 .if \irqtrace
821 TRACE_IRQS_OFF
822 .endif
823 .endm
824
825 /*
826 * "Paranoid" exit path from exception stack.
827 * Paranoid because this is used by NMIs and cannot take
828 * any kernel state for granted.
829 * We don't do kernel preemption checks here, because only
830 * NMI should be common and it does not enable IRQs and
831 * cannot get reschedule ticks.
832 *
833 * "trace" is 0 for the NMI handler only, because irq-tracing
834 * is fundamentally NMI-unsafe. (we cannot change the soft and
835 * hard flags at once, atomically)
836 */
837 .macro paranoidexit trace=1
838 /* ebx: no swapgs flag */
839 paranoid_exit\trace:
840 testl %ebx,%ebx /* swapgs needed? */
841 jnz paranoid_restore\trace
842 testl $3,CS(%rsp)
843 jnz paranoid_userspace\trace
844 paranoid_swapgs\trace:
845 .if \trace
846 TRACE_IRQS_IRETQ 0
847 .endif
848 SWAPGS_UNSAFE_STACK
849 paranoid_restore\trace:
850 RESTORE_ALL 8
851 jmp irq_return
852 paranoid_userspace\trace:
853 GET_THREAD_INFO(%rcx)
854 movl threadinfo_flags(%rcx),%ebx
855 andl $_TIF_WORK_MASK,%ebx
856 jz paranoid_swapgs\trace
857 movq %rsp,%rdi /* &pt_regs */
858 call sync_regs
859 movq %rax,%rsp /* switch stack for scheduling */
860 testl $_TIF_NEED_RESCHED,%ebx
861 jnz paranoid_schedule\trace
862 movl %ebx,%edx /* arg3: thread flags */
863 .if \trace
864 TRACE_IRQS_ON
865 .endif
866 ENABLE_INTERRUPTS(CLBR_NONE)
867 xorl %esi,%esi /* arg2: oldset */
868 movq %rsp,%rdi /* arg1: &pt_regs */
869 call do_notify_resume
870 DISABLE_INTERRUPTS(CLBR_NONE)
871 .if \trace
872 TRACE_IRQS_OFF
873 .endif
874 jmp paranoid_userspace\trace
875 paranoid_schedule\trace:
876 .if \trace
877 TRACE_IRQS_ON
878 .endif
879 ENABLE_INTERRUPTS(CLBR_ANY)
880 call schedule
881 DISABLE_INTERRUPTS(CLBR_ANY)
882 .if \trace
883 TRACE_IRQS_OFF
884 .endif
885 jmp paranoid_userspace\trace
886 CFI_ENDPROC
887 .endm
888
889 /*
890 * Exception entry point. This expects an error code/orig_rax on the stack
891 * and the exception handler in %rax.
892 */
893 KPROBE_ENTRY(error_entry)
894 _frame RDI
895 CFI_REL_OFFSET rax,0
896 /* rdi slot contains rax, oldrax contains error code */
897 cld
898 subq $14*8,%rsp
899 CFI_ADJUST_CFA_OFFSET (14*8)
900 movq %rsi,13*8(%rsp)
901 CFI_REL_OFFSET rsi,RSI
902 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
903 CFI_REGISTER rax,rsi
904 movq %rdx,12*8(%rsp)
905 CFI_REL_OFFSET rdx,RDX
906 movq %rcx,11*8(%rsp)
907 CFI_REL_OFFSET rcx,RCX
908 movq %rsi,10*8(%rsp) /* store rax */
909 CFI_REL_OFFSET rax,RAX
910 movq %r8, 9*8(%rsp)
911 CFI_REL_OFFSET r8,R8
912 movq %r9, 8*8(%rsp)
913 CFI_REL_OFFSET r9,R9
914 movq %r10,7*8(%rsp)
915 CFI_REL_OFFSET r10,R10
916 movq %r11,6*8(%rsp)
917 CFI_REL_OFFSET r11,R11
918 movq %rbx,5*8(%rsp)
919 CFI_REL_OFFSET rbx,RBX
920 movq %rbp,4*8(%rsp)
921 CFI_REL_OFFSET rbp,RBP
922 movq %r12,3*8(%rsp)
923 CFI_REL_OFFSET r12,R12
924 movq %r13,2*8(%rsp)
925 CFI_REL_OFFSET r13,R13
926 movq %r14,1*8(%rsp)
927 CFI_REL_OFFSET r14,R14
928 movq %r15,(%rsp)
929 CFI_REL_OFFSET r15,R15
930 xorl %ebx,%ebx
931 testl $3,CS(%rsp)
932 je error_kernelspace
933 error_swapgs:
934 SWAPGS
935 error_sti:
936 movq %rdi,RDI(%rsp)
937 CFI_REL_OFFSET rdi,RDI
938 movq %rsp,%rdi
939 movq ORIG_RAX(%rsp),%rsi /* get error code */
940 movq $-1,ORIG_RAX(%rsp)
941 call *%rax
942 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
943 error_exit:
944 movl %ebx,%eax
945 RESTORE_REST
946 DISABLE_INTERRUPTS(CLBR_NONE)
947 TRACE_IRQS_OFF
948 GET_THREAD_INFO(%rcx)
949 testl %eax,%eax
950 jne retint_kernel
951 LOCKDEP_SYS_EXIT_IRQ
952 movl threadinfo_flags(%rcx),%edx
953 movl $_TIF_WORK_MASK,%edi
954 andl %edi,%edx
955 jnz retint_careful
956 jmp retint_swapgs
957 CFI_ENDPROC
958
959 error_kernelspace:
960 incl %ebx
961 /* There are two places in the kernel that can potentially fault with
962 usergs. Handle them here. The exception handlers after
963 iret run with kernel gs again, so don't set the user space flag.
964 B-stepping K8s sometimes report a truncated RIP for IRET
965 exceptions returning to compat mode. Check for these here too. */
966 leaq irq_return(%rip),%rbp
967 cmpq %rbp,RIP(%rsp)
968 je error_swapgs
969 movl %ebp,%ebp /* zero extend */
970 cmpq %rbp,RIP(%rsp)
971 je error_swapgs
972 cmpq $gs_change,RIP(%rsp)
973 je error_swapgs
974 jmp error_sti
975 KPROBE_END(error_entry)
976
977 /* Reload gs selector with exception handling */
978 /* edi: new selector */
979 ENTRY(load_gs_index)
980 CFI_STARTPROC
981 pushf
982 CFI_ADJUST_CFA_OFFSET 8
983 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
984 SWAPGS
985 gs_change:
986 movl %edi,%gs
987 2: mfence /* workaround */
988 SWAPGS
989 popf
990 CFI_ADJUST_CFA_OFFSET -8
991 ret
992 CFI_ENDPROC
993 ENDPROC(load_gs_index)
994
995 .section __ex_table,"a"
996 .align 8
997 .quad gs_change,bad_gs
998 .previous
999 .section .fixup,"ax"
1000 /* running with kernelgs */
1001 bad_gs:
1002 SWAPGS /* switch back to user gs */
1003 xorl %eax,%eax
1004 movl %eax,%gs
1005 jmp 2b
1006 .previous
1007
1008 /*
1009 * Create a kernel thread.
1010 *
1011 * C extern interface:
1012 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
1013 *
1014 * asm input arguments:
1015 * rdi: fn, rsi: arg, rdx: flags
1016 */
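/*
 * Illustrative usage (the fn name is made up):
 *	kernel_thread(my_thread_fn, NULL, CLONE_FS | CLONE_SIGHAND);
 * The flags argument is OR-ed with kernel_thread_flags below before being
 * handed to do_fork().
 */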
1017 ENTRY(kernel_thread)
1018 CFI_STARTPROC
1019 FAKE_STACK_FRAME $child_rip
1020 SAVE_ALL
1021
1022 # rdi: flags, rsi: usp, rdx: will be &pt_regs
1023 movq %rdx,%rdi
1024 orq kernel_thread_flags(%rip),%rdi
1025 movq $-1, %rsi
1026 movq %rsp, %rdx
1027
1028 xorl %r8d,%r8d
1029 xorl %r9d,%r9d
1030
1031 # clone now
1032 call do_fork
1033 movq %rax,RAX(%rsp)
1034 xorl %edi,%edi
1035
1036 /*
1037 * It isn't worth checking for a reschedule here, so internally to the
1038 * x86_64 port you can rely on kernel_thread() not rescheduling the child
1039 * before returning; this avoids the need for hacks, for example to fork
1040 * off the per-CPU idle tasks.
1041 * [Hopefully no generic code relies on the reschedule -AK]
1042 */
1043 RESTORE_ALL
1044 UNFAKE_STACK_FRAME
1045 ret
1046 CFI_ENDPROC
1047 ENDPROC(kernel_thread)
1048
1049 child_rip:
1050 pushq $0 # fake return address
1051 CFI_STARTPROC
1052 /*
1053 * Here we are in the child and the registers are set as they were
1054 * at kernel_thread() invocation in the parent.
1055 */
1056 movq %rdi, %rax
1057 movq %rsi, %rdi
1058 call *%rax
1059 # exit
1060 mov %eax, %edi
1061 call do_exit
1062 CFI_ENDPROC
1063 ENDPROC(child_rip)
1064
1065 /*
1066 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
1067 *
1068 * C extern interface:
1069 * extern long execve(char *name, char **argv, char **envp)
1070 *
1071 * asm input arguments:
1072 * rdi: name, rsi: argv, rdx: envp
1073 *
1074 * We want to fall back into:
1075 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
1076 *
1077 * do_sys_execve asm fallback arguments:
1078 * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
1079 */
1080 ENTRY(kernel_execve)
1081 CFI_STARTPROC
1082 FAKE_STACK_FRAME $0
1083 SAVE_ALL
1084 movq %rsp,%rcx
1085 call sys_execve
1086 movq %rax, RAX(%rsp)
1087 RESTORE_REST
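/*
 * If sys_execve() succeeded, %rax is 0 and the new program's register state
 * is in the pt_regs frame we just built, so leave through the IRET path;
 * on error, unwind the fake frame and return the error code to the caller.
 */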
1088 testq %rax,%rax
1089 je int_ret_from_sys_call
1090 RESTORE_ARGS
1091 UNFAKE_STACK_FRAME
1092 ret
1093 CFI_ENDPROC
1094 ENDPROC(kernel_execve)
1095
1096 KPROBE_ENTRY(page_fault)
1097 errorentry do_page_fault
1098 KPROBE_END(page_fault)
1099
1100 ENTRY(coprocessor_error)
1101 zeroentry do_coprocessor_error
1102 END(coprocessor_error)
1103
1104 ENTRY(simd_coprocessor_error)
1105 zeroentry do_simd_coprocessor_error
1106 END(simd_coprocessor_error)
1107
1108 ENTRY(device_not_available)
1109 zeroentry math_state_restore
1110 END(device_not_available)
1111
1112 /* runs on exception stack */
1113 KPROBE_ENTRY(debug)
1114 INTR_FRAME
1115 pushq $0
1116 CFI_ADJUST_CFA_OFFSET 8
1117 paranoidentry do_debug, DEBUG_STACK
1118 paranoidexit
1119 KPROBE_END(debug)
1120
1121 /* runs on exception stack */
1122 KPROBE_ENTRY(nmi)
1123 INTR_FRAME
1124 pushq $-1
1125 CFI_ADJUST_CFA_OFFSET 8
1126 paranoidentry do_nmi, 0, 0
1127 #ifdef CONFIG_TRACE_IRQFLAGS
1128 paranoidexit 0
1129 #else
1130 jmp paranoid_exit1
1131 CFI_ENDPROC
1132 #endif
1133 KPROBE_END(nmi)
1134
1135 KPROBE_ENTRY(int3)
1136 INTR_FRAME
1137 pushq $0
1138 CFI_ADJUST_CFA_OFFSET 8
1139 paranoidentry do_int3, DEBUG_STACK
1140 jmp paranoid_exit1
1141 CFI_ENDPROC
1142 KPROBE_END(int3)
1143
1144 ENTRY(overflow)
1145 zeroentry do_overflow
1146 END(overflow)
1147
1148 ENTRY(bounds)
1149 zeroentry do_bounds
1150 END(bounds)
1151
1152 ENTRY(invalid_op)
1153 zeroentry do_invalid_op
1154 END(invalid_op)
1155
1156 ENTRY(coprocessor_segment_overrun)
1157 zeroentry do_coprocessor_segment_overrun
1158 END(coprocessor_segment_overrun)
1159
1160 ENTRY(reserved)
1161 zeroentry do_reserved
1162 END(reserved)
1163
1164 /* runs on exception stack */
1165 ENTRY(double_fault)
1166 XCPT_FRAME
1167 paranoidentry do_double_fault
1168 jmp paranoid_exit1
1169 CFI_ENDPROC
1170 END(double_fault)
1171
1172 ENTRY(invalid_TSS)
1173 errorentry do_invalid_TSS
1174 END(invalid_TSS)
1175
1176 ENTRY(segment_not_present)
1177 errorentry do_segment_not_present
1178 END(segment_not_present)
1179
1180 /* runs on exception stack */
1181 ENTRY(stack_segment)
1182 XCPT_FRAME
1183 paranoidentry do_stack_segment
1184 jmp paranoid_exit1
1185 CFI_ENDPROC
1186 END(stack_segment)
1187
1188 KPROBE_ENTRY(general_protection)
1189 errorentry do_general_protection
1190 KPROBE_END(general_protection)
1191
1192 ENTRY(alignment_check)
1193 errorentry do_alignment_check
1194 END(alignment_check)
1195
1196 ENTRY(divide_error)
1197 zeroentry do_divide_error
1198 END(divide_error)
1199
1200 ENTRY(spurious_interrupt_bug)
1201 zeroentry do_spurious_interrupt_bug
1202 END(spurious_interrupt_bug)
1203
1204 #ifdef CONFIG_X86_MCE
1205 /* runs on exception stack */
1206 ENTRY(machine_check)
1207 INTR_FRAME
1208 pushq $0
1209 CFI_ADJUST_CFA_OFFSET 8
1210 paranoidentry do_machine_check
1211 jmp paranoid_exit1
1212 CFI_ENDPROC
1213 END(machine_check)
1214 #endif
1215
1216 /* Call softirq on interrupt stack. Interrupts are off. */
1217 ENTRY(call_softirq)
1218 CFI_STARTPROC
1219 push %rbp
1220 CFI_ADJUST_CFA_OFFSET 8
1221 CFI_REL_OFFSET rbp,0
1222 mov %rsp,%rbp
1223 CFI_DEF_CFA_REGISTER rbp
1224 incl %gs:pda_irqcount
1225 cmove %gs:pda_irqstackptr,%rsp
1226 push %rbp # backlink for old unwinder
1227 call __do_softirq
1228 leaveq
1229 CFI_DEF_CFA_REGISTER rsp
1230 CFI_ADJUST_CFA_OFFSET -8
1231 decl %gs:pda_irqcount
1232 ret
1233 CFI_ENDPROC
1234 ENDPROC(call_softirq)
1235
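/*
 * ignore_sysret is installed as the 32-bit SYSCALL (MSR_CSTAR) entry point
 * when the kernel is built without IA32 emulation, so such syscalls simply
 * get -ENOSYS back.
 */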
1236 KPROBE_ENTRY(ignore_sysret)
1237 CFI_STARTPROC
1238 mov $-ENOSYS,%eax
1239 sysret
1240 CFI_ENDPROC
1241 ENDPROC(ignore_sysret)