arch/x86/kernel/entry_64.S
1 /*
2 * linux/arch/x86_64/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 */
8
9 /*
10 * entry.S contains the system-call and fault low-level handling routines.
11 *
12 * NOTE: This code handles signal recognition, which happens every time
13 * after an interrupt and after each system call.
14 *
15 * Normal syscalls and interrupts don't save a full stack frame; this is
16 * only done for syscall tracing, signals, or fork/exec et al.
17 *
18 * A note on terminology:
19 * - top of stack: architecture-defined interrupt frame from SS to RIP
20 * at the top of the kernel process stack.
21 * - partial stack frame: partially saved registers, up to R11.
22 * - full stack frame: like the partial stack frame, but with all registers saved.
23 *
24 * Some macro usage:
25 * - CFI macros are used to generate dwarf2 unwind information for better
26 * backtraces. They don't change any code.
27 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
28 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
29 * There are unfortunately lots of special cases where some registers are
30 * not touched. The macro is a big mess that should be cleaned up.
31 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
32 * Gives a full stack frame.
33 * - ENTRY/END Define functions in the symbol table.
34 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
35 * frame that is otherwise undefined after a SYSCALL
36 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
37 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
38 */
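/*
 * Orientation only -- a rough sketch of the pt_regs save area the terms
 * above refer to (it matches the offsets used by CFI_DEFAULT_STACK and
 * error_entry below), growing downward from the hardware frame:
 *
 *	SS, RSP, EFLAGS, CS, RIP		<- pushed by the CPU ("top of stack")
 *	ORIG_RAX				<- error code / syscall nr / ~vector
 *	RDI, RSI, RDX, RCX, RAX, R8-R11		<- partial stack frame (SAVE_ARGS)
 *	RBX, RBP, R12-R15			<- added by SAVE_REST (full stack frame)
 */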
39
40 #include <linux/linkage.h>
41 #include <asm/segment.h>
42 #include <asm/cache.h>
43 #include <asm/errno.h>
44 #include <asm/dwarf2.h>
45 #include <asm/calling.h>
46 #include <asm/asm-offsets.h>
47 #include <asm/msr.h>
48 #include <asm/unistd.h>
49 #include <asm/thread_info.h>
50 #include <asm/hw_irq.h>
51 #include <asm/page.h>
52 #include <asm/irqflags.h>
53 #include <asm/paravirt.h>
54 #include <asm/ftrace.h>
55
56 .code64
57
58 #ifdef CONFIG_FTRACE
59 #ifdef CONFIG_DYNAMIC_FTRACE
60 ENTRY(mcount)
61
62 subq $0x38, %rsp
63 movq %rax, (%rsp)
64 movq %rcx, 8(%rsp)
65 movq %rdx, 16(%rsp)
66 movq %rsi, 24(%rsp)
67 movq %rdi, 32(%rsp)
68 movq %r8, 40(%rsp)
69 movq %r9, 48(%rsp)
70
71 movq 0x38(%rsp), %rdi
72 subq $MCOUNT_INSN_SIZE, %rdi
73
74 .globl mcount_call
75 mcount_call:
76 call ftrace_stub
77
78 movq 48(%rsp), %r9
79 movq 40(%rsp), %r8
80 movq 32(%rsp), %rdi
81 movq 24(%rsp), %rsi
82 movq 16(%rsp), %rdx
83 movq 8(%rsp), %rcx
84 movq (%rsp), %rax
85 addq $0x38, %rsp
86
87 retq
88 END(mcount)
89
90 ENTRY(ftrace_caller)
91
92 /* taken from glibc */
93 subq $0x38, %rsp
94 movq %rax, (%rsp)
95 movq %rcx, 8(%rsp)
96 movq %rdx, 16(%rsp)
97 movq %rsi, 24(%rsp)
98 movq %rdi, 32(%rsp)
99 movq %r8, 40(%rsp)
100 movq %r9, 48(%rsp)
101
102 movq 0x38(%rsp), %rdi
103 movq 8(%rbp), %rsi
104 subq $MCOUNT_INSN_SIZE, %rdi
105
106 .globl ftrace_call
107 ftrace_call:
108 call ftrace_stub
109
110 movq 48(%rsp), %r9
111 movq 40(%rsp), %r8
112 movq 32(%rsp), %rdi
113 movq 24(%rsp), %rsi
114 movq 16(%rsp), %rdx
115 movq 8(%rsp), %rcx
116 movq (%rsp), %rax
117 addq $0x38, %rsp
118
119 .globl ftrace_stub
120 ftrace_stub:
121 retq
122 END(ftrace_caller)
123
124 #else /* ! CONFIG_DYNAMIC_FTRACE */
125 ENTRY(mcount)
126 cmpq $ftrace_stub, ftrace_trace_function
127 jnz trace
128 .globl ftrace_stub
129 ftrace_stub:
130 retq
131
132 trace:
133 /* taken from glibc */
134 subq $0x38, %rsp
135 movq %rax, (%rsp)
136 movq %rcx, 8(%rsp)
137 movq %rdx, 16(%rsp)
138 movq %rsi, 24(%rsp)
139 movq %rdi, 32(%rsp)
140 movq %r8, 40(%rsp)
141 movq %r9, 48(%rsp)
142
143 movq 0x38(%rsp), %rdi
144 movq 8(%rbp), %rsi
145 subq $MCOUNT_INSN_SIZE, %rdi
146
147 call *ftrace_trace_function
148
149 movq 48(%rsp), %r9
150 movq 40(%rsp), %r8
151 movq 32(%rsp), %rdi
152 movq 24(%rsp), %rsi
153 movq 16(%rsp), %rdx
154 movq 8(%rsp), %rcx
155 movq (%rsp), %rax
156 addq $0x38, %rsp
157
158 jmp ftrace_stub
159 END(mcount)
160 #endif /* CONFIG_DYNAMIC_FTRACE */
161 #endif /* CONFIG_FTRACE */
162
163 #ifndef CONFIG_PREEMPT
164 #define retint_kernel retint_restore_args
165 #endif
166
167 #ifdef CONFIG_PARAVIRT
168 ENTRY(native_usergs_sysret64)
169 swapgs
170 sysretq
171 #endif /* CONFIG_PARAVIRT */
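/*
 * native_usergs_sysret64 above is the native backend for the USERGS_SYSRET64
 * hook used on the syscall fast path below: switch back to the user GS base
 * with swapgs and return to user mode with sysretq.
 */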
172
173
174 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
175 #ifdef CONFIG_TRACE_IRQFLAGS
176 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
177 jnc 1f
178 TRACE_IRQS_ON
179 1:
180 #endif
181 .endm
182
183 /*
184 * C code is not supposed to know about the undefined top of stack. Every time
185 * a C function with a pt_regs argument is called from the SYSCALL-based
186 * fast path, FIXUP_TOP_OF_STACK is needed.
187 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
188 * manipulation.
189 */
190
191 /* %rsp:at FRAMEEND */
192 .macro FIXUP_TOP_OF_STACK tmp
193 movq %gs:pda_oldrsp,\tmp
194 movq \tmp,RSP(%rsp)
195 movq $__USER_DS,SS(%rsp)
196 movq $__USER_CS,CS(%rsp)
197 movq $-1,RCX(%rsp)
198 movq R11(%rsp),\tmp /* get eflags */
199 movq \tmp,EFLAGS(%rsp)
200 .endm
201
202 .macro RESTORE_TOP_OF_STACK tmp,offset=0
203 movq RSP-\offset(%rsp),\tmp
204 movq \tmp,%gs:pda_oldrsp
205 movq EFLAGS-\offset(%rsp),\tmp
206 movq \tmp,R11-\offset(%rsp)
207 .endm
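/*
 * Typical pairing, as used by the ptregs stubs further down (a minimal
 * sketch; "some_ptregs_func" is a hypothetical C helper taking a
 * struct pt_regs pointer, not a function defined in this tree):
 *
 *	SAVE_REST
 *	FIXUP_TOP_OF_STACK %r11		# make RSP/CS/SS/EFLAGS/RCX in pt_regs valid
 *	movq %rsp,%rdi			# &pt_regs as arg1
 *	call some_ptregs_func
 *	RESTORE_TOP_OF_STACK %r11	# sync possible pt_regs changes back
 *	RESTORE_REST
 */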
208
209 .macro FAKE_STACK_FRAME child_rip
210 /* push in order ss, rsp, eflags, cs, rip */
211 xorl %eax, %eax
212 pushq $__KERNEL_DS /* ss */
213 CFI_ADJUST_CFA_OFFSET 8
214 /*CFI_REL_OFFSET ss,0*/
215 pushq %rax /* rsp */
216 CFI_ADJUST_CFA_OFFSET 8
217 CFI_REL_OFFSET rsp,0
218 pushq $(1<<9) /* eflags - interrupts on */
219 CFI_ADJUST_CFA_OFFSET 8
220 /*CFI_REL_OFFSET rflags,0*/
221 pushq $__KERNEL_CS /* cs */
222 CFI_ADJUST_CFA_OFFSET 8
223 /*CFI_REL_OFFSET cs,0*/
224 pushq \child_rip /* rip */
225 CFI_ADJUST_CFA_OFFSET 8
226 CFI_REL_OFFSET rip,0
227 pushq %rax /* orig rax */
228 CFI_ADJUST_CFA_OFFSET 8
229 .endm
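/*
 * FAKE_STACK_FRAME above builds the same SS/RSP/EFLAGS/CS/RIP + ORIG_RAX
 * layout that the CPU and the normal entry code would have produced, so
 * kernel_thread() and kernel_execve() below can reuse the ordinary
 * SAVE_ALL/RESTORE_ALL and return paths; UNFAKE_STACK_FRAME drops those
 * six words again.
 */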
230
231 .macro UNFAKE_STACK_FRAME
232 addq $8*6, %rsp
233 CFI_ADJUST_CFA_OFFSET -(6*8)
234 .endm
235
236 .macro CFI_DEFAULT_STACK start=1
237 .if \start
238 CFI_STARTPROC simple
239 CFI_SIGNAL_FRAME
240 CFI_DEF_CFA rsp,SS+8
241 .else
242 CFI_DEF_CFA_OFFSET SS+8
243 .endif
244 CFI_REL_OFFSET r15,R15
245 CFI_REL_OFFSET r14,R14
246 CFI_REL_OFFSET r13,R13
247 CFI_REL_OFFSET r12,R12
248 CFI_REL_OFFSET rbp,RBP
249 CFI_REL_OFFSET rbx,RBX
250 CFI_REL_OFFSET r11,R11
251 CFI_REL_OFFSET r10,R10
252 CFI_REL_OFFSET r9,R9
253 CFI_REL_OFFSET r8,R8
254 CFI_REL_OFFSET rax,RAX
255 CFI_REL_OFFSET rcx,RCX
256 CFI_REL_OFFSET rdx,RDX
257 CFI_REL_OFFSET rsi,RSI
258 CFI_REL_OFFSET rdi,RDI
259 CFI_REL_OFFSET rip,RIP
260 /*CFI_REL_OFFSET cs,CS*/
261 /*CFI_REL_OFFSET rflags,EFLAGS*/
262 CFI_REL_OFFSET rsp,RSP
263 /*CFI_REL_OFFSET ss,SS*/
264 .endm
265 /*
266 * A newly forked process directly context switches into this.
267 */
268 /* rdi: prev */
269 ENTRY(ret_from_fork)
270 CFI_DEFAULT_STACK
271 push kernel_eflags(%rip)
272 CFI_ADJUST_CFA_OFFSET 8 /* a 64-bit push moves %rsp by 8, not 4 */
273 popf # reset kernel eflags
274 CFI_ADJUST_CFA_OFFSET -8
275 call schedule_tail
276 GET_THREAD_INFO(%rcx)
277 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
278 jnz rff_trace
279 rff_action:
280 RESTORE_REST
281 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
282 je int_ret_from_sys_call
283 testl $_TIF_IA32,TI_flags(%rcx)
284 jnz int_ret_from_sys_call
285 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
286 jmp ret_from_sys_call
287 rff_trace:
288 movq %rsp,%rdi
289 call syscall_trace_leave
290 GET_THREAD_INFO(%rcx)
291 jmp rff_action
292 CFI_ENDPROC
293 END(ret_from_fork)
294
295 /*
296 * System call entry. Up to 6 arguments in registers are supported.
297 *
298 * SYSCALL does not save anything on the stack and does not change the
299 * stack pointer.
300 */
301
302 /*
303 * Register setup:
304 * rax system call number
305 * rdi arg0
306 * rcx return address for syscall/sysret, C arg3
307 * rsi arg1
308 * rdx arg2
309 * r10 arg3 (--> moved to rcx for C)
310 * r8 arg4
311 * r9 arg5
312 * r11 eflags for syscall/sysret, temporary for C
313 * r12-r15,rbp,rbx saved by C code, not touched.
314 *
315 * Interrupts are off on entry.
316 * Only called from user space.
317 *
318 * XXX: if we had a free scratch register we could save RSP into the stack
319 * frame and report it properly in ps. Unfortunately we don't have one.
320 *
321 * When the user can change the frame, always force IRET. That is because
322 * IRET deals with non-canonical addresses better. SYSRET has trouble
323 * with them due to bugs in both AMD and Intel CPUs.
324 */
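/*
 * For illustration only (not part of this file): a minimal user-space
 * sketch of the convention above, issuing write(1, msg, len) directly via
 * the SYSCALL instruction. Note that rcx and r11 get clobbered, exactly
 * as described above.
 *
 *	movl	$1, %eax		# __NR_write on x86-64
 *	movl	$1, %edi		# arg0: fd (stdout)
 *	leaq	msg(%rip), %rsi		# arg1: buffer
 *	movl	$14, %edx		# arg2: count
 *	syscall				# rcx := return RIP, r11 := RFLAGS
 *					# rax := result or -errno
 */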
325
326 ENTRY(system_call)
327 CFI_STARTPROC simple
328 CFI_SIGNAL_FRAME
329 CFI_DEF_CFA rsp,PDA_STACKOFFSET
330 CFI_REGISTER rip,rcx
331 /*CFI_REGISTER rflags,r11*/
332 SWAPGS_UNSAFE_STACK
333 /*
334 * A hypervisor implementation might want to use a label
335 * after the swapgs, so that it can do the swapgs
336 * for the guest and jump here on syscall.
337 */
338 ENTRY(system_call_after_swapgs)
339
340 movq %rsp,%gs:pda_oldrsp
341 movq %gs:pda_kernelstack,%rsp
342 /*
343 * No need to follow this irqs off/on section - it's straight
344 * and short:
345 */
346 ENABLE_INTERRUPTS(CLBR_NONE)
347 SAVE_ARGS 8,1
348 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
349 movq %rcx,RIP-ARGOFFSET(%rsp)
350 CFI_REL_OFFSET rip,RIP-ARGOFFSET
351 GET_THREAD_INFO(%rcx)
352 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
353 TI_flags(%rcx)
354 jnz tracesys
355 cmpq $__NR_syscall_max,%rax
356 ja badsys
357 movq %r10,%rcx
358 call *sys_call_table(,%rax,8) # XXX: rip relative
359 movq %rax,RAX-ARGOFFSET(%rsp)
360 /*
361 * Syscall return path ending with SYSRET (fast path)
362 * Has an incomplete stack frame and an undefined top of stack.
363 */
364 ret_from_sys_call:
365 movl $_TIF_ALLWORK_MASK,%edi
366 /* edi: flagmask */
367 sysret_check:
368 LOCKDEP_SYS_EXIT
369 GET_THREAD_INFO(%rcx)
370 DISABLE_INTERRUPTS(CLBR_NONE)
371 TRACE_IRQS_OFF
372 movl TI_flags(%rcx),%edx
373 andl %edi,%edx
374 jnz sysret_careful
375 CFI_REMEMBER_STATE
376 /*
377 * sysretq will re-enable interrupts:
378 */
379 TRACE_IRQS_ON
380 movq RIP-ARGOFFSET(%rsp),%rcx
381 CFI_REGISTER rip,rcx
382 RESTORE_ARGS 0,-ARG_SKIP,1
383 /*CFI_REGISTER rflags,r11*/
384 movq %gs:pda_oldrsp, %rsp
385 USERGS_SYSRET64
386
387 CFI_RESTORE_STATE
388 /* Handle reschedules */
389 /* edx: work, edi: workmask */
390 sysret_careful:
391 bt $TIF_NEED_RESCHED,%edx
392 jnc sysret_signal
393 TRACE_IRQS_ON
394 ENABLE_INTERRUPTS(CLBR_NONE)
395 pushq %rdi
396 CFI_ADJUST_CFA_OFFSET 8
397 call schedule
398 popq %rdi
399 CFI_ADJUST_CFA_OFFSET -8
400 jmp sysret_check
401
402 /* Handle a signal */
403 sysret_signal:
404 TRACE_IRQS_ON
405 ENABLE_INTERRUPTS(CLBR_NONE)
406 testl $_TIF_DO_NOTIFY_MASK,%edx
407 jz 1f
408
409 /* Really a signal */
410 /* edx: work flags (arg3) */
411 leaq do_notify_resume(%rip),%rax
412 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
413 xorl %esi,%esi # oldset -> arg2
414 call ptregscall_common
415 1: movl $_TIF_WORK_MASK,%edi
416 /* Use IRET because the user could have changed the frame. This
417 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
418 DISABLE_INTERRUPTS(CLBR_NONE)
419 TRACE_IRQS_OFF
420 jmp int_with_check
421
422 badsys:
423 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
424 jmp ret_from_sys_call
425
426 /* Do syscall tracing */
427 tracesys:
428 SAVE_REST
429 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
430 FIXUP_TOP_OF_STACK %rdi
431 movq %rsp,%rdi
432 call syscall_trace_enter
433 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
434 RESTORE_REST
435 cmpq $__NR_syscall_max,%rax
436 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
437 movq %r10,%rcx /* fixup for C */
438 call *sys_call_table(,%rax,8)
439 movq %rax,RAX-ARGOFFSET(%rsp)
440 /* Use IRET because the user could have changed the frame */
441
442 /*
443 * Syscall return path ending with IRET.
444 * Has a correct top of stack, but a partial stack frame.
445 */
446 .globl int_ret_from_sys_call
447 int_ret_from_sys_call:
448 DISABLE_INTERRUPTS(CLBR_NONE)
449 TRACE_IRQS_OFF
450 testl $3,CS-ARGOFFSET(%rsp)
451 je retint_restore_args
452 movl $_TIF_ALLWORK_MASK,%edi
453 /* edi: mask to check */
454 int_with_check:
455 LOCKDEP_SYS_EXIT_IRQ
456 GET_THREAD_INFO(%rcx)
457 movl TI_flags(%rcx),%edx
458 andl %edi,%edx
459 jnz int_careful
460 andl $~TS_COMPAT,TI_status(%rcx)
461 jmp retint_swapgs
462
463 /* Either reschedule or signal or syscall exit tracking needed. */
464 /* First do a reschedule test. */
465 /* edx: work, edi: workmask */
466 int_careful:
467 bt $TIF_NEED_RESCHED,%edx
468 jnc int_very_careful
469 TRACE_IRQS_ON
470 ENABLE_INTERRUPTS(CLBR_NONE)
471 pushq %rdi
472 CFI_ADJUST_CFA_OFFSET 8
473 call schedule
474 popq %rdi
475 CFI_ADJUST_CFA_OFFSET -8
476 DISABLE_INTERRUPTS(CLBR_NONE)
477 TRACE_IRQS_OFF
478 jmp int_with_check
479
480 /* handle signals and tracing -- both require a full stack frame */
481 int_very_careful:
482 TRACE_IRQS_ON
483 ENABLE_INTERRUPTS(CLBR_NONE)
484 SAVE_REST
485 /* Check for syscall exit trace */
486 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
487 jz int_signal
488 pushq %rdi
489 CFI_ADJUST_CFA_OFFSET 8
490 leaq 8(%rsp),%rdi # &ptregs -> arg1
491 call syscall_trace_leave
492 popq %rdi
493 CFI_ADJUST_CFA_OFFSET -8
494 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
495 jmp int_restore_rest
496
497 int_signal:
498 testl $_TIF_DO_NOTIFY_MASK,%edx
499 jz 1f
500 movq %rsp,%rdi # &ptregs -> arg1
501 xorl %esi,%esi # oldset -> arg2
502 call do_notify_resume
503 1: movl $_TIF_WORK_MASK,%edi
504 int_restore_rest:
505 RESTORE_REST
506 DISABLE_INTERRUPTS(CLBR_NONE)
507 TRACE_IRQS_OFF
508 jmp int_with_check
509 CFI_ENDPROC
510 END(system_call)
511
512 /*
513 * Certain special system calls that need to save a full stack frame.
514 */
515
516 .macro PTREGSCALL label,func,arg
517 .globl \label
518 \label:
519 leaq \func(%rip),%rax
520 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
521 jmp ptregscall_common
522 END(\label)
523 .endm
524
525 CFI_STARTPROC
526
527 PTREGSCALL stub_clone, sys_clone, %r8
528 PTREGSCALL stub_fork, sys_fork, %rdi
529 PTREGSCALL stub_vfork, sys_vfork, %rdi
530 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
531 PTREGSCALL stub_iopl, sys_iopl, %rsi
532
533 ENTRY(ptregscall_common)
534 popq %r11
535 CFI_ADJUST_CFA_OFFSET -8
536 CFI_REGISTER rip, r11
537 SAVE_REST
538 movq %r11, %r15
539 CFI_REGISTER rip, r15
540 FIXUP_TOP_OF_STACK %r11
541 call *%rax
542 RESTORE_TOP_OF_STACK %r11
543 movq %r15, %r11
544 CFI_REGISTER rip, r11
545 RESTORE_REST
546 pushq %r11
547 CFI_ADJUST_CFA_OFFSET 8
548 CFI_REL_OFFSET rip, 0
549 ret
550 CFI_ENDPROC
551 END(ptregscall_common)
552
553 ENTRY(stub_execve)
554 CFI_STARTPROC
555 popq %r11
556 CFI_ADJUST_CFA_OFFSET -8
557 CFI_REGISTER rip, r11
558 SAVE_REST
559 FIXUP_TOP_OF_STACK %r11
560 movq %rsp, %rcx
561 call sys_execve
562 RESTORE_TOP_OF_STACK %r11
563 movq %rax,RAX(%rsp)
564 RESTORE_REST
565 jmp int_ret_from_sys_call
566 CFI_ENDPROC
567 END(stub_execve)
568
569 /*
570 * sigreturn is special because it needs to restore all registers on return.
571 * This cannot be done with SYSRET, so use the IRET return path instead.
572 */
573 ENTRY(stub_rt_sigreturn)
574 CFI_STARTPROC
575 addq $8, %rsp
576 CFI_ADJUST_CFA_OFFSET -8
577 SAVE_REST
578 movq %rsp,%rdi
579 FIXUP_TOP_OF_STACK %r11
580 call sys_rt_sigreturn
581 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
582 RESTORE_REST
583 jmp int_ret_from_sys_call
584 CFI_ENDPROC
585 END(stub_rt_sigreturn)
586
587 /*
588 * initial frame state for interrupts and exceptions
589 */
590 .macro _frame ref
591 CFI_STARTPROC simple
592 CFI_SIGNAL_FRAME
593 CFI_DEF_CFA rsp,SS+8-\ref
594 /*CFI_REL_OFFSET ss,SS-\ref*/
595 CFI_REL_OFFSET rsp,RSP-\ref
596 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
597 /*CFI_REL_OFFSET cs,CS-\ref*/
598 CFI_REL_OFFSET rip,RIP-\ref
599 .endm
600
601 /* initial frame state for interrupts (and exceptions without error code) */
602 #define INTR_FRAME _frame RIP
603 /* initial frame state for exceptions with error code (and interrupts with
604 vector already pushed) */
605 #define XCPT_FRAME _frame ORIG_RAX
606
607 /*
608 * Interrupt entry/exit.
609 *
610 * Interrupt entry points save only callee-clobbered registers in the fast path.
611 *
612 * Entry runs with interrupts off.
613 */
614
615 /* 0(%rsp): interrupt number */
616 .macro interrupt func
617 cld
618 SAVE_ARGS
619 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
620 pushq %rbp
621 CFI_ADJUST_CFA_OFFSET 8
622 CFI_REL_OFFSET rbp, 0
623 movq %rsp,%rbp
624 CFI_DEF_CFA_REGISTER rbp
625 testl $3,CS(%rdi)
626 je 1f
627 SWAPGS
628 /* irqcount is used to check whether a CPU is already on an interrupt
629 stack or not. While this is essentially redundant with preempt_count,
630 it is a little cheaper to use a separate counter in the PDA
631 (short of moving irq_enter into assembly, which would be too
632 much work). */
633 1: incl %gs:pda_irqcount
634 cmoveq %gs:pda_irqstackptr,%rsp
635 push %rbp # backlink for old unwinder
636 /*
637 * We entered an interrupt context - irqs are off:
638 */
639 TRACE_IRQS_OFF
640 call \func
641 .endm
642
643 ENTRY(common_interrupt)
644 XCPT_FRAME
645 interrupt do_IRQ
646 /* 0(%rsp): oldrsp-ARGOFFSET */
647 ret_from_intr:
648 DISABLE_INTERRUPTS(CLBR_NONE)
649 TRACE_IRQS_OFF
650 decl %gs:pda_irqcount
651 leaveq
652 CFI_DEF_CFA_REGISTER rsp
653 CFI_ADJUST_CFA_OFFSET -8
654 exit_intr:
655 GET_THREAD_INFO(%rcx)
656 testl $3,CS-ARGOFFSET(%rsp)
657 je retint_kernel
658
659 /* Interrupt came from user space */
660 /*
661 * Has a correct top of stack, but a partial stack frame
662 * %rcx: thread info. Interrupts off.
663 */
664 retint_with_reschedule:
665 movl $_TIF_WORK_MASK,%edi
666 retint_check:
667 LOCKDEP_SYS_EXIT_IRQ
668 movl TI_flags(%rcx),%edx
669 andl %edi,%edx
670 CFI_REMEMBER_STATE
671 jnz retint_careful
672
673 retint_swapgs: /* return to user-space */
674 /*
675 * The iretq could re-enable interrupts:
676 */
677 DISABLE_INTERRUPTS(CLBR_ANY)
678 TRACE_IRQS_IRETQ
679 SWAPGS
680 jmp restore_args
681
682 retint_restore_args: /* return to kernel space */
683 DISABLE_INTERRUPTS(CLBR_ANY)
684 /*
685 * The iretq could re-enable interrupts:
686 */
687 TRACE_IRQS_IRETQ
688 restore_args:
689 RESTORE_ARGS 0,8,0
690
691 irq_return:
692 INTERRUPT_RETURN
693
694 .section __ex_table, "a"
695 .quad irq_return, bad_iret
696 .previous
697
698 #ifdef CONFIG_PARAVIRT
699 ENTRY(native_iret)
700 iretq
701
702 .section __ex_table,"a"
703 .quad native_iret, bad_iret
704 .previous
705 #endif
706
707 .section .fixup,"ax"
708 bad_iret:
709 /*
710 * The iret traps when the %cs or %ss being restored is bogus.
711 * We've lost the original trap vector and error code.
712 * #GPF is the most likely one to get for an invalid selector.
713 * So pretend we completed the iret and took the #GPF in user mode.
714 *
715 * We are now running with the kernel GS after exception recovery.
716 * But error_entry expects us to have user GS to match the user %cs,
717 * so swap back.
718 */
719 pushq $0
720
721 SWAPGS
722 jmp general_protection
723
724 .previous
725
726 /* edi: workmask, edx: work */
727 retint_careful:
728 CFI_RESTORE_STATE
729 bt $TIF_NEED_RESCHED,%edx
730 jnc retint_signal
731 TRACE_IRQS_ON
732 ENABLE_INTERRUPTS(CLBR_NONE)
733 pushq %rdi
734 CFI_ADJUST_CFA_OFFSET 8
735 call schedule
736 popq %rdi
737 CFI_ADJUST_CFA_OFFSET -8
738 GET_THREAD_INFO(%rcx)
739 DISABLE_INTERRUPTS(CLBR_NONE)
740 TRACE_IRQS_OFF
741 jmp retint_check
742
743 retint_signal:
744 testl $_TIF_DO_NOTIFY_MASK,%edx
745 jz retint_swapgs
746 TRACE_IRQS_ON
747 ENABLE_INTERRUPTS(CLBR_NONE)
748 SAVE_REST
749 movq $-1,ORIG_RAX(%rsp)
750 xorl %esi,%esi # oldset
751 movq %rsp,%rdi # &pt_regs
752 call do_notify_resume
753 RESTORE_REST
754 DISABLE_INTERRUPTS(CLBR_NONE)
755 TRACE_IRQS_OFF
756 GET_THREAD_INFO(%rcx)
757 jmp retint_with_reschedule
758
759 #ifdef CONFIG_PREEMPT
760 /* Returning to kernel space. Check if we need preemption */
761 /* rcx: threadinfo. interrupts off. */
762 ENTRY(retint_kernel)
763 cmpl $0,TI_preempt_count(%rcx)
764 jnz retint_restore_args
765 bt $TIF_NEED_RESCHED,TI_flags(%rcx)
766 jnc retint_restore_args
767 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
768 jnc retint_restore_args
769 call preempt_schedule_irq
770 jmp exit_intr
771 #endif
772
773 CFI_ENDPROC
774 END(common_interrupt)
775
776 /*
777 * APIC interrupts.
778 */
779 .macro apicinterrupt num,func
780 INTR_FRAME
781 pushq $~(\num)
782 CFI_ADJUST_CFA_OFFSET 8
783 interrupt \func
784 jmp ret_from_intr
785 CFI_ENDPROC
786 .endm
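/*
 * Each APIC entry point below is a tiny stub: it pushes the one's
 * complement of its vector number (so the ORIG_RAX slot stays negative and
 * cannot be mistaken for a syscall number) and then shares the common
 * interrupt/return path via the "interrupt" macro and ret_from_intr.
 */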
787
788 ENTRY(thermal_interrupt)
789 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
790 END(thermal_interrupt)
791
792 ENTRY(threshold_interrupt)
793 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
794 END(threshold_interrupt)
795
796 #ifdef CONFIG_SMP
797 ENTRY(reschedule_interrupt)
798 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
799 END(reschedule_interrupt)
800
801 .macro INVALIDATE_ENTRY num
802 ENTRY(invalidate_interrupt\num)
803 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
804 END(invalidate_interrupt\num)
805 .endm
806
807 INVALIDATE_ENTRY 0
808 INVALIDATE_ENTRY 1
809 INVALIDATE_ENTRY 2
810 INVALIDATE_ENTRY 3
811 INVALIDATE_ENTRY 4
812 INVALIDATE_ENTRY 5
813 INVALIDATE_ENTRY 6
814 INVALIDATE_ENTRY 7
815
816 ENTRY(call_function_interrupt)
817 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
818 END(call_function_interrupt)
819 ENTRY(call_function_single_interrupt)
820 apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
821 END(call_function_single_interrupt)
822 ENTRY(irq_move_cleanup_interrupt)
823 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
824 END(irq_move_cleanup_interrupt)
825 #endif
826
827 ENTRY(apic_timer_interrupt)
828 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
829 END(apic_timer_interrupt)
830
831 ENTRY(uv_bau_message_intr1)
832 apicinterrupt 220,uv_bau_message_interrupt
833 END(uv_bau_message_intr1)
834
835 ENTRY(error_interrupt)
836 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
837 END(error_interrupt)
838
839 ENTRY(spurious_interrupt)
840 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
841 END(spurious_interrupt)
842
843 /*
844 * Exception entry points.
845 */
846 .macro zeroentry sym
847 INTR_FRAME
848 PARAVIRT_ADJUST_EXCEPTION_FRAME
849 pushq $0 /* push error code/oldrax */
850 CFI_ADJUST_CFA_OFFSET 8
851 pushq %rax /* push real oldrax to the rdi slot */
852 CFI_ADJUST_CFA_OFFSET 8
853 CFI_REL_OFFSET rax,0
854 leaq \sym(%rip),%rax
855 jmp error_entry
856 CFI_ENDPROC
857 .endm
858
859 .macro errorentry sym
860 XCPT_FRAME
861 PARAVIRT_ADJUST_EXCEPTION_FRAME
862 pushq %rax
863 CFI_ADJUST_CFA_OFFSET 8
864 CFI_REL_OFFSET rax,0
865 leaq \sym(%rip),%rax
866 jmp error_entry
867 CFI_ENDPROC
868 .endm
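/*
 * The difference between the two macros above: zeroentry is for exceptions
 * that do not push an error code, so it pushes a 0 into that slot itself;
 * errorentry relies on the error code the CPU already pushed. Both stash
 * the C handler address in %rax and branch to the common error_entry path.
 */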
869
870 /* error code is on the stack already */
871 /* handle NMI like exceptions that can happen everywhere */
872 .macro paranoidentry sym, ist=0, irqtrace=1
873 SAVE_ALL
874 cld
875 movl $1,%ebx
876 movl $MSR_GS_BASE,%ecx
877 rdmsr
878 testl %edx,%edx
879 js 1f
880 SWAPGS
881 xorl %ebx,%ebx
882 1:
883 .if \ist
884 movq %gs:pda_data_offset, %rbp
885 .endif
886 movq %rsp,%rdi
887 movq ORIG_RAX(%rsp),%rsi
888 movq $-1,ORIG_RAX(%rsp)
889 .if \ist
890 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
891 .endif
892 call \sym
893 .if \ist
894 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
895 .endif
896 DISABLE_INTERRUPTS(CLBR_NONE)
897 .if \irqtrace
898 TRACE_IRQS_OFF
899 .endif
900 .endm
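/*
 * Convention shared by paranoidentry and the paranoid exit path: %ebx == 1
 * means GS was already the kernel's (no swapgs needed on exit), %ebx == 0
 * means we did SWAPGS on entry and the exit path must swap back. The
 * MSR_GS_BASE read above decides this: a negative (kernel) base leaves
 * ebx at 1.
 */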
901
902 /*
903 * "Paranoid" exit path from exception stack.
904 * Paranoid because this is used by NMIs and cannot take
905 * any kernel state for granted.
906 * We don't do kernel preemption checks here, because only the NMI
907 * case should be common, and NMIs do not enable IRQs and so
908 * cannot get reschedule ticks.
909 *
910 * "trace" is 0 for the NMI handler only, because irq tracing
911 * is fundamentally NMI-unsafe. (We cannot change the soft and
912 * hard interrupt flags at once, atomically.)
913 */
914 .macro paranoidexit trace=1
915 /* ebx: no swapgs flag */
916 paranoid_exit\trace:
917 testl %ebx,%ebx /* swapgs needed? */
918 jnz paranoid_restore\trace
919 testl $3,CS(%rsp)
920 jnz paranoid_userspace\trace
921 paranoid_swapgs\trace:
922 .if \trace
923 TRACE_IRQS_IRETQ 0
924 .endif
925 SWAPGS_UNSAFE_STACK
926 paranoid_restore\trace:
927 RESTORE_ALL 8
928 jmp irq_return
929 paranoid_userspace\trace:
930 GET_THREAD_INFO(%rcx)
931 movl TI_flags(%rcx),%ebx
932 andl $_TIF_WORK_MASK,%ebx
933 jz paranoid_swapgs\trace
934 movq %rsp,%rdi /* &pt_regs */
935 call sync_regs
936 movq %rax,%rsp /* switch stack for scheduling */
937 testl $_TIF_NEED_RESCHED,%ebx
938 jnz paranoid_schedule\trace
939 movl %ebx,%edx /* arg3: thread flags */
940 .if \trace
941 TRACE_IRQS_ON
942 .endif
943 ENABLE_INTERRUPTS(CLBR_NONE)
944 xorl %esi,%esi /* arg2: oldset */
945 movq %rsp,%rdi /* arg1: &pt_regs */
946 call do_notify_resume
947 DISABLE_INTERRUPTS(CLBR_NONE)
948 .if \trace
949 TRACE_IRQS_OFF
950 .endif
951 jmp paranoid_userspace\trace
952 paranoid_schedule\trace:
953 .if \trace
954 TRACE_IRQS_ON
955 .endif
956 ENABLE_INTERRUPTS(CLBR_ANY)
957 call schedule
958 DISABLE_INTERRUPTS(CLBR_ANY)
959 .if \trace
960 TRACE_IRQS_OFF
961 .endif
962 jmp paranoid_userspace\trace
963 CFI_ENDPROC
964 .endm
965
966 /*
967 * Exception entry point. This expects an error code/orig_rax on the stack
968 * and the exception handler in %rax.
969 */
970 KPROBE_ENTRY(error_entry)
971 _frame RDI
972 CFI_REL_OFFSET rax,0
973 /* rdi slot contains rax, oldrax contains error code */
974 cld
975 subq $14*8,%rsp
976 CFI_ADJUST_CFA_OFFSET (14*8)
977 movq %rsi,13*8(%rsp)
978 CFI_REL_OFFSET rsi,RSI
979 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
980 CFI_REGISTER rax,rsi
981 movq %rdx,12*8(%rsp)
982 CFI_REL_OFFSET rdx,RDX
983 movq %rcx,11*8(%rsp)
984 CFI_REL_OFFSET rcx,RCX
985 movq %rsi,10*8(%rsp) /* store rax */
986 CFI_REL_OFFSET rax,RAX
987 movq %r8, 9*8(%rsp)
988 CFI_REL_OFFSET r8,R8
989 movq %r9, 8*8(%rsp)
990 CFI_REL_OFFSET r9,R9
991 movq %r10,7*8(%rsp)
992 CFI_REL_OFFSET r10,R10
993 movq %r11,6*8(%rsp)
994 CFI_REL_OFFSET r11,R11
995 movq %rbx,5*8(%rsp)
996 CFI_REL_OFFSET rbx,RBX
997 movq %rbp,4*8(%rsp)
998 CFI_REL_OFFSET rbp,RBP
999 movq %r12,3*8(%rsp)
1000 CFI_REL_OFFSET r12,R12
1001 movq %r13,2*8(%rsp)
1002 CFI_REL_OFFSET r13,R13
1003 movq %r14,1*8(%rsp)
1004 CFI_REL_OFFSET r14,R14
1005 movq %r15,(%rsp)
1006 CFI_REL_OFFSET r15,R15
1007 xorl %ebx,%ebx
1008 testl $3,CS(%rsp)
1009 je error_kernelspace
1010 error_swapgs:
1011 SWAPGS
1012 error_sti:
1013 movq %rdi,RDI(%rsp)
1014 CFI_REL_OFFSET rdi,RDI
1015 movq %rsp,%rdi
1016 movq ORIG_RAX(%rsp),%rsi /* get error code */
1017 movq $-1,ORIG_RAX(%rsp)
1018 call *%rax
1019 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
1020 error_exit:
1021 movl %ebx,%eax
1022 RESTORE_REST
1023 DISABLE_INTERRUPTS(CLBR_NONE)
1024 TRACE_IRQS_OFF
1025 GET_THREAD_INFO(%rcx)
1026 testl %eax,%eax
1027 jne retint_kernel
1028 LOCKDEP_SYS_EXIT_IRQ
1029 movl TI_flags(%rcx),%edx
1030 movl $_TIF_WORK_MASK,%edi
1031 andl %edi,%edx
1032 jnz retint_careful
1033 jmp retint_swapgs
1034 CFI_ENDPROC
1035
1036 error_kernelspace:
1037 incl %ebx
1038 /* There are two places in the kernel that can potentially fault with
1039 usergs. Handle them here. The exception handlers after
1040 iret run with kernel gs again, so don't set the user-space flag.
1041 B-stepping K8s sometimes report a truncated RIP for IRET
1042 exceptions returning to compat mode. Check for these here too. */
1043 leaq irq_return(%rip),%rcx
1044 cmpq %rcx,RIP(%rsp)
1045 je error_swapgs
1046 movl %ecx,%ecx /* zero extend */
1047 cmpq %rcx,RIP(%rsp)
1048 je error_swapgs
1049 cmpq $gs_change,RIP(%rsp)
1050 je error_swapgs
1051 jmp error_sti
1052 KPROBE_END(error_entry)
1053
1054 /* Reload gs selector with exception handling */
1055 /* edi: new selector */
1056 ENTRY(native_load_gs_index)
1057 CFI_STARTPROC
1058 pushf
1059 CFI_ADJUST_CFA_OFFSET 8
1060 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
1061 SWAPGS
1062 gs_change:
1063 movl %edi,%gs
1064 2: mfence /* workaround */
1065 SWAPGS
1066 popf
1067 CFI_ADJUST_CFA_OFFSET -8
1068 ret
1069 CFI_ENDPROC
1070 ENDPROC(native_load_gs_index)
1071
1072 .section __ex_table,"a"
1073 .align 8
1074 .quad gs_change,bad_gs
1075 .previous
1076 .section .fixup,"ax"
1077 /* running with kernelgs */
1078 bad_gs:
1079 SWAPGS /* switch back to user gs */
1080 xorl %eax,%eax
1081 movl %eax,%gs
1082 jmp 2b
1083 .previous
1084
1085 /*
1086 * Create a kernel thread.
1087 *
1088 * C extern interface:
1089 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
1090 *
1091 * asm input arguments:
1092 * rdi: fn, rsi: arg, rdx: flags
1093 */
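/*
 * A minimal usage sketch (hypothetical caller, kernel context only):
 *	kernel_thread(my_thread_fn, my_data, CLONE_FS | CLONE_FILES | SIGCHLD);
 * The child starts in child_rip below, which calls fn(arg) and then
 * do_exit() with its return value.
 */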
1094 ENTRY(kernel_thread)
1095 CFI_STARTPROC
1096 FAKE_STACK_FRAME $child_rip
1097 SAVE_ALL
1098
1099 # rdi: flags, rsi: usp, rdx: will be &pt_regs
1100 movq %rdx,%rdi
1101 orq kernel_thread_flags(%rip),%rdi
1102 movq $-1, %rsi
1103 movq %rsp, %rdx
1104
1105 xorl %r8d,%r8d
1106 xorl %r9d,%r9d
1107
1108 # clone now
1109 call do_fork
1110 movq %rax,RAX(%rsp)
1111 xorl %edi,%edi
1112
1113 /*
1114 * It isn't worth checking for a reschedule here,
1115 * so internally to the x86_64 port you can rely on kernel_thread()
1116 * not rescheduling the child before returning; this avoids the need
1117 * for hacks, for example to fork off the per-CPU idle tasks.
1118 * [Hopefully no generic code relies on the reschedule -AK]
1119 */
1120 RESTORE_ALL
1121 UNFAKE_STACK_FRAME
1122 ret
1123 CFI_ENDPROC
1124 ENDPROC(kernel_thread)
1125
1126 child_rip:
1127 pushq $0 # fake return address
1128 CFI_STARTPROC
1129 /*
1130 * Here we are in the child and the registers are set as they were
1131 * at kernel_thread() invocation in the parent.
1132 */
1133 movq %rdi, %rax
1134 movq %rsi, %rdi
1135 call *%rax
1136 # exit
1137 mov %eax, %edi
1138 call do_exit
1139 CFI_ENDPROC
1140 ENDPROC(child_rip)
1141
1142 /*
1143 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
1144 *
1145 * C extern interface:
1146 * extern long execve(char *name, char **argv, char **envp)
1147 *
1148 * asm input arguments:
1149 * rdi: name, rsi: argv, rdx: envp
1150 *
1151 * We want to fall back into:
1152 * extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs *regs)
1153 *
1154 * sys_execve asm fallback arguments:
1155 * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
1156 */
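/*
 * Note on the code below: kernel_execve() runs in kernel context, so it
 * builds a fake user-style frame first. On success (rax == 0) it leaves
 * through int_ret_from_sys_call and "returns" into the freshly exec'ed
 * user program; on failure it unwinds the fake frame and returns the
 * error to its in-kernel caller.
 */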
1157 ENTRY(kernel_execve)
1158 CFI_STARTPROC
1159 FAKE_STACK_FRAME $0
1160 SAVE_ALL
1161 movq %rsp,%rcx
1162 call sys_execve
1163 movq %rax, RAX(%rsp)
1164 RESTORE_REST
1165 testq %rax,%rax
1166 je int_ret_from_sys_call
1167 RESTORE_ARGS
1168 UNFAKE_STACK_FRAME
1169 ret
1170 CFI_ENDPROC
1171 ENDPROC(kernel_execve)
1172
1173 KPROBE_ENTRY(page_fault)
1174 errorentry do_page_fault
1175 KPROBE_END(page_fault)
1176
1177 ENTRY(coprocessor_error)
1178 zeroentry do_coprocessor_error
1179 END(coprocessor_error)
1180
1181 ENTRY(simd_coprocessor_error)
1182 zeroentry do_simd_coprocessor_error
1183 END(simd_coprocessor_error)
1184
1185 ENTRY(device_not_available)
1186 zeroentry math_state_restore
1187 END(device_not_available)
1188
1189 /* runs on exception stack */
1190 KPROBE_ENTRY(debug)
1191 INTR_FRAME
1192 pushq $0
1193 CFI_ADJUST_CFA_OFFSET 8
1194 paranoidentry do_debug, DEBUG_STACK
1195 paranoidexit
1196 KPROBE_END(debug)
1197
1198 /* runs on exception stack */
1199 KPROBE_ENTRY(nmi)
1200 INTR_FRAME
1201 pushq $-1
1202 CFI_ADJUST_CFA_OFFSET 8
1203 paranoidentry do_nmi, 0, 0
1204 #ifdef CONFIG_TRACE_IRQFLAGS
1205 paranoidexit 0
1206 #else
1207 jmp paranoid_exit1
1208 CFI_ENDPROC
1209 #endif
1210 KPROBE_END(nmi)
1211
1212 KPROBE_ENTRY(int3)
1213 INTR_FRAME
1214 pushq $0
1215 CFI_ADJUST_CFA_OFFSET 8
1216 paranoidentry do_int3, DEBUG_STACK
1217 jmp paranoid_exit1
1218 CFI_ENDPROC
1219 KPROBE_END(int3)
1220
1221 ENTRY(overflow)
1222 zeroentry do_overflow
1223 END(overflow)
1224
1225 ENTRY(bounds)
1226 zeroentry do_bounds
1227 END(bounds)
1228
1229 ENTRY(invalid_op)
1230 zeroentry do_invalid_op
1231 END(invalid_op)
1232
1233 ENTRY(coprocessor_segment_overrun)
1234 zeroentry do_coprocessor_segment_overrun
1235 END(coprocessor_segment_overrun)
1236
1237 /* runs on exception stack */
1238 ENTRY(double_fault)
1239 XCPT_FRAME
1240 paranoidentry do_double_fault
1241 jmp paranoid_exit1
1242 CFI_ENDPROC
1243 END(double_fault)
1244
1245 ENTRY(invalid_TSS)
1246 errorentry do_invalid_TSS
1247 END(invalid_TSS)
1248
1249 ENTRY(segment_not_present)
1250 errorentry do_segment_not_present
1251 END(segment_not_present)
1252
1253 /* runs on exception stack */
1254 ENTRY(stack_segment)
1255 XCPT_FRAME
1256 paranoidentry do_stack_segment
1257 jmp paranoid_exit1
1258 CFI_ENDPROC
1259 END(stack_segment)
1260
1261 KPROBE_ENTRY(general_protection)
1262 errorentry do_general_protection
1263 KPROBE_END(general_protection)
1264
1265 ENTRY(alignment_check)
1266 errorentry do_alignment_check
1267 END(alignment_check)
1268
1269 ENTRY(divide_error)
1270 zeroentry do_divide_error
1271 END(divide_error)
1272
1273 ENTRY(spurious_interrupt_bug)
1274 zeroentry do_spurious_interrupt_bug
1275 END(spurious_interrupt_bug)
1276
1277 #ifdef CONFIG_X86_MCE
1278 /* runs on exception stack */
1279 ENTRY(machine_check)
1280 INTR_FRAME
1281 pushq $0
1282 CFI_ADJUST_CFA_OFFSET 8
1283 paranoidentry do_machine_check
1284 jmp paranoid_exit1
1285 CFI_ENDPROC
1286 END(machine_check)
1287 #endif
1288
1289 /* Call softirq on interrupt stack. Interrupts are off. */
1290 ENTRY(call_softirq)
1291 CFI_STARTPROC
1292 push %rbp
1293 CFI_ADJUST_CFA_OFFSET 8
1294 CFI_REL_OFFSET rbp,0
1295 mov %rsp,%rbp
1296 CFI_DEF_CFA_REGISTER rbp
1297 incl %gs:pda_irqcount
1298 cmove %gs:pda_irqstackptr,%rsp
1299 push %rbp # backlink for old unwinder
1300 call __do_softirq
1301 leaveq
1302 CFI_DEF_CFA_REGISTER rsp
1303 CFI_ADJUST_CFA_OFFSET -8
1304 decl %gs:pda_irqcount
1305 ret
1306 CFI_ENDPROC
1307 ENDPROC(call_softirq)
1308
1309 KPROBE_ENTRY(ignore_sysret)
1310 CFI_STARTPROC
1311 mov $-ENOSYS,%eax
1312 sysret
1313 CFI_ENDPROC
1314 ENDPROC(ignore_sysret)