1 /*
2 * linux/arch/x86_64/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 */
8
9 /*
10 * entry.S contains the system-call and fault low-level handling routines.
11 *
12  * NOTE: This code handles signal recognition, which happens every time
13  * after an interrupt and after each system call.
14  *
15  * Normal syscalls and interrupts don't save a full stack frame; this is
16  * only done for syscall tracing, signals or fork/exec et al.
17 *
18 * A note on terminology:
19 * - top of stack: Architecture defined interrupt frame from SS to RIP
20 * at the top of the kernel process stack.
21  * - partial stack frame: partially saved registers up to R11.
22  * - full stack frame: Like partial stack frame, but all registers saved.
23 *
24 * Some macro usage:
25 * - CFI macros are used to generate dwarf2 unwind information for better
26 * backtraces. They don't change any code.
27 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
28 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
29  *   There are unfortunately lots of special cases where some registers are
30  *   not touched. The macro is a big mess that should be cleaned up.
31 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
32 * Gives a full stack frame.
33  * - ENTRY/END - Define functions in the symbol table.
34 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
35 * frame that is otherwise undefined after a SYSCALL
36  * - TRACE_IRQS_* - Trace hard interrupt state for lock debugging.
37 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
38 */
39
40 #include <linux/linkage.h>
41 #include <asm/segment.h>
42 #include <asm/cache.h>
43 #include <asm/errno.h>
44 #include <asm/dwarf2.h>
45 #include <asm/calling.h>
46 #include <asm/asm-offsets.h>
47 #include <asm/msr.h>
48 #include <asm/unistd.h>
49 #include <asm/thread_info.h>
50 #include <asm/hw_irq.h>
51 #include <asm/page.h>
52 #include <asm/irqflags.h>
53 #include <asm/paravirt.h>
54 #include <asm/ftrace.h>
55
56 .code64
57
58 #ifdef CONFIG_FTRACE
59 #ifdef CONFIG_DYNAMIC_FTRACE
60 ENTRY(mcount)
61
62 subq $0x38, %rsp
63 movq %rax, (%rsp)
64 movq %rcx, 8(%rsp)
65 movq %rdx, 16(%rsp)
66 movq %rsi, 24(%rsp)
67 movq %rdi, 32(%rsp)
68 movq %r8, 40(%rsp)
69 movq %r9, 48(%rsp)
70
71 movq 0x38(%rsp), %rdi
72 subq $MCOUNT_INSN_SIZE, %rdi
73
74 .globl mcount_call
75 mcount_call:
76 call ftrace_stub
77
78 movq 48(%rsp), %r9
79 movq 40(%rsp), %r8
80 movq 32(%rsp), %rdi
81 movq 24(%rsp), %rsi
82 movq 16(%rsp), %rdx
83 movq 8(%rsp), %rcx
84 movq (%rsp), %rax
85 addq $0x38, %rsp
86
87 retq
88 END(mcount)
89
90 ENTRY(ftrace_caller)
91
92 /* taken from glibc */
93 subq $0x38, %rsp
94 movq %rax, (%rsp)
95 movq %rcx, 8(%rsp)
96 movq %rdx, 16(%rsp)
97 movq %rsi, 24(%rsp)
98 movq %rdi, 32(%rsp)
99 movq %r8, 40(%rsp)
100 movq %r9, 48(%rsp)
101
102 movq 0x38(%rsp), %rdi
103 movq 8(%rbp), %rsi
104 subq $MCOUNT_INSN_SIZE, %rdi
105
106 .globl ftrace_call
107 ftrace_call:
108 call ftrace_stub
109
110 movq 48(%rsp), %r9
111 movq 40(%rsp), %r8
112 movq 32(%rsp), %rdi
113 movq 24(%rsp), %rsi
114 movq 16(%rsp), %rdx
115 movq 8(%rsp), %rcx
116 movq (%rsp), %rax
117 addq $0x38, %rsp
118
119 .globl ftrace_stub
120 ftrace_stub:
121 retq
122 END(ftrace_caller)
123
124 #else /* ! CONFIG_DYNAMIC_FTRACE */
125 ENTRY(mcount)
126 cmpq $ftrace_stub, ftrace_trace_function
127 jnz trace
128 .globl ftrace_stub
129 ftrace_stub:
130 retq
131
132 trace:
133 /* taken from glibc */
134 subq $0x38, %rsp
135 movq %rax, (%rsp)
136 movq %rcx, 8(%rsp)
137 movq %rdx, 16(%rsp)
138 movq %rsi, 24(%rsp)
139 movq %rdi, 32(%rsp)
140 movq %r8, 40(%rsp)
141 movq %r9, 48(%rsp)
142
143 movq 0x38(%rsp), %rdi
144 movq 8(%rbp), %rsi
145 subq $MCOUNT_INSN_SIZE, %rdi
146
147 call *ftrace_trace_function
148
149 movq 48(%rsp), %r9
150 movq 40(%rsp), %r8
151 movq 32(%rsp), %rdi
152 movq 24(%rsp), %rsi
153 movq 16(%rsp), %rdx
154 movq 8(%rsp), %rcx
155 movq (%rsp), %rax
156 addq $0x38, %rsp
157
158 jmp ftrace_stub
159 END(mcount)
160 #endif /* CONFIG_DYNAMIC_FTRACE */
161 #endif /* CONFIG_FTRACE */
162
163 #ifndef CONFIG_PREEMPT
164 #define retint_kernel retint_restore_args
165 #endif
166
167 #ifdef CONFIG_PARAVIRT
168 ENTRY(native_usergs_sysret64)
169 swapgs
170 sysretq
171 #endif /* CONFIG_PARAVIRT */
172
173
174 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
175 #ifdef CONFIG_TRACE_IRQFLAGS
176 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
177 jnc 1f
178 TRACE_IRQS_ON
179 1:
180 #endif
181 .endm
182
183 /*
184 * C code is not supposed to know about undefined top of stack. Every time
185  * a C function with a pt_regs argument is called from the SYSCALL-based
186  * fast path, FIXUP_TOP_OF_STACK is needed.
187 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
188 * manipulation.
189 */
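/*
 * For example, the PTREGSCALL stubs below (stub_clone, stub_fork, stub_vfork,
 * ...) reach their C handlers through ptregscall_common, which runs
 * FIXUP_TOP_OF_STACK before the call and RESTORE_TOP_OF_STACK after it, so
 * the handlers see a fully defined pt_regs.
 */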
190
191 /* %rsp:at FRAMEEND */
192 .macro FIXUP_TOP_OF_STACK tmp
193 movq %gs:pda_oldrsp,\tmp
194 movq \tmp,RSP(%rsp)
195 movq $__USER_DS,SS(%rsp)
196 movq $__USER_CS,CS(%rsp)
197 movq $-1,RCX(%rsp)
198 movq R11(%rsp),\tmp /* get eflags */
199 movq \tmp,EFLAGS(%rsp)
200 .endm
201
202 .macro RESTORE_TOP_OF_STACK tmp,offset=0
203 movq RSP-\offset(%rsp),\tmp
204 movq \tmp,%gs:pda_oldrsp
205 movq EFLAGS-\offset(%rsp),\tmp
206 movq \tmp,R11-\offset(%rsp)
207 .endm
208
209 .macro FAKE_STACK_FRAME child_rip
210 /* push in order ss, rsp, eflags, cs, rip */
211 xorl %eax, %eax
212 pushq $__KERNEL_DS /* ss */
213 CFI_ADJUST_CFA_OFFSET 8
214 /*CFI_REL_OFFSET ss,0*/
215 pushq %rax /* rsp */
216 CFI_ADJUST_CFA_OFFSET 8
217 CFI_REL_OFFSET rsp,0
218 pushq $(1<<9) /* eflags - interrupts on */
219 CFI_ADJUST_CFA_OFFSET 8
220 /*CFI_REL_OFFSET rflags,0*/
221 pushq $__KERNEL_CS /* cs */
222 CFI_ADJUST_CFA_OFFSET 8
223 /*CFI_REL_OFFSET cs,0*/
224 pushq \child_rip /* rip */
225 CFI_ADJUST_CFA_OFFSET 8
226 CFI_REL_OFFSET rip,0
227 pushq %rax /* orig rax */
228 CFI_ADJUST_CFA_OFFSET 8
229 .endm
230
231 .macro UNFAKE_STACK_FRAME
232 addq $8*6, %rsp
233 CFI_ADJUST_CFA_OFFSET -(6*8)
234 .endm
235
236 .macro CFI_DEFAULT_STACK start=1
237 .if \start
238 CFI_STARTPROC simple
239 CFI_SIGNAL_FRAME
240 CFI_DEF_CFA rsp,SS+8
241 .else
242 CFI_DEF_CFA_OFFSET SS+8
243 .endif
244 CFI_REL_OFFSET r15,R15
245 CFI_REL_OFFSET r14,R14
246 CFI_REL_OFFSET r13,R13
247 CFI_REL_OFFSET r12,R12
248 CFI_REL_OFFSET rbp,RBP
249 CFI_REL_OFFSET rbx,RBX
250 CFI_REL_OFFSET r11,R11
251 CFI_REL_OFFSET r10,R10
252 CFI_REL_OFFSET r9,R9
253 CFI_REL_OFFSET r8,R8
254 CFI_REL_OFFSET rax,RAX
255 CFI_REL_OFFSET rcx,RCX
256 CFI_REL_OFFSET rdx,RDX
257 CFI_REL_OFFSET rsi,RSI
258 CFI_REL_OFFSET rdi,RDI
259 CFI_REL_OFFSET rip,RIP
260 /*CFI_REL_OFFSET cs,CS*/
261 /*CFI_REL_OFFSET rflags,EFLAGS*/
262 CFI_REL_OFFSET rsp,RSP
263 /*CFI_REL_OFFSET ss,SS*/
264 .endm
265 /*
266 * A newly forked process directly context switches into this.
267 */
268 /* rdi: prev */
269 ENTRY(ret_from_fork)
270 CFI_DEFAULT_STACK
271 push kernel_eflags(%rip)
272 	CFI_ADJUST_CFA_OFFSET 8
273 popf # reset kernel eflags
274 	CFI_ADJUST_CFA_OFFSET -8
275 call schedule_tail
276 GET_THREAD_INFO(%rcx)
277 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
278 jnz rff_trace
279 rff_action:
280 RESTORE_REST
281 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
282 je int_ret_from_sys_call
283 testl $_TIF_IA32,TI_flags(%rcx)
284 jnz int_ret_from_sys_call
285 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
286 jmp ret_from_sys_call
287 rff_trace:
288 movq %rsp,%rdi
289 call syscall_trace_leave
290 GET_THREAD_INFO(%rcx)
291 jmp rff_action
292 CFI_ENDPROC
293 END(ret_from_fork)
294
295 /*
296  * System call entry. Up to 6 arguments in registers are supported.
297 *
298 * SYSCALL does not save anything on the stack and does not change the
299 * stack pointer.
300 */
301
302 /*
303 * Register setup:
304 * rax system call number
305 * rdi arg0
306 * rcx return address for syscall/sysret, C arg3
307 * rsi arg1
308 * rdx arg2
309 * r10 arg3 (--> moved to rcx for C)
310 * r8 arg4
311 * r9 arg5
312 * r11 eflags for syscall/sysret, temporary for C
313 * r12-r15,rbp,rbx saved by C code, not touched.
314 *
315 * Interrupts are off on entry.
316 * Only called from user space.
317 *
318  * XXX	if we had a free scratch register we could save the RSP into the stack frame
319  *	and report it properly in ps. Unfortunately we don't have one.
320  *
321  * When the user can change the frames, always force IRET. That is because
322  * it deals with non-canonical addresses better. SYSRET has trouble
323  * with them due to bugs in both AMD and Intel CPUs.
324 */
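/*
 * Illustrative user-space sketch of the convention above (not part of this
 * file); e.g. write(1, buf, 14) is issued roughly as:
 *
 *	movl	$1, %eax		# __NR_write
 *	movl	$1, %edi		# arg0: fd
 *	leaq	buf(%rip), %rsi		# arg1: buffer (example label)
 *	movl	$14, %edx		# arg2: count
 *	syscall				# rcx := return RIP, r11 := rflags
 *
 * system_call below relies on exactly this register layout.
 */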
325
326 ENTRY(system_call)
327 CFI_STARTPROC simple
328 CFI_SIGNAL_FRAME
329 CFI_DEF_CFA rsp,PDA_STACKOFFSET
330 CFI_REGISTER rip,rcx
331 /*CFI_REGISTER rflags,r11*/
332 SWAPGS_UNSAFE_STACK
333 /*
334 * A hypervisor implementation might want to use a label
335 * after the swapgs, so that it can do the swapgs
336 * for the guest and jump here on syscall.
337 */
338 ENTRY(system_call_after_swapgs)
339
340 movq %rsp,%gs:pda_oldrsp
341 movq %gs:pda_kernelstack,%rsp
342 /*
343 * No need to follow this irqs off/on section - it's straight
344 * and short:
345 */
346 ENABLE_INTERRUPTS(CLBR_NONE)
347 SAVE_ARGS 8,1
348 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
349 movq %rcx,RIP-ARGOFFSET(%rsp)
350 CFI_REL_OFFSET rip,RIP-ARGOFFSET
351 GET_THREAD_INFO(%rcx)
352 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
353 jnz tracesys
354 cmpq $__NR_syscall_max,%rax
355 ja badsys
356 movq %r10,%rcx
357 call *sys_call_table(,%rax,8) # XXX: rip relative
358 movq %rax,RAX-ARGOFFSET(%rsp)
359 /*
360 * Syscall return path ending with SYSRET (fast path)
361 * Has incomplete stack frame and undefined top of stack.
362 */
363 ret_from_sys_call:
364 movl $_TIF_ALLWORK_MASK,%edi
365 /* edi: flagmask */
366 sysret_check:
367 LOCKDEP_SYS_EXIT
368 GET_THREAD_INFO(%rcx)
369 DISABLE_INTERRUPTS(CLBR_NONE)
370 TRACE_IRQS_OFF
371 movl TI_flags(%rcx),%edx
372 andl %edi,%edx
373 jnz sysret_careful
374 CFI_REMEMBER_STATE
375 /*
376 * sysretq will re-enable interrupts:
377 */
378 TRACE_IRQS_ON
379 movq RIP-ARGOFFSET(%rsp),%rcx
380 CFI_REGISTER rip,rcx
381 RESTORE_ARGS 0,-ARG_SKIP,1
382 /*CFI_REGISTER rflags,r11*/
383 movq %gs:pda_oldrsp, %rsp
384 USERGS_SYSRET64
385
386 CFI_RESTORE_STATE
387 /* Handle reschedules */
388 /* edx: work, edi: workmask */
389 sysret_careful:
390 bt $TIF_NEED_RESCHED,%edx
391 jnc sysret_signal
392 TRACE_IRQS_ON
393 ENABLE_INTERRUPTS(CLBR_NONE)
394 pushq %rdi
395 CFI_ADJUST_CFA_OFFSET 8
396 call schedule
397 popq %rdi
398 CFI_ADJUST_CFA_OFFSET -8
399 jmp sysret_check
400
401 /* Handle a signal */
402 sysret_signal:
403 TRACE_IRQS_ON
404 ENABLE_INTERRUPTS(CLBR_NONE)
405 testl $_TIF_DO_NOTIFY_MASK,%edx
406 jz 1f
407
408 /* Really a signal */
409 /* edx: work flags (arg3) */
410 leaq do_notify_resume(%rip),%rax
411 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
412 xorl %esi,%esi # oldset -> arg2
413 call ptregscall_common
414 1: movl $_TIF_WORK_MASK,%edi
415 	/* Use IRET because the user could have changed the frame. This
416 	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
417 DISABLE_INTERRUPTS(CLBR_NONE)
418 TRACE_IRQS_OFF
419 jmp int_with_check
420
421 badsys:
422 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
423 jmp ret_from_sys_call
424
425 /* Do syscall tracing */
426 tracesys:
427 SAVE_REST
428 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
429 FIXUP_TOP_OF_STACK %rdi
430 movq %rsp,%rdi
431 call syscall_trace_enter
432 /*
433 * Reload arg registers from stack in case ptrace changed them.
434 * We don't reload %rax because syscall_trace_enter() returned
435 * the value it wants us to use in the table lookup.
436 */
437 LOAD_ARGS ARGOFFSET, 1
438 RESTORE_REST
439 cmpq $__NR_syscall_max,%rax
440 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
441 movq %r10,%rcx /* fixup for C */
442 call *sys_call_table(,%rax,8)
443 movq %rax,RAX-ARGOFFSET(%rsp)
444 	/* Use IRET because the user could have changed the frame */
445
446 /*
447 * Syscall return path ending with IRET.
448 * Has correct top of stack, but partial stack frame.
449 */
450 .globl int_ret_from_sys_call
451 int_ret_from_sys_call:
452 DISABLE_INTERRUPTS(CLBR_NONE)
453 TRACE_IRQS_OFF
454 testl $3,CS-ARGOFFSET(%rsp)
455 je retint_restore_args
456 movl $_TIF_ALLWORK_MASK,%edi
457 /* edi: mask to check */
458 int_with_check:
459 LOCKDEP_SYS_EXIT_IRQ
460 GET_THREAD_INFO(%rcx)
461 movl TI_flags(%rcx),%edx
462 andl %edi,%edx
463 jnz int_careful
464 andl $~TS_COMPAT,TI_status(%rcx)
465 jmp retint_swapgs
466
467 /* Either reschedule or signal or syscall exit tracking needed. */
468 /* First do a reschedule test. */
469 /* edx: work, edi: workmask */
470 int_careful:
471 bt $TIF_NEED_RESCHED,%edx
472 jnc int_very_careful
473 TRACE_IRQS_ON
474 ENABLE_INTERRUPTS(CLBR_NONE)
475 pushq %rdi
476 CFI_ADJUST_CFA_OFFSET 8
477 call schedule
478 popq %rdi
479 CFI_ADJUST_CFA_OFFSET -8
480 DISABLE_INTERRUPTS(CLBR_NONE)
481 TRACE_IRQS_OFF
482 jmp int_with_check
483
484 /* handle signals and tracing -- both require a full stack frame */
485 int_very_careful:
486 TRACE_IRQS_ON
487 ENABLE_INTERRUPTS(CLBR_NONE)
488 SAVE_REST
489 /* Check for syscall exit trace */
490 testl $_TIF_WORK_SYSCALL_EXIT,%edx
491 jz int_signal
492 pushq %rdi
493 CFI_ADJUST_CFA_OFFSET 8
494 leaq 8(%rsp),%rdi # &ptregs -> arg1
495 call syscall_trace_leave
496 popq %rdi
497 CFI_ADJUST_CFA_OFFSET -8
498 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
499 jmp int_restore_rest
500
501 int_signal:
502 testl $_TIF_DO_NOTIFY_MASK,%edx
503 jz 1f
504 movq %rsp,%rdi # &ptregs -> arg1
505 xorl %esi,%esi # oldset -> arg2
506 call do_notify_resume
507 1: movl $_TIF_WORK_MASK,%edi
508 int_restore_rest:
509 RESTORE_REST
510 DISABLE_INTERRUPTS(CLBR_NONE)
511 TRACE_IRQS_OFF
512 jmp int_with_check
513 CFI_ENDPROC
514 END(system_call)
515
516 /*
517  * Certain special system calls need to save a full stack frame.
518 */
519
520 .macro PTREGSCALL label,func,arg
521 .globl \label
522 \label:
523 leaq \func(%rip),%rax
524 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
525 jmp ptregscall_common
526 END(\label)
527 .endm
528
529 CFI_STARTPROC
530
531 PTREGSCALL stub_clone, sys_clone, %r8
532 PTREGSCALL stub_fork, sys_fork, %rdi
533 PTREGSCALL stub_vfork, sys_vfork, %rdi
534 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
535 PTREGSCALL stub_iopl, sys_iopl, %rsi
536
537 ENTRY(ptregscall_common)
538 popq %r11
539 CFI_ADJUST_CFA_OFFSET -8
540 CFI_REGISTER rip, r11
541 SAVE_REST
542 movq %r11, %r15
543 CFI_REGISTER rip, r15
544 FIXUP_TOP_OF_STACK %r11
545 call *%rax
546 RESTORE_TOP_OF_STACK %r11
547 movq %r15, %r11
548 CFI_REGISTER rip, r11
549 RESTORE_REST
550 pushq %r11
551 CFI_ADJUST_CFA_OFFSET 8
552 CFI_REL_OFFSET rip, 0
553 ret
554 CFI_ENDPROC
555 END(ptregscall_common)
556
557 ENTRY(stub_execve)
558 CFI_STARTPROC
559 popq %r11
560 CFI_ADJUST_CFA_OFFSET -8
561 CFI_REGISTER rip, r11
562 SAVE_REST
563 FIXUP_TOP_OF_STACK %r11
564 movq %rsp, %rcx
565 call sys_execve
566 RESTORE_TOP_OF_STACK %r11
567 movq %rax,RAX(%rsp)
568 RESTORE_REST
569 jmp int_ret_from_sys_call
570 CFI_ENDPROC
571 END(stub_execve)
572
573 /*
574 * sigreturn is special because it needs to restore all registers on return.
575 * This cannot be done with SYSRET, so use the IRET return path instead.
576 */
577 ENTRY(stub_rt_sigreturn)
578 CFI_STARTPROC
579 addq $8, %rsp
580 CFI_ADJUST_CFA_OFFSET -8
581 SAVE_REST
582 movq %rsp,%rdi
583 FIXUP_TOP_OF_STACK %r11
584 call sys_rt_sigreturn
585 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
586 RESTORE_REST
587 jmp int_ret_from_sys_call
588 CFI_ENDPROC
589 END(stub_rt_sigreturn)
590
591 /*
592 * initial frame state for interrupts and exceptions
593 */
594 .macro _frame ref
595 CFI_STARTPROC simple
596 CFI_SIGNAL_FRAME
597 CFI_DEF_CFA rsp,SS+8-\ref
598 /*CFI_REL_OFFSET ss,SS-\ref*/
599 CFI_REL_OFFSET rsp,RSP-\ref
600 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
601 /*CFI_REL_OFFSET cs,CS-\ref*/
602 CFI_REL_OFFSET rip,RIP-\ref
603 .endm
604
605 /* initial frame state for interrupts (and exceptions without error code) */
606 #define INTR_FRAME _frame RIP
607 /* initial frame state for exceptions with error code (and interrupts with
608 vector already pushed) */
609 #define XCPT_FRAME _frame ORIG_RAX
610
611 /*
612 * Interrupt entry/exit.
613 *
614  * Interrupt entry points save only callee-clobbered registers in the fast path.
615 *
616 * Entry runs with interrupts off.
617 */
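/*
 * For reference: the callee-clobbered set stored by SAVE_ARGS (see
 * <asm/calling.h>) is rdi, rsi, rdx, rcx, rax and r8-r11, i.e. the
 * "partial stack frame" described in the header comment.
 */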
618
619 /* 0(%rsp): interrupt number */
620 .macro interrupt func
621 cld
622 SAVE_ARGS
623 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
624 pushq %rbp
625 CFI_ADJUST_CFA_OFFSET 8
626 CFI_REL_OFFSET rbp, 0
627 movq %rsp,%rbp
628 CFI_DEF_CFA_REGISTER rbp
629 testl $3,CS(%rdi)
630 je 1f
631 SWAPGS
632 /* irqcount is used to check if a CPU is already on an interrupt
633 	   stack or not. While this is essentially redundant with preempt_count,
634 it is a little cheaper to use a separate counter in the PDA
635 (short of moving irq_enter into assembly, which would be too
636 much work) */
637 1: incl %gs:pda_irqcount
638 cmoveq %gs:pda_irqstackptr,%rsp
639 push %rbp # backlink for old unwinder
640 /*
641 * We entered an interrupt context - irqs are off:
642 */
643 TRACE_IRQS_OFF
644 call \func
645 .endm
646
647 ENTRY(common_interrupt)
648 XCPT_FRAME
649 interrupt do_IRQ
650 /* 0(%rsp): oldrsp-ARGOFFSET */
651 ret_from_intr:
652 DISABLE_INTERRUPTS(CLBR_NONE)
653 TRACE_IRQS_OFF
654 decl %gs:pda_irqcount
655 leaveq
656 CFI_DEF_CFA_REGISTER rsp
657 CFI_ADJUST_CFA_OFFSET -8
658 exit_intr:
659 GET_THREAD_INFO(%rcx)
660 testl $3,CS-ARGOFFSET(%rsp)
661 je retint_kernel
662
663 /* Interrupt came from user space */
664 /*
665 * Has a correct top of stack, but a partial stack frame
666 * %rcx: thread info. Interrupts off.
667 */
668 retint_with_reschedule:
669 movl $_TIF_WORK_MASK,%edi
670 retint_check:
671 LOCKDEP_SYS_EXIT_IRQ
672 movl TI_flags(%rcx),%edx
673 andl %edi,%edx
674 CFI_REMEMBER_STATE
675 jnz retint_careful
676
677 retint_swapgs: /* return to user-space */
678 /*
679 * The iretq could re-enable interrupts:
680 */
681 DISABLE_INTERRUPTS(CLBR_ANY)
682 TRACE_IRQS_IRETQ
683 SWAPGS
684 jmp restore_args
685
686 retint_restore_args: /* return to kernel space */
687 DISABLE_INTERRUPTS(CLBR_ANY)
688 /*
689 * The iretq could re-enable interrupts:
690 */
691 TRACE_IRQS_IRETQ
692 restore_args:
693 RESTORE_ARGS 0,8,0
694
695 irq_return:
696 INTERRUPT_RETURN
697
698 .section __ex_table, "a"
699 .quad irq_return, bad_iret
700 .previous
701
702 #ifdef CONFIG_PARAVIRT
703 ENTRY(native_iret)
704 iretq
705
706 .section __ex_table,"a"
707 .quad native_iret, bad_iret
708 .previous
709 #endif
710
711 .section .fixup,"ax"
712 bad_iret:
713 /*
714 * The iret traps when the %cs or %ss being restored is bogus.
715 * We've lost the original trap vector and error code.
716 * #GPF is the most likely one to get for an invalid selector.
717 * So pretend we completed the iret and took the #GPF in user mode.
718 *
719 * We are now running with the kernel GS after exception recovery.
720 * But error_entry expects us to have user GS to match the user %cs,
721 * so swap back.
722 */
723 pushq $0
724
725 SWAPGS
726 jmp general_protection
727
728 .previous
729
730 /* edi: workmask, edx: work */
731 retint_careful:
732 CFI_RESTORE_STATE
733 bt $TIF_NEED_RESCHED,%edx
734 jnc retint_signal
735 TRACE_IRQS_ON
736 ENABLE_INTERRUPTS(CLBR_NONE)
737 pushq %rdi
738 CFI_ADJUST_CFA_OFFSET 8
739 call schedule
740 popq %rdi
741 CFI_ADJUST_CFA_OFFSET -8
742 GET_THREAD_INFO(%rcx)
743 DISABLE_INTERRUPTS(CLBR_NONE)
744 TRACE_IRQS_OFF
745 jmp retint_check
746
747 retint_signal:
748 testl $_TIF_DO_NOTIFY_MASK,%edx
749 jz retint_swapgs
750 TRACE_IRQS_ON
751 ENABLE_INTERRUPTS(CLBR_NONE)
752 SAVE_REST
753 movq $-1,ORIG_RAX(%rsp)
754 xorl %esi,%esi # oldset
755 movq %rsp,%rdi # &pt_regs
756 call do_notify_resume
757 RESTORE_REST
758 DISABLE_INTERRUPTS(CLBR_NONE)
759 TRACE_IRQS_OFF
760 GET_THREAD_INFO(%rcx)
761 jmp retint_with_reschedule
762
763 #ifdef CONFIG_PREEMPT
764 /* Returning to kernel space. Check if we need preemption */
765 /* rcx: threadinfo. interrupts off. */
766 ENTRY(retint_kernel)
767 cmpl $0,TI_preempt_count(%rcx)
768 jnz retint_restore_args
769 bt $TIF_NEED_RESCHED,TI_flags(%rcx)
770 jnc retint_restore_args
771 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
772 jnc retint_restore_args
773 call preempt_schedule_irq
774 jmp exit_intr
775 #endif
776
777 CFI_ENDPROC
778 END(common_interrupt)
779
780 /*
781 * APIC interrupts.
782 */
783 .macro apicinterrupt num,func
784 INTR_FRAME
785 pushq $~(\num)
786 CFI_ADJUST_CFA_OFFSET 8
787 interrupt \func
788 jmp ret_from_intr
789 CFI_ENDPROC
790 .endm
791
792 ENTRY(thermal_interrupt)
793 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
794 END(thermal_interrupt)
795
796 ENTRY(threshold_interrupt)
797 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
798 END(threshold_interrupt)
799
800 #ifdef CONFIG_SMP
801 ENTRY(reschedule_interrupt)
802 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
803 END(reschedule_interrupt)
804
805 .macro INVALIDATE_ENTRY num
806 ENTRY(invalidate_interrupt\num)
807 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
808 END(invalidate_interrupt\num)
809 .endm
810
811 INVALIDATE_ENTRY 0
812 INVALIDATE_ENTRY 1
813 INVALIDATE_ENTRY 2
814 INVALIDATE_ENTRY 3
815 INVALIDATE_ENTRY 4
816 INVALIDATE_ENTRY 5
817 INVALIDATE_ENTRY 6
818 INVALIDATE_ENTRY 7
819
820 ENTRY(call_function_interrupt)
821 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
822 END(call_function_interrupt)
823 ENTRY(call_function_single_interrupt)
824 apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
825 END(call_function_single_interrupt)
826 ENTRY(irq_move_cleanup_interrupt)
827 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
828 END(irq_move_cleanup_interrupt)
829 #endif
830
831 ENTRY(apic_timer_interrupt)
832 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
833 END(apic_timer_interrupt)
834
835 ENTRY(uv_bau_message_intr1)
836 apicinterrupt 220,uv_bau_message_interrupt
837 END(uv_bau_message_intr1)
838
839 ENTRY(error_interrupt)
840 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
841 END(error_interrupt)
842
843 ENTRY(spurious_interrupt)
844 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
845 END(spurious_interrupt)
846
847 /*
848 * Exception entry points.
849 */
850 .macro zeroentry sym
851 INTR_FRAME
852 PARAVIRT_ADJUST_EXCEPTION_FRAME
853 pushq $0 /* push error code/oldrax */
854 CFI_ADJUST_CFA_OFFSET 8
855 pushq %rax /* push real oldrax to the rdi slot */
856 CFI_ADJUST_CFA_OFFSET 8
857 CFI_REL_OFFSET rax,0
858 leaq \sym(%rip),%rax
859 jmp error_entry
860 CFI_ENDPROC
861 .endm
862
863 .macro errorentry sym
864 XCPT_FRAME
865 PARAVIRT_ADJUST_EXCEPTION_FRAME
866 pushq %rax
867 CFI_ADJUST_CFA_OFFSET 8
868 CFI_REL_OFFSET rax,0
869 leaq \sym(%rip),%rax
870 jmp error_entry
871 CFI_ENDPROC
872 .endm
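/*
 * Both macros above load the handler address into %rax and jump to
 * error_entry below; zeroentry pushes a dummy 0 error code itself, while
 * errorentry relies on the error code the CPU has already pushed.
 */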
873
874 /* error code is on the stack already */
875 	/* handle NMI-like exceptions that can happen everywhere */
876 .macro paranoidentry sym, ist=0, irqtrace=1
877 SAVE_ALL
878 cld
879 movl $1,%ebx
880 movl $MSR_GS_BASE,%ecx
881 rdmsr
882 testl %edx,%edx
883 js 1f
884 SWAPGS
885 xorl %ebx,%ebx
886 1:
887 .if \ist
888 movq %gs:pda_data_offset, %rbp
889 .endif
890 movq %rsp,%rdi
891 movq ORIG_RAX(%rsp),%rsi
892 movq $-1,ORIG_RAX(%rsp)
893 .if \ist
894 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
895 .endif
896 call \sym
897 .if \ist
898 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
899 .endif
900 DISABLE_INTERRUPTS(CLBR_NONE)
901 .if \irqtrace
902 TRACE_IRQS_OFF
903 .endif
904 .endm
905
906 /*
907 * "Paranoid" exit path from exception stack.
908 * Paranoid because this is used by NMIs and cannot take
909 * any kernel state for granted.
910 * We don't do kernel preemption checks here, because only
911 * NMI should be common and it does not enable IRQs and
912 * cannot get reschedule ticks.
913 *
914 * "trace" is 0 for the NMI handler only, because irq-tracing
915 * is fundamentally NMI-unsafe. (we cannot change the soft and
916 * hard flags at once, atomically)
917 */
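/*
 * Typical pairing (see the exception entries further down): debug uses
 * "paranoidentry do_debug, DEBUG_STACK" followed by "paranoidexit", while
 * nmi uses "paranoidentry do_nmi, 0, 0" and, when irq tracing is configured,
 * the trace=0 exit variant for the reason explained above.
 */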
918 .macro paranoidexit trace=1
919 /* ebx: no swapgs flag */
920 paranoid_exit\trace:
921 testl %ebx,%ebx /* swapgs needed? */
922 jnz paranoid_restore\trace
923 testl $3,CS(%rsp)
924 jnz paranoid_userspace\trace
925 paranoid_swapgs\trace:
926 .if \trace
927 TRACE_IRQS_IRETQ 0
928 .endif
929 SWAPGS_UNSAFE_STACK
930 paranoid_restore\trace:
931 RESTORE_ALL 8
932 jmp irq_return
933 paranoid_userspace\trace:
934 GET_THREAD_INFO(%rcx)
935 movl TI_flags(%rcx),%ebx
936 andl $_TIF_WORK_MASK,%ebx
937 jz paranoid_swapgs\trace
938 movq %rsp,%rdi /* &pt_regs */
939 call sync_regs
940 movq %rax,%rsp /* switch stack for scheduling */
941 testl $_TIF_NEED_RESCHED,%ebx
942 jnz paranoid_schedule\trace
943 movl %ebx,%edx /* arg3: thread flags */
944 .if \trace
945 TRACE_IRQS_ON
946 .endif
947 ENABLE_INTERRUPTS(CLBR_NONE)
948 xorl %esi,%esi /* arg2: oldset */
949 movq %rsp,%rdi /* arg1: &pt_regs */
950 call do_notify_resume
951 DISABLE_INTERRUPTS(CLBR_NONE)
952 .if \trace
953 TRACE_IRQS_OFF
954 .endif
955 jmp paranoid_userspace\trace
956 paranoid_schedule\trace:
957 .if \trace
958 TRACE_IRQS_ON
959 .endif
960 ENABLE_INTERRUPTS(CLBR_ANY)
961 call schedule
962 DISABLE_INTERRUPTS(CLBR_ANY)
963 .if \trace
964 TRACE_IRQS_OFF
965 .endif
966 jmp paranoid_userspace\trace
967 CFI_ENDPROC
968 .endm
969
970 /*
971 * Exception entry point. This expects an error code/orig_rax on the stack
972 * and the exception handler in %rax.
973 */
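/*
 * %ebx doubles as the "no swapgs" flag here: it stays 0 when the fault came
 * from user space (a SWAPGS was done and another is needed on the way out)
 * and is set to 1 in error_kernelspace; error_exit below tests it to pick
 * the return path.
 */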
974 KPROBE_ENTRY(error_entry)
975 _frame RDI
976 CFI_REL_OFFSET rax,0
977 /* rdi slot contains rax, oldrax contains error code */
978 cld
979 subq $14*8,%rsp
980 CFI_ADJUST_CFA_OFFSET (14*8)
981 movq %rsi,13*8(%rsp)
982 CFI_REL_OFFSET rsi,RSI
983 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
984 CFI_REGISTER rax,rsi
985 movq %rdx,12*8(%rsp)
986 CFI_REL_OFFSET rdx,RDX
987 movq %rcx,11*8(%rsp)
988 CFI_REL_OFFSET rcx,RCX
989 movq %rsi,10*8(%rsp) /* store rax */
990 CFI_REL_OFFSET rax,RAX
991 movq %r8, 9*8(%rsp)
992 CFI_REL_OFFSET r8,R8
993 movq %r9, 8*8(%rsp)
994 CFI_REL_OFFSET r9,R9
995 movq %r10,7*8(%rsp)
996 CFI_REL_OFFSET r10,R10
997 movq %r11,6*8(%rsp)
998 CFI_REL_OFFSET r11,R11
999 movq %rbx,5*8(%rsp)
1000 CFI_REL_OFFSET rbx,RBX
1001 movq %rbp,4*8(%rsp)
1002 CFI_REL_OFFSET rbp,RBP
1003 movq %r12,3*8(%rsp)
1004 CFI_REL_OFFSET r12,R12
1005 movq %r13,2*8(%rsp)
1006 CFI_REL_OFFSET r13,R13
1007 movq %r14,1*8(%rsp)
1008 CFI_REL_OFFSET r14,R14
1009 movq %r15,(%rsp)
1010 CFI_REL_OFFSET r15,R15
1011 xorl %ebx,%ebx
1012 testl $3,CS(%rsp)
1013 je error_kernelspace
1014 error_swapgs:
1015 SWAPGS
1016 error_sti:
1017 movq %rdi,RDI(%rsp)
1018 CFI_REL_OFFSET rdi,RDI
1019 movq %rsp,%rdi
1020 movq ORIG_RAX(%rsp),%rsi /* get error code */
1021 movq $-1,ORIG_RAX(%rsp)
1022 call *%rax
1023 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
1024 error_exit:
1025 movl %ebx,%eax
1026 RESTORE_REST
1027 DISABLE_INTERRUPTS(CLBR_NONE)
1028 TRACE_IRQS_OFF
1029 GET_THREAD_INFO(%rcx)
1030 testl %eax,%eax
1031 jne retint_kernel
1032 LOCKDEP_SYS_EXIT_IRQ
1033 movl TI_flags(%rcx),%edx
1034 movl $_TIF_WORK_MASK,%edi
1035 andl %edi,%edx
1036 jnz retint_careful
1037 jmp retint_swapgs
1038 CFI_ENDPROC
1039
1040 error_kernelspace:
1041 incl %ebx
1042 /* There are two places in the kernel that can potentially fault with
1043 usergs. Handle them here. The exception handlers after
1044 iret run with kernel gs again, so don't set the user space flag.
1045 	   B stepping K8s sometimes report a truncated RIP for IRET
1046 exceptions returning to compat mode. Check for these here too. */
1047 leaq irq_return(%rip),%rcx
1048 cmpq %rcx,RIP(%rsp)
1049 je error_swapgs
1050 movl %ecx,%ecx /* zero extend */
1051 cmpq %rcx,RIP(%rsp)
1052 je error_swapgs
1053 cmpq $gs_change,RIP(%rsp)
1054 je error_swapgs
1055 jmp error_sti
1056 KPROBE_END(error_entry)
1057
1058 /* Reload gs selector with exception handling */
1059 /* edi: new selector */
1060 ENTRY(native_load_gs_index)
1061 CFI_STARTPROC
1062 pushf
1063 CFI_ADJUST_CFA_OFFSET 8
1064 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
1065 SWAPGS
1066 gs_change:
1067 movl %edi,%gs
1068 2: mfence /* workaround */
1069 SWAPGS
1070 popf
1071 CFI_ADJUST_CFA_OFFSET -8
1072 ret
1073 CFI_ENDPROC
1074 ENDPROC(native_load_gs_index)
1075
1076 .section __ex_table,"a"
1077 .align 8
1078 .quad gs_change,bad_gs
1079 .previous
1080 .section .fixup,"ax"
1081 /* running with kernelgs */
1082 bad_gs:
1083 SWAPGS /* switch back to user gs */
1084 xorl %eax,%eax
1085 movl %eax,%gs
1086 jmp 2b
1087 .previous
1088
1089 /*
1090 * Create a kernel thread.
1091 *
1092 * C extern interface:
1093 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
1094 *
1095 * asm input arguments:
1096 * rdi: fn, rsi: arg, rdx: flags
1097 */
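/*
 * Illustrative usage sketch (the caller and flag set below are examples,
 * not taken from this file):
 *
 *	pid = kernel_thread(worker_fn, NULL, CLONE_FS | CLONE_FILES | SIGCHLD);
 *
 * The fake stack frame built here makes the child start in child_rip,
 * which calls fn(arg) and then do_exit() with its return value.
 */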
1098 ENTRY(kernel_thread)
1099 CFI_STARTPROC
1100 FAKE_STACK_FRAME $child_rip
1101 SAVE_ALL
1102
1103 # rdi: flags, rsi: usp, rdx: will be &pt_regs
1104 movq %rdx,%rdi
1105 orq kernel_thread_flags(%rip),%rdi
1106 movq $-1, %rsi
1107 movq %rsp, %rdx
1108
1109 xorl %r8d,%r8d
1110 xorl %r9d,%r9d
1111
1112 # clone now
1113 call do_fork
1114 movq %rax,RAX(%rsp)
1115 xorl %edi,%edi
1116
1117 /*
1118  * It isn't worth checking for a reschedule here,
1119  * so internally to the x86_64 port you can rely on kernel_thread()
1120  * not rescheduling the child before returning; this avoids the need
1121  * for hacks, for example to fork off the per-CPU idle tasks.
1122 * [Hopefully no generic code relies on the reschedule -AK]
1123 */
1124 RESTORE_ALL
1125 UNFAKE_STACK_FRAME
1126 ret
1127 CFI_ENDPROC
1128 ENDPROC(kernel_thread)
1129
1130 child_rip:
1131 pushq $0 # fake return address
1132 CFI_STARTPROC
1133 /*
1134 * Here we are in the child and the registers are set as they were
1135 * at kernel_thread() invocation in the parent.
1136 */
1137 movq %rdi, %rax
1138 movq %rsi, %rdi
1139 call *%rax
1140 # exit
1141 mov %eax, %edi
1142 call do_exit
1143 CFI_ENDPROC
1144 ENDPROC(child_rip)
1145
1146 /*
1147 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
1148 *
1149 * C extern interface:
1150 * extern long execve(char *name, char **argv, char **envp)
1151 *
1152 * asm input arguments:
1153 * rdi: name, rsi: argv, rdx: envp
1154 *
1155  * We want to fall back into:
1156 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
1157 *
1158 * do_sys_execve asm fallback arguments:
1159 * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
1160 */
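/*
 * Illustrative caller sketch (file name and argument names are examples,
 * not taken from this file):
 *
 *	kernel_execve("/sbin/init", argv_init, envp_init);
 *
 * On success the new program is entered via int_ret_from_sys_call (IRET);
 * the call only returns to the caller, with an error code, if the exec
 * failed.
 */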
1161 ENTRY(kernel_execve)
1162 CFI_STARTPROC
1163 FAKE_STACK_FRAME $0
1164 SAVE_ALL
1165 movq %rsp,%rcx
1166 call sys_execve
1167 movq %rax, RAX(%rsp)
1168 RESTORE_REST
1169 testq %rax,%rax
1170 je int_ret_from_sys_call
1171 RESTORE_ARGS
1172 UNFAKE_STACK_FRAME
1173 ret
1174 CFI_ENDPROC
1175 ENDPROC(kernel_execve)
1176
1177 KPROBE_ENTRY(page_fault)
1178 errorentry do_page_fault
1179 KPROBE_END(page_fault)
1180
1181 ENTRY(coprocessor_error)
1182 zeroentry do_coprocessor_error
1183 END(coprocessor_error)
1184
1185 ENTRY(simd_coprocessor_error)
1186 zeroentry do_simd_coprocessor_error
1187 END(simd_coprocessor_error)
1188
1189 ENTRY(device_not_available)
1190 zeroentry math_state_restore
1191 END(device_not_available)
1192
1193 /* runs on exception stack */
1194 KPROBE_ENTRY(debug)
1195 INTR_FRAME
1196 PARAVIRT_ADJUST_EXCEPTION_FRAME
1197 pushq $0
1198 CFI_ADJUST_CFA_OFFSET 8
1199 paranoidentry do_debug, DEBUG_STACK
1200 paranoidexit
1201 KPROBE_END(debug)
1202
1203 /* runs on exception stack */
1204 KPROBE_ENTRY(nmi)
1205 INTR_FRAME
1206 PARAVIRT_ADJUST_EXCEPTION_FRAME
1207 pushq $-1
1208 CFI_ADJUST_CFA_OFFSET 8
1209 paranoidentry do_nmi, 0, 0
1210 #ifdef CONFIG_TRACE_IRQFLAGS
1211 paranoidexit 0
1212 #else
1213 jmp paranoid_exit1
1214 CFI_ENDPROC
1215 #endif
1216 KPROBE_END(nmi)
1217
1218 KPROBE_ENTRY(int3)
1219 INTR_FRAME
1220 PARAVIRT_ADJUST_EXCEPTION_FRAME
1221 pushq $0
1222 CFI_ADJUST_CFA_OFFSET 8
1223 paranoidentry do_int3, DEBUG_STACK
1224 jmp paranoid_exit1
1225 CFI_ENDPROC
1226 KPROBE_END(int3)
1227
1228 ENTRY(overflow)
1229 zeroentry do_overflow
1230 END(overflow)
1231
1232 ENTRY(bounds)
1233 zeroentry do_bounds
1234 END(bounds)
1235
1236 ENTRY(invalid_op)
1237 zeroentry do_invalid_op
1238 END(invalid_op)
1239
1240 ENTRY(coprocessor_segment_overrun)
1241 zeroentry do_coprocessor_segment_overrun
1242 END(coprocessor_segment_overrun)
1243
1244 /* runs on exception stack */
1245 ENTRY(double_fault)
1246 XCPT_FRAME
1247 PARAVIRT_ADJUST_EXCEPTION_FRAME
1248 paranoidentry do_double_fault
1249 jmp paranoid_exit1
1250 CFI_ENDPROC
1251 END(double_fault)
1252
1253 ENTRY(invalid_TSS)
1254 errorentry do_invalid_TSS
1255 END(invalid_TSS)
1256
1257 ENTRY(segment_not_present)
1258 errorentry do_segment_not_present
1259 END(segment_not_present)
1260
1261 /* runs on exception stack */
1262 ENTRY(stack_segment)
1263 XCPT_FRAME
1264 PARAVIRT_ADJUST_EXCEPTION_FRAME
1265 paranoidentry do_stack_segment
1266 jmp paranoid_exit1
1267 CFI_ENDPROC
1268 END(stack_segment)
1269
1270 KPROBE_ENTRY(general_protection)
1271 errorentry do_general_protection
1272 KPROBE_END(general_protection)
1273
1274 ENTRY(alignment_check)
1275 errorentry do_alignment_check
1276 END(alignment_check)
1277
1278 ENTRY(divide_error)
1279 zeroentry do_divide_error
1280 END(divide_error)
1281
1282 ENTRY(spurious_interrupt_bug)
1283 zeroentry do_spurious_interrupt_bug
1284 END(spurious_interrupt_bug)
1285
1286 #ifdef CONFIG_X86_MCE
1287 /* runs on exception stack */
1288 ENTRY(machine_check)
1289 INTR_FRAME
1290 PARAVIRT_ADJUST_EXCEPTION_FRAME
1291 pushq $0
1292 CFI_ADJUST_CFA_OFFSET 8
1293 paranoidentry do_machine_check
1294 jmp paranoid_exit1
1295 CFI_ENDPROC
1296 END(machine_check)
1297 #endif
1298
1299 /* Call softirq on interrupt stack. Interrupts are off. */
1300 ENTRY(call_softirq)
1301 CFI_STARTPROC
1302 push %rbp
1303 CFI_ADJUST_CFA_OFFSET 8
1304 CFI_REL_OFFSET rbp,0
1305 mov %rsp,%rbp
1306 CFI_DEF_CFA_REGISTER rbp
1307 incl %gs:pda_irqcount
1308 cmove %gs:pda_irqstackptr,%rsp
1309 push %rbp # backlink for old unwinder
1310 call __do_softirq
1311 leaveq
1312 CFI_DEF_CFA_REGISTER rsp
1313 CFI_ADJUST_CFA_OFFSET -8
1314 decl %gs:pda_irqcount
1315 ret
1316 CFI_ENDPROC
1317 ENDPROC(call_softirq)
1318
1319 KPROBE_ENTRY(ignore_sysret)
1320 CFI_STARTPROC
1321 mov $-ENOSYS,%eax
1322 sysret
1323 CFI_ENDPROC
1324 ENDPROC(ignore_sysret)
1325
1326 #ifdef CONFIG_XEN
1327 ENTRY(xen_hypervisor_callback)
1328 zeroentry xen_do_hypervisor_callback
1329 END(xen_hypervisor_callback)
1330
1331 /*
1332 # A note on the "critical region" in our callback handler.
1333 # We want to avoid stacking callback handlers due to events occurring
1334 # during handling of the last event. To do this, we keep events disabled
1335 # until we've done all processing. HOWEVER, we must enable events before
1336 # popping the stack frame (can't be done atomically) and so it would still
1337 # be possible to get enough handler activations to overflow the stack.
1338 # Although unlikely, bugs of that kind are hard to track down, so we'd
1339 # like to avoid the possibility.
1340 # So, on entry to the handler we detect whether we interrupted an
1341 # existing activation in its critical region -- if so, we pop the current
1342 # activation and restart the handler using the previous one.
1343 */
1344 ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct pt_regs *)
1345 CFI_STARTPROC
1346 	/* Since we don't modify %rdi, evtchn_do_upcall(struct pt_regs *) will
1347 	   see the correct pointer to the pt_regs */
1348 movq %rdi, %rsp # we don't return, adjust the stack frame
1349 CFI_ENDPROC
1350 CFI_DEFAULT_STACK
1351 11: incl %gs:pda_irqcount
1352 movq %rsp,%rbp
1353 CFI_DEF_CFA_REGISTER rbp
1354 cmovzq %gs:pda_irqstackptr,%rsp
1355 pushq %rbp # backlink for old unwinder
1356 call xen_evtchn_do_upcall
1357 popq %rsp
1358 CFI_DEF_CFA_REGISTER rsp
1359 decl %gs:pda_irqcount
1360 jmp error_exit
1361 CFI_ENDPROC
1362 END(xen_do_hypervisor_callback)
1363
1364 /*
1365 # Hypervisor uses this for application faults while it executes.
1366 # We get here for two reasons:
1367 # 1. Fault while reloading DS, ES, FS or GS
1368 # 2. Fault while executing IRET
1369 # Category 1 we do not need to fix up as Xen has already reloaded all segment
1370 # registers that could be reloaded and zeroed the others.
1371 # Category 2 we fix up by killing the current process. We cannot use the
1372 # normal Linux return path in this case because if we use the IRET hypercall
1373 # to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1374 # We distinguish between categories by comparing each saved segment register
1375 # with its current contents: any discrepancy means we are in category 1.
1376 */
1377 ENTRY(xen_failsafe_callback)
1378 framesz = (RIP-0x30) /* workaround buggy gas */
1379 _frame framesz
1380 CFI_REL_OFFSET rcx, 0
1381 CFI_REL_OFFSET r11, 8
1382 movw %ds,%cx
1383 cmpw %cx,0x10(%rsp)
1384 CFI_REMEMBER_STATE
1385 jne 1f
1386 movw %es,%cx
1387 cmpw %cx,0x18(%rsp)
1388 jne 1f
1389 movw %fs,%cx
1390 cmpw %cx,0x20(%rsp)
1391 jne 1f
1392 movw %gs,%cx
1393 cmpw %cx,0x28(%rsp)
1394 jne 1f
1395 /* All segments match their saved values => Category 2 (Bad IRET). */
1396 movq (%rsp),%rcx
1397 CFI_RESTORE rcx
1398 movq 8(%rsp),%r11
1399 CFI_RESTORE r11
1400 addq $0x30,%rsp
1401 CFI_ADJUST_CFA_OFFSET -0x30
1402 pushq $0
1403 CFI_ADJUST_CFA_OFFSET 8
1404 pushq %r11
1405 CFI_ADJUST_CFA_OFFSET 8
1406 pushq %rcx
1407 CFI_ADJUST_CFA_OFFSET 8
1408 jmp general_protection
1409 CFI_RESTORE_STATE
1410 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
1411 movq (%rsp),%rcx
1412 CFI_RESTORE rcx
1413 movq 8(%rsp),%r11
1414 CFI_RESTORE r11
1415 addq $0x30,%rsp
1416 CFI_ADJUST_CFA_OFFSET -0x30
1417 pushq $0
1418 CFI_ADJUST_CFA_OFFSET 8
1419 SAVE_ALL
1420 jmp error_exit
1421 CFI_ENDPROC
1422 END(xen_failsafe_callback)
1423
1424 #endif /* CONFIG_XEN */