br.cond.sptk.many b7
END(load_switch_stack)
-GLOBAL_ENTRY(__ia64_syscall)
- .regstk 6,0,0,0
- mov r15=in5 // put syscall number in place
- break __BREAK_SYSCALL
- movl r2=errno
- cmp.eq p6,p7=-1,r10
- ;;
-(p6) st4 [r2]=r8
-(p6) mov r8=-1
- br.ret.sptk.many rp
-END(__ia64_syscall)
-
GLOBAL_ENTRY(execve)
mov r15=__NR_execve // put syscall number in place
break __BREAK_SYSCALL
* r8-r11: restored (syscall return value(s))
* r12: restored (user-level stack pointer)
* r13: restored (user-level thread pointer)
- * r14: cleared
+ * r14: set to __kernel_syscall_via_epc
* r15: restored (syscall #)
* r16-r17: cleared
* r18: user-level b6
* pr: restored (user-level pr)
* b0: restored (user-level rp)
* b6: restored
- * b7: cleared
+ * b7: set to __kernel_syscall_via_epc
* ar.unat: restored (user-level ar.unat)
* ar.pfs: restored (user-level ar.pfs)
* ar.rsc: restored (user-level ar.rsc)
;;
(p6) ld4 r31=[r18] // load current_thread_info()->flags
ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
- mov b7=r0 // clear b7
+ nop.i 0
;;
- ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage)
+ mov r16=ar.bsp // M2 get existing backing store pointer
ld8 r18=[r2],PT(R9)-PT(B6) // load b6
(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
;;
- mov r16=ar.bsp // M2 get existing backing store pointer
+ ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage)
(p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending?
(p6) br.cond.spnt .work_pending_syscall
;;
// start restoring the state saved on the kernel stack (struct pt_regs):
ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
ld8 r11=[r3],PT(CR_IIP)-PT(R11)
- mov f6=f0 // clear f6
+(pNonSys) break 0 // bug check: we shouldn't be here if pNonSys is TRUE!
;;
invala // M0|1 invalidate ALAT
- rsm psr.i | psr.ic // M2 initiate turning off of interrupt and interruption collection
- mov f9=f0 // clear f9
+ rsm psr.i | psr.ic // M2 turn off interrupts and interruption collection
+ cmp.eq p9,p0=r0,r0 // A set p9 to indicate that we should restore cr.ifs
- ld8 r29=[r2],16 // load cr.ipsr
- ld8 r28=[r3],16 // load cr.iip
- mov f8=f0 // clear f8
+ ld8 r29=[r2],16 // M0|1 load cr.ipsr
+ ld8 r28=[r3],16 // M0|1 load cr.iip
+ mov r22=r0 // A clear r22
;;
ld8 r30=[r2],16 // M0|1 load cr.ifs
ld8 r25=[r3],16 // M0|1 load ar.unat
- cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs
+(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
;;
ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs
-(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
- mov f10=f0 // clear f10
+(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
+ nop 0
;;
- ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0
- ld8 r27=[r3],PT(PR)-PT(AR_RSC) // load ar.rsc
- mov f11=f0 // clear f11
+ ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
+ ld8 r27=[r3],PT(PR)-PT(AR_RSC) // M0|1 load ar.rsc
+ mov f6=f0 // F clear f6
;;
- ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // load ar.rnat (may be garbage)
- ld8 r31=[r3],PT(R1)-PT(PR) // load predicates
-(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+ ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // M0|1 load ar.rnat (may be garbage)
+ ld8 r31=[r3],PT(R1)-PT(PR) // M0|1 load predicates
+ mov f7=f0 // F clear f7
;;
- ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // load ar.fpsr
- ld8.fill r1=[r3],16 // load r1
-(pUStk) mov r17=1
+ ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // M0|1 load ar.fpsr
+ ld8.fill r1=[r3],16 // M0|1 load r1
+(pUStk) mov r17=1 // A
;;
- srlz.d // M0 ensure interruption collection is off
- ld8.fill r13=[r3],16
- mov f7=f0 // clear f7
+(pUStk) st1 [r14]=r17 // M2|3
+ ld8.fill r13=[r3],16 // M0|1
+ mov f8=f0 // F clear f8
;;
- ld8.fill r12=[r2] // restore r12 (sp)
- mov.m ar.ssd=r0 // M2 clear ar.ssd
- mov r22=r0 // clear r22
+ ld8.fill r12=[r2] // M0|1 restore r12 (sp)
+ ld8.fill r15=[r3] // M0|1 restore r15
+ mov b6=r18 // I0 restore b6
- ld8.fill r15=[r3] // restore r15
-(pUStk) st1 [r14]=r17
- addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
+ addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
+ mov f9=f0 // F clear f9
+(pKStk) br.cond.dpnt.many skip_rbs_switch // B
+
+ srlz.d // M0 ensure interruption collection is off (for cover)
+ shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
+ cover // B add current frame into dirty partition & set cr.ifs
;;
-(pUStk) ld4 r17=[r3] // r17 = cpu_data->phys_stacked_size_p8
- mov.m ar.csd=r0 // M2 clear ar.csd
- mov b6=r18 // I0 restore b6
+(pUStk) ld4 r17=[r17] // M0|1 r17 = cpu_data->phys_stacked_size_p8
+ mov r19=ar.bsp // M2 get new backing store pointer
+ mov f10=f0 // F clear f10
+
+ nop.m 0
+ movl r14=__kernel_syscall_via_epc // X
;;
- mov r14=r0 // clear r14
- shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
-(pKStk) br.cond.dpnt.many skip_rbs_switch
+ mov.m ar.csd=r0 // M2 clear ar.csd
+ mov.m ar.ccv=r0 // M2 clear ar.ccv
+ mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc)
- mov.m ar.ccv=r0 // clear ar.ccv
-(pNonSys) br.cond.dpnt.many dont_preserve_current_frame
- br.cond.sptk.many rbs_switch
+ mov.m ar.ssd=r0 // M2 clear ar.ssd
+ mov f11=f0 // F clear f11
+ br.cond.sptk.many rbs_switch // B
END(ia64_leave_syscall)
#ifdef CONFIG_IA32_SUPPORT
st8.spill [r2]=r8 // store return value in slot for r8 and set unat bit
.mem.offset 8,0
st8.spill [r3]=r0 // clear error indication in slot for r10 and set unat bit
- END(ia64_ret_from_ia32_execve_syscall)
+ END(ia64_ret_from_ia32_execve)
// fall through
#endif /* CONFIG_IA32_SUPPORT */
GLOBAL_ENTRY(ia64_leave_kernel)
ldf.fill f7=[r2],PT(F11)-PT(F7)
ldf.fill f8=[r3],32
;;
- srlz.i // ensure interruption collection is off
+ srlz.d // ensure that inter. collection is off (VHPT is don't care, since text is pinned)
mov ar.ccv=r15
;;
ldf.fill f11=[r2]
* NOTE: alloc, loadrs, and cover can't be predicated.
*/
(pNonSys) br.cond.dpnt dont_preserve_current_frame
-
-rbs_switch:
cover // add current frame into dirty partition and set cr.ifs
;;
mov r19=ar.bsp // get new backing store pointer
+rbs_switch:
sub r16=r16,r18 // krbs = old bsp - size of dirty partition
cmp.ne p9,p0=r0,r0 // clear p9 to skip restore of cr.ifs
;;
mov loc5=0
mov loc6=0
mov loc7=0
-(pRecurse) br.call.sptk.few b0=rse_clear_invalid
+(pRecurse) br.call.dptk.few b0=rse_clear_invalid
;;
mov loc8=0
mov loc9=0
cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret
mov loc10=0
mov loc11=0
-(pReturn) br.ret.sptk.many b0
+(pReturn) br.ret.dptk.many b0
#endif /* !CONFIG_ITANIUM */
# undef pRecurse
# undef pReturn
;;
(pNonSys) mov out2=0 // out2==0 => not a syscall
.fframe 16
- .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!)
+ .spillsp ar.unat, 16
st8 [sp]=r9,-16 // allocate space for ar.unat and save it
st8 [out1]=loc1,-8 // save ar.pfs, out1=&sigscratch
.body
adds out2=8,sp // out2=&sigscratch->ar_pfs
;;
.fframe 16
- .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!)
+ .spillsp ar.unat, 16
st8 [sp]=r9,-16 // allocate space for ar.unat and save it
st8 [out2]=loc1,-8 // save ar.pfs, out2=&sigscratch
.body
;;
st8 [r2]=r14 // update current->blocked with new mask
- cmpxchg4.acq r14=[r9],r18,ar.ccv // current->thread_info->flags <- r18
+ cmpxchg4.acq r8=[r9],r18,ar.ccv // current->thread_info->flags <- r18
;;
- cmp.ne p6,p0=r17,r14 // update failed?
+ cmp.ne p6,p0=r17,r8 // update failed?
(p6) br.cond.spnt.few 1b // yes -> retry
#ifdef CONFIG_SMP
.altrp b6
.body
/*
- * We get here for syscalls that don't have a lightweight handler. For those, we
- * need to bubble down into the kernel and that requires setting up a minimal
- * pt_regs structure, and initializing the CPU state more or less as if an
- * interruption had occurred. To make syscall-restarts work, we setup pt_regs
- * such that cr_iip points to the second instruction in syscall_via_break.
- * Decrementing the IP hence will restart the syscall via break and not
- * decrementing IP will return us to the caller, as usual. Note that we preserve
- * the value of psr.pp rather than initializing it from dcr.pp. This makes it
- * possible to distinguish fsyscall execution from other privileged execution.
+ * We get here for syscalls that don't have a lightweight
+ * handler. For those, we need to bubble down into the kernel
+ * and that requires setting up a minimal pt_regs structure,
+ * and initializing the CPU state more or less as if an
+ * interruption had occurred. To make syscall-restarts work,
+ * we setup pt_regs such that cr_iip points to the second
+ * instruction in syscall_via_break. Decrementing the IP
+ * hence will restart the syscall via break and not
+ * decrementing IP will return us to the caller, as usual.
+ * Note that we preserve the value of psr.pp rather than
+ * initializing it from dcr.pp. This makes it possible to
+ * distinguish fsyscall execution from other privileged
+ * execution.
*
* On entry:
- * - normal fsyscall handler register usage, except that we also have:
+ * - normal fsyscall handler register usage, except
+ * that we also have:
* - r18: address of syscall entry point
* - r21: ar.fpsr
* - r26: ar.pfs
* - r27: ar.rsc
* - r29: psr
+ *
+ * We used to clear some PSR bits here but that requires slow
+ * serialization. Fortuntely, that isn't really necessary.
+ * The rationale is as follows: we used to clear bits
+ * ~PSR_PRESERVED_BITS in PSR.L. Since
+ * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
+ * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
+ * However,
+ *
+ * PSR.BE : already is turned off in __kernel_syscall_via_epc()
+ * PSR.AC : don't care (kernel normally turns PSR.AC on)
+ * PSR.I : already turned off by the time fsys_bubble_down gets
+ * invoked
+ * PSR.DFL: always 0 (kernel never turns it on)
+ * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
+ * initiative
+ * PSR.DI : always 0 (kernel never turns it on)
+ * PSR.SI : always 0 (kernel never turns it on)
+ * PSR.DB : don't care --- kernel never enables kernel-level
+ * breakpoints
+ * PSR.TB : must be 0 already; if it wasn't zero on entry to
+ * __kernel_syscall_via_epc, the branch to fsys_bubble_down
+ * will trigger a taken branch; the taken-trap-handler then
+ * converts the syscall into a break-based system-call.
*/
-# define PSR_PRESERVED_BITS (IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \
- | IA64_PSR_DT | IA64_PSR_PP | IA64_PSR_SP | IA64_PSR_RT \
- | IA64_PSR_IC)
/*
- * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. The rest we have
- * to synthesize.
+ * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
+ * The rest we have to synthesize.
*/
-# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \
+# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) \
+ | (0x1 << IA64_PSR_RI_BIT) \
| IA64_PSR_BN | IA64_PSR_I)
- invala
- movl r8=PSR_ONE_BITS
+ invala // M0|1
+ movl r14=ia64_ret_from_syscall // X
- mov r25=ar.unat // save ar.unat (5 cyc)
- movl r9=PSR_PRESERVED_BITS
+ nop.m 0
+ movl r28=__kernel_syscall_via_break // X create cr.iip
+ ;;
- mov ar.rsc=0 // set enforced lazy mode, pl 0, little-endian, loadrs=0
- movl r28=__kernel_syscall_via_break
+ mov r2=r16 // A get task addr to addl-addressable register
+ adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
+ mov r31=pr // I0 save pr (2 cyc)
;;
- mov r23=ar.bspstore // save ar.bspstore (12 cyc)
- mov r31=pr // save pr (2 cyc)
- mov r20=r1 // save caller's gp in r20
+ st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
+ addl r22=IA64_RBS_OFFSET,r2 // A compute base of RBS
+ add r3=TI_FLAGS+IA64_TASK_SIZE,r2 // A
;;
- mov r2=r16 // copy current task addr to addl-addressable register
- and r9=r9,r29
- mov r19=b6 // save b6 (2 cyc)
+ ld4 r3=[r3] // M0|1 r3 = current_thread_info()->flags
+ lfetch.fault.excl.nt1 [r22] // M0|1 prefetch register backing-store
+ nop.i 0
;;
- mov psr.l=r9 // slam the door (17 cyc to srlz.i)
- or r29=r8,r29 // construct cr.ipsr value to save
- addl r22=IA64_RBS_OFFSET,r2 // compute base of RBS
+ mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0
+ nop.m 0
+ nop.i 0
;;
- // GAS reports a spurious RAW hazard on the read of ar.rnat because it thinks
- // we may be reading ar.itc after writing to psr.l. Avoid that message with
- // this directive:
- dv_serialize_data
- mov.m r24=ar.rnat // read ar.rnat (5 cyc lat)
- lfetch.fault.excl.nt1 [r22]
- adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r2
-
- // ensure previous insn group is issued before we stall for srlz.i:
+ mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore
+ mov.m r24=ar.rnat // M2 (5 cyc) read ar.rnat (dual-issues!)
+ nop.i 0
;;
- srlz.i // ensure new psr.l has been established
- /////////////////////////////////////////////////////////////////////////////
- ////////// from this point on, execution is not interruptible anymore
- /////////////////////////////////////////////////////////////////////////////
- addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // compute base of memory stack
- cmp.ne pKStk,pUStk=r0,r0 // set pKStk <- 0, pUStk <- 1
+ mov ar.bspstore=r22 // M2 (6 cyc) switch to kernel RBS
+ movl r8=PSR_ONE_BITS // X
;;
- st1 [r16]=r0 // clear current->thread.on_ustack flag
- mov ar.bspstore=r22 // switch to kernel RBS
- mov b6=r18 // copy syscall entry-point to b6 (7 cyc)
- add r3=TI_FLAGS+IA64_TASK_SIZE,r2
+ mov r25=ar.unat // M2 (5 cyc) save ar.unat
+ mov r19=b6 // I0 save b6 (2 cyc)
+ mov r20=r1 // A save caller's gp in r20
;;
- ld4 r3=[r3] // r2 = current_thread_info()->flags
- mov r18=ar.bsp // save (kernel) ar.bsp (12 cyc)
- mov ar.rsc=0x3 // set eager mode, pl 0, little-endian, loadrs=0
- br.call.sptk.many b7=ia64_syscall_setup
- ;;
- ssm psr.i
- movl r2=ia64_ret_from_syscall
+ or r29=r8,r29 // A construct cr.ipsr value to save
+ mov b6=r18 // I0 copy syscall entry-point to b6 (7 cyc)
+ addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack
+
+ mov r18=ar.bsp // M2 save (kernel) ar.bsp (12 cyc)
+ cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1
+ br.call.sptk.many b7=ia64_syscall_setup // B
;;
- mov rp=r2 // set the real return addr
- and r3=_TIF_SYSCALL_TRACEAUDIT,r3
+ mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
+ mov rp=r14 // I0 set the real return addr
+ and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A
;;
- cmp.eq p8,p0=r3,r0
+ ssm psr.i // M2 we're on kernel stacks now, reenable irqs
+ cmp.eq p8,p0=r3,r0 // A
+(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT
-(p10) br.cond.spnt.many ia64_ret_from_syscall // p10==true means out registers are more than 8
-(p8) br.call.sptk.many b6=b6 // ignore this return addr
- br.cond.sptk ia64_trace_syscall
+ nop.m 0
+(p8) br.call.sptk.many b6=b6 // B (ignore return address)
+ br.cond.spnt ia64_trace_syscall // B
END(fsys_bubble_down)
.rodata
{
struct ia64_psr *psr = ia64_psr(ia64_task_regs(task));
+ /*
+ * Prevent migrating this task while
+ * we're fiddling with the FPU state
+ */
+ preempt_disable();
if (ia64_is_local_fpu_owner(task) && psr->mfh) {
psr->mfh = 0;
task->thread.flags |= IA64_THREAD_FPH_VALID;
ia64_save_fpu(&task->thread.fph[0]);
}
+ preempt_enable();
}
/*
unsigned long cfm)
{
struct unw_frame_info info, prev_info;
- unsigned long ip, pr;
+ unsigned long ip, sp, pr;
unw_init_from_blocked_task(&info, child);
while (1) {
prev_info = info;
if (unw_unwind(&info) < 0)
return;
- if (unw_get_rp(&info, &ip) < 0)
+
+ unw_get_sp(&info, &sp);
+ if ((long)((unsigned long)child + IA64_STK_OFFSET - sp)
+ < IA64_PT_REGS_SIZE) {
+ dprintk("ptrace.%s: ran off the top of the kernel "
+ "stack\n", __FUNCTION__);
+ return;
+ }
+ if (unw_get_pr (&prev_info, &pr) < 0) {
+ unw_get_rp(&prev_info, &ip);
+ dprintk("ptrace.%s: failed to read "
+ "predicate register (ip=0x%lx)\n",
+ __FUNCTION__, ip);
return;
- if (ip < FIXADDR_USER_END)
+ }
+ if (unw_is_intr_frame(&info)
+ && (pr & (1UL << PRED_USER_STACK)))
break;
}
+ /*
+ * Note: at the time of this call, the target task is blocked
+ * in notify_resume_user() and by clearling PRED_LEAVE_SYSCALL
+ * (aka, "pLvSys") we redirect execution from
+ * .work_pending_syscall_end to .work_processed_kernel.
+ */
unw_get_pr(&prev_info, &pr);
- pr &= ~(1UL << PRED_SYSCALL);
+ pr &= ~((1UL << PRED_SYSCALL) | (1UL << PRED_LEAVE_SYSCALL));
pr |= (1UL << PRED_NON_SYSCALL);
unw_set_pr(&prev_info, pr);
pt->cr_ifs = (1UL << 63) | cfm;
+ /*
+ * Clear the memory that is NOT written on syscall-entry to
+ * ensure we do not leak kernel-state to user when execution
+ * resumes.
+ */
+ pt->r2 = 0;
+ pt->r3 = 0;
+ pt->r14 = 0;
+ memset(&pt->r16, 0, 16*8); /* clear r16-r31 */
+ memset(&pt->f6, 0, 6*16); /* clear f6-f11 */
+ pt->b7 = 0;
+ pt->ar_ccv = 0;
+ pt->ar_csd = 0;
+ pt->ar_ssd = 0;
}
static int
long arg4, long arg5, long arg6, long arg7,
struct pt_regs regs)
{
- long syscall;
+ if (test_thread_flag(TIF_SYSCALL_TRACE)
+ && (current->ptrace & PT_PTRACED))
+ syscall_trace();
if (unlikely(current->audit_context)) {
- if (IS_IA32_PROCESS(®s))
+ long syscall;
+ int arch;
+
+ if (IS_IA32_PROCESS(®s)) {
syscall = regs.r1;
- else
+ arch = AUDIT_ARCH_I386;
+ } else {
syscall = regs.r15;
+ arch = AUDIT_ARCH_IA64;
+ }
- audit_syscall_entry(current, syscall, arg0, arg1, arg2, arg3);
+ audit_syscall_entry(current, arch, syscall, arg0, arg1, arg2, arg3);
}
- if (test_thread_flag(TIF_SYSCALL_TRACE)
- && (current->ptrace & PT_PTRACED))
- syscall_trace();
}
/* "asmlinkage" so the input arguments are preserved... */
struct pt_regs regs)
{
if (unlikely(current->audit_context))
- audit_syscall_exit(current, regs.r8);
+ audit_syscall_exit(current, AUDITSC_RESULT(regs.r10), regs.r8);
if (test_thread_flag(TIF_SYSCALL_TRACE)
&& (current->ptrace & PT_PTRACED))