[PATCH] namespaces: utsname: use init_utsname when appropriate
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / arch / x86_64 / kernel / process.c
1 /*
2 * linux/arch/x86-64/kernel/process.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 *
6 * Pentium III FXSR, SSE support
7 * Gareth Hughes <gareth@valinux.com>, May 2000
8 *
9 * X86-64 port
10 * Andi Kleen.
11 *
12 * CPU hotplug support - ashok.raj@intel.com
13 */
14
15 /*
16 * This file handles the architecture-dependent parts of process handling.
17 */
18
19 #include <stdarg.h>
20
21 #include <linux/cpu.h>
22 #include <linux/errno.h>
23 #include <linux/sched.h>
24 #include <linux/kernel.h>
25 #include <linux/mm.h>
26 #include <linux/elfcore.h>
27 #include <linux/smp.h>
28 #include <linux/slab.h>
29 #include <linux/user.h>
30 #include <linux/module.h>
31 #include <linux/a.out.h>
32 #include <linux/interrupt.h>
33 #include <linux/delay.h>
34 #include <linux/ptrace.h>
35 #include <linux/utsname.h>
36 #include <linux/random.h>
37 #include <linux/notifier.h>
38 #include <linux/kprobes.h>
39
40 #include <asm/uaccess.h>
41 #include <asm/pgtable.h>
42 #include <asm/system.h>
43 #include <asm/io.h>
44 #include <asm/processor.h>
45 #include <asm/i387.h>
46 #include <asm/mmu_context.h>
47 #include <asm/pda.h>
48 #include <asm/prctl.h>
49 #include <asm/kdebug.h>
50 #include <asm/desc.h>
51 #include <asm/proto.h>
52 #include <asm/ia32.h>
53 #include <asm/idle.h>
54
/* Defined outside this file; presumably the assembly return path of a newly forked task -- confirm in the entry code. */
55 asmlinkage extern void ret_from_fork(void);
56
/* CLONE_VM | CLONE_UNTRACED; presumably the base clone flags for kernel threads -- the consumer is not in this file. */
57 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
58
/* Set to 1 by idle_setup() whenever an "idle=" boot option is parsed. */
59 unsigned long boot_option_idle_override = 0;
60 EXPORT_SYMBOL(boot_option_idle_override);
61
62 /*
63 * Power-management idle routine, if any. While this is NULL,
 * cpu_idle() falls back to default_idle().
64 */
65 void (*pm_idle)(void);
66 EXPORT_SYMBOL(pm_idle);
/* Per-CPU flag: cpu_idle_wait() sets it to 1, cpu_idle() clears it on its next pass through the idle loop. */
67 static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
68
/* Notifier chain run with IDLE_START by enter_idle() and with IDLE_END by __exit_idle(). */
69 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
70
/*
 * Add notifier block @n to the idle notifier chain, which is run with
 * IDLE_START / IDLE_END by enter_idle() / __exit_idle().
 */
71 void idle_notifier_register(struct notifier_block *n)
72 {
73 atomic_notifier_chain_register(&idle_notifier, n);
74 }
75 EXPORT_SYMBOL_GPL(idle_notifier_register);
76
/*
 * Remove notifier block @n from the idle notifier chain.
 * NOTE(review): exported with EXPORT_SYMBOL while the register side uses
 * EXPORT_SYMBOL_GPL -- confirm the asymmetry is intended.
 */
77 void idle_notifier_unregister(struct notifier_block *n)
78 {
79 atomic_notifier_chain_unregister(&idle_notifier, n);
80 }
81 EXPORT_SYMBOL(idle_notifier_unregister);
82
/*
 * Mark this CPU as idle: set the PDA "isidle" flag, then run the idle
 * notifier chain with IDLE_START.
 */
83 void enter_idle(void)
84 {
85 write_pda(isidle, 1);
86 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
87 }
88
89 static void __exit_idle(void)
90 {
91 if (read_pda(isidle) == 0)
92 return;
93 write_pda(isidle, 0);
94 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
95 }
96
97 /* Called from interrupts to signify idle end */
98 void exit_idle(void)
99 {
100 /* idle loop has pid 0 */
101 if (current->pid)
102 return;
103 __exit_idle();
104 }
105
106 /*
107 * We use this if we don't have any better
108 * idle routine..
109 */
110 static void default_idle(void)
111 {
112 local_irq_enable();
113
114 current_thread_info()->status &= ~TS_POLLING;
115 smp_mb__after_clear_bit();
116 while (!need_resched()) {
117 local_irq_disable();
118 if (!need_resched())
119 safe_halt();
120 else
121 local_irq_enable();
122 }
123 current_thread_info()->status |= TS_POLLING;
124 }
125
126 /*
127 * On SMP it's slightly faster (but much more power-consuming!)
128 * to poll the ->need_resched flag instead of waiting for the
129 * cross-CPU IPI to arrive. Use this option with caution.
130 */
131 static void poll_idle (void)
132 {
133 local_irq_enable();
134
135 asm volatile(
136 "2:"
137 "testl %0,%1;"
138 "rep; nop;"
139 "je 2b;"
140 : :
141 "i" (_TIF_NEED_RESCHED),
142 "m" (current_thread_info()->flags));
143 }
144
145 void cpu_idle_wait(void)
146 {
147 unsigned int cpu, this_cpu = get_cpu();
148 cpumask_t map;
149
150 set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
151 put_cpu();
152
153 cpus_clear(map);
154 for_each_online_cpu(cpu) {
155 per_cpu(cpu_idle_state, cpu) = 1;
156 cpu_set(cpu, map);
157 }
158
159 __get_cpu_var(cpu_idle_state) = 0;
160
161 wmb();
162 do {
163 ssleep(1);
164 for_each_online_cpu(cpu) {
165 if (cpu_isset(cpu, map) &&
166 !per_cpu(cpu_idle_state, cpu))
167 cpu_clear(cpu, map);
168 }
169 cpus_and(map, map, cpu_online_map);
170 } while (!cpus_empty(map));
171 }
172 EXPORT_SYMBOL_GPL(cpu_idle_wait);
173
#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/*
 * Park an offlined CPU (physical CPU hotplug): record CPU_DEAD in this
 * CPU's cpu_state, then halt forever with interrupts disabled.
 */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();
	/* Acknowledge the offline request. */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	local_irq_disable();
	for (;;)
		halt();
}
#else
/* Without CPU hotplug no CPU may go offline, so getting here is a bug. */
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */
197
198 /*
199 * The idle thread. There's no useful work to be
200 * done, so just try to conserve power and have a
201 * low exit latency (ie sit in a loop waiting for
202 * somebody to say that they'd like to reschedule)
203 */
204 void cpu_idle (void)
205 {
206 current_thread_info()->status |= TS_POLLING;
207 /* endless idle loop with no priority at all */
208 while (1) {
209 while (!need_resched()) {
210 void (*idle)(void);
211
212 if (__get_cpu_var(cpu_idle_state))
213 __get_cpu_var(cpu_idle_state) = 0;
214
215 rmb();
216 idle = pm_idle;
217 if (!idle)
218 idle = default_idle;
219 if (cpu_is_offline(smp_processor_id()))
220 play_dead();
221 enter_idle();
222 idle();
223 /* In many cases the interrupt that ended idle
224 has already called exit_idle. But some idle
225 loops can be woken up without interrupt. */
226 __exit_idle();
227 }
228
229 preempt_enable_no_resched();
230 schedule();
231 preempt_disable();
232 }
233 }
234
235 /*
236 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
237 * which can obviate IPI to trigger checking of need_resched.
238 * We execute MONITOR against need_resched and enter optimized wait state
239 * through MWAIT. Whenever someone changes need_resched, we would be woken
240 * up from MWAIT (without an IPI).
241 */
242 static void mwait_idle(void)
243 {
244 local_irq_enable();
245
246 while (!need_resched()) {
247 __monitor((void *)&current_thread_info()->flags, 0, 0);
248 smp_mb();
249 if (need_resched())
250 break;
251 __mwait(0, 0);
252 }
253 }
254
255 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
256 {
257 static int printed;
258 if (cpu_has(c, X86_FEATURE_MWAIT)) {
259 /*
260 * Skip, if setup has overridden idle.
261 * One CPU supports mwait => All CPUs supports mwait
262 */
263 if (!pm_idle) {
264 if (!printed) {
265 printk("using mwait in idle threads.\n");
266 printed = 1;
267 }
268 pm_idle = mwait_idle;
269 }
270 }
271 }
272
273 static int __init idle_setup (char *str)
274 {
275 if (!strncmp(str, "poll", 4)) {
276 printk("using polling idle threads.\n");
277 pm_idle = poll_idle;
278 }
279
280 boot_option_idle_override = 1;
281 return 1;
282 }
283
284 __setup("idle=", idle_setup);
285
286 /* Prints also some state that isn't saved in the pt_regs */
287 void __show_regs(struct pt_regs * regs)
288 {
289 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
290 unsigned int fsindex,gsindex;
291 unsigned int ds,cs,es;
292
293 printk("\n");
294 print_modules();
295 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
296 current->pid, current->comm, print_tainted(),
297 init_utsname()->release,
298 (int)strcspn(init_utsname()->version, " "),
299 init_utsname()->version);
300 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
301 printk_address(regs->rip);
302 printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
303 regs->eflags);
304 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
305 regs->rax, regs->rbx, regs->rcx);
306 printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
307 regs->rdx, regs->rsi, regs->rdi);
308 printk("RBP: %016lx R08: %016lx R09: %016lx\n",
309 regs->rbp, regs->r8, regs->r9);
310 printk("R10: %016lx R11: %016lx R12: %016lx\n",
311 regs->r10, regs->r11, regs->r12);
312 printk("R13: %016lx R14: %016lx R15: %016lx\n",
313 regs->r13, regs->r14, regs->r15);
314
315 asm("movl %%ds,%0" : "=r" (ds));
316 asm("movl %%cs,%0" : "=r" (cs));
317 asm("movl %%es,%0" : "=r" (es));
318 asm("movl %%fs,%0" : "=r" (fsindex));
319 asm("movl %%gs,%0" : "=r" (gsindex));
320
321 rdmsrl(MSR_FS_BASE, fs);
322 rdmsrl(MSR_GS_BASE, gs);
323 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
324
325 asm("movq %%cr0, %0": "=r" (cr0));
326 asm("movq %%cr2, %0": "=r" (cr2));
327 asm("movq %%cr3, %0": "=r" (cr3));
328 asm("movq %%cr4, %0": "=r" (cr4));
329
330 printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
331 fs,fsindex,gs,gsindex,shadowgs);
332 printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
333 printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
334 }
335
336 void show_regs(struct pt_regs *regs)
337 {
338 printk("CPU %d:", smp_processor_id());
339 __show_regs(regs);
340 show_trace(NULL, regs, (void *)(regs + 1));
341 }
342
343 /*
344 * Free current thread data structures etc..
345 */
346 void exit_thread(void)
347 {
348 struct task_struct *me = current;
349 struct thread_struct *t = &me->thread;
350
351 if (me->thread.io_bitmap_ptr) {
352 struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
353
354 kfree(t->io_bitmap_ptr);
355 t->io_bitmap_ptr = NULL;
356 clear_thread_flag(TIF_IO_BITMAP);
357 /*
358 * Careful, clear this in the TSS too:
359 */
360 memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
361 t->io_bitmap_max = 0;
362 put_cpu();
363 }
364 }
365
366 void flush_thread(void)
367 {
368 struct task_struct *tsk = current;
369 struct thread_info *t = current_thread_info();
370
371 if (t->flags & _TIF_ABI_PENDING) {
372 t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
373 if (t->flags & _TIF_IA32)
374 current_thread_info()->status |= TS_COMPAT;
375 }
376 t->flags &= ~_TIF_DEBUG;
377
378 tsk->thread.debugreg0 = 0;
379 tsk->thread.debugreg1 = 0;
380 tsk->thread.debugreg2 = 0;
381 tsk->thread.debugreg3 = 0;
382 tsk->thread.debugreg6 = 0;
383 tsk->thread.debugreg7 = 0;
384 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
385 /*
386 * Forget coprocessor state..
387 */
388 clear_fpu(tsk);
389 clear_used_math();
390 }
391
392 void release_thread(struct task_struct *dead_task)
393 {
394 if (dead_task->mm) {
395 if (dead_task->mm->context.size) {
396 printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
397 dead_task->comm,
398 dead_task->mm->context.ldt,
399 dead_task->mm->context.size);
400 BUG();
401 }
402 }
403 }
404
405 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
406 {
407 struct user_desc ud = {
408 .base_addr = addr,
409 .limit = 0xfffff,
410 .seg_32bit = 1,
411 .limit_in_pages = 1,
412 .useable = 1,
413 };
414 struct n_desc_struct *desc = (void *)t->thread.tls_array;
415 desc += tls;
416 desc->a = LDT_entry_a(&ud);
417 desc->b = LDT_entry_b(&ud);
418 }
419
420 static inline u32 read_32bit_tls(struct task_struct *t, int tls)
421 {
422 struct desc_struct *desc = (void *)t->thread.tls_array;
423 desc += tls;
424 return desc->base0 |
425 (((u32)desc->base1) << 16) |
426 (((u32)desc->base2) << 24);
427 }
428
429 /*
430 * This gets called before we allocate a new thread and copy
431 * the current task into it.
 *
 * It only calls unlazy_fpu() on @tsk; presumably that saves live FPU
 * state into the task so the copy sees it -- confirm against unlazy_fpu().
432 */
433 void prepare_to_copy(struct task_struct *tsk)
434 {
435 unlazy_fpu(tsk);
436 }
437
438 int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
439 unsigned long unused,
440 struct task_struct * p, struct pt_regs * regs)
441 {
442 int err;
443 struct pt_regs * childregs;
444 struct task_struct *me = current;
445
446 childregs = ((struct pt_regs *)
447 (THREAD_SIZE + task_stack_page(p))) - 1;
448 *childregs = *regs;
449
450 childregs->rax = 0;
451 childregs->rsp = rsp;
452 if (rsp == ~0UL)
453 childregs->rsp = (unsigned long)childregs;
454
455 p->thread.rsp = (unsigned long) childregs;
456 p->thread.rsp0 = (unsigned long) (childregs+1);
457 p->thread.userrsp = me->thread.userrsp;
458
459 set_tsk_thread_flag(p, TIF_FORK);
460
461 p->thread.fs = me->thread.fs;
462 p->thread.gs = me->thread.gs;
463
464 asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
465 asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
466 asm("mov %%es,%0" : "=m" (p->thread.es));
467 asm("mov %%ds,%0" : "=m" (p->thread.ds));
468
469 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
470 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
471 if (!p->thread.io_bitmap_ptr) {
472 p->thread.io_bitmap_max = 0;
473 return -ENOMEM;
474 }
475 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
476 IO_BITMAP_BYTES);
477 set_tsk_thread_flag(p, TIF_IO_BITMAP);
478 }
479
480 /*
481 * Set a new TLS for the child thread?
482 */
483 if (clone_flags & CLONE_SETTLS) {
484 #ifdef CONFIG_IA32_EMULATION
485 if (test_thread_flag(TIF_IA32))
486 err = ia32_child_tls(p, childregs);
487 else
488 #endif
489 err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
490 if (err)
491 goto out;
492 }
493 err = 0;
494 out:
495 if (err && p->thread.io_bitmap_ptr) {
496 kfree(p->thread.io_bitmap_ptr);
497 p->thread.io_bitmap_max = 0;
498 }
499 return err;
500 }
501
502 /*
503 * This special macro can be used to load a debugging register
504 */
505 #define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
506
507 static inline void __switch_to_xtra(struct task_struct *prev_p,
508 struct task_struct *next_p,
509 struct tss_struct *tss)
510 {
511 struct thread_struct *prev, *next;
512
513 prev = &prev_p->thread,
514 next = &next_p->thread;
515
516 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
517 loaddebug(next, 0);
518 loaddebug(next, 1);
519 loaddebug(next, 2);
520 loaddebug(next, 3);
521 /* no 4 and 5 */
522 loaddebug(next, 6);
523 loaddebug(next, 7);
524 }
525
526 if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
527 /*
528 * Copy the relevant range of the IO bitmap.
529 * Normally this is 128 bytes or less:
530 */
531 memcpy(tss->io_bitmap, next->io_bitmap_ptr,
532 max(prev->io_bitmap_max, next->io_bitmap_max));
533 } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
534 /*
535 * Clear any possible leftover bits:
536 */
537 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
538 }
539 }
540
541 /*
542 * switch_to(x,y) should switch tasks from x to y.
543 *
 * Switches the per-CPU and per-thread register state from @prev_p to
 * @next_p: rsp0 in the TSS, the ES/DS/FS/GS selectors and FS/GS bases,
 * the TLS descriptors, the PDA fields (oldrsp, pcurrent, kernelstack
 * and, with CONFIG_CC_STACKPROTECTOR, stack_canary), and lazily the FPU.
 * Returns @prev_p.
 *
544 * This could still be optimized:
545 * - fold all the options into a flag word and test it with a single test.
546 * - could test fs/gs bitsliced
547 *
548 * Kprobes not supported here. Set the probe on schedule instead.
549 */
550 __kprobes struct task_struct *
551 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
552 {
553 struct thread_struct *prev = &prev_p->thread,
554 *next = &next_p->thread;
555 int cpu = smp_processor_id();
556 struct tss_struct *tss = &per_cpu(init_tss, cpu);
557
558 /* we're going to use this soon, after a few expensive things */
559 if (next_p->fpu_counter>5)
560 prefetch(&next->i387.fxsave);
561
562 /*
563 * Reload the ring-0 stack pointer (rsp0) in this CPU's TSS:
564 */
565 tss->rsp0 = next->rsp0;
566
567 /*
568 * Switch DS and ES.
569 * This won't pick up thread selector changes, but I guess that is ok.
570 */
/* Save the live selector into prev; reload only if either side is non-zero. */
571 asm volatile("mov %%es,%0" : "=m" (prev->es));
572 if (unlikely(next->es | prev->es))
573 loadsegment(es, next->es);
574
575 asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
576 if (unlikely(next->ds | prev->ds))
577 loadsegment(ds, next->ds);
578
579 load_TLS(next, cpu);
580
581 /*
582 * Switch FS and GS.
583 */
584 {
585 unsigned fsindex;
586 asm volatile("movl %%fs,%0" : "=r" (fsindex));
587 /* segment register != 0 always requires a reload.
588 also reload when it has changed.
589 when prev process used 64bit base always reload
590 to avoid an information leak. */
591 if (unlikely(fsindex | next->fsindex | prev->fs)) {
592 loadsegment(fs, next->fsindex);
593 /* check if the user used a selector != 0
594 * if yes clear 64bit base, since overloaded base
595 * is always mapped to the Null selector
596 */
597 if (fsindex)
598 prev->fs = 0;
599 }
600 /* when next process has a 64bit base use it */
601 if (next->fs)
602 wrmsrl(MSR_FS_BASE, next->fs);
603 prev->fsindex = fsindex;
604 }
/* Same scheme for GS; the base is written to MSR_KERNEL_GS_BASE. */
605 {
606 unsigned gsindex;
607 asm volatile("movl %%gs,%0" : "=r" (gsindex));
608 if (unlikely(gsindex | next->gsindex | prev->gs)) {
609 load_gs_index(next->gsindex);
610 if (gsindex)
611 prev->gs = 0;
612 }
613 if (next->gs)
614 wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
615 prev->gsindex = gsindex;
616 }
617
618 /*
619 * Switch the PDA and FPU contexts.
620 */
621 prev->userrsp = read_pda(oldrsp);
622 write_pda(oldrsp, next->userrsp);
623 write_pda(pcurrent, next_p);
624
625 /* This must be here to ensure both math_state_restore() and
626 kernel_fpu_begin() work consistently.
627 And the AMD workaround requires it to be after DS reload. */
628 unlazy_fpu(prev_p);
629 write_pda(kernelstack,
630 (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
631 #ifdef CONFIG_CC_STACKPROTECTOR
632 write_pda(stack_canary, next_p->stack_canary);
633 /*
634 * Build time only check to make sure the stack_canary is at
635 * offset 40 in the pda; this is a gcc ABI requirement
636 */
637 BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
638 #endif
639
640 /*
641 * Now maybe reload the debug registers and handle I/O bitmaps
642 */
/* Taken when next has any _TIF_WORK_CTXSW flag set, or prev had an I/O bitmap. */
643 if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
644 || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
645 __switch_to_xtra(prev_p, next_p, tss);
646
647 /* If the task has used fpu the last 5 timeslices, just do a full
648 * restore of the math state immediately to avoid the trap; the
649 * chances of needing FPU soon are obviously high now
650 */
651 if (next_p->fpu_counter>5)
652 math_state_restore();
653 return prev_p;
654 }
655
656 /*
657 * sys_execve() executes a new program.
658 */
659 asmlinkage
660 long sys_execve(char __user *name, char __user * __user *argv,
661 char __user * __user *envp, struct pt_regs regs)
662 {
663 long error;
664 char * filename;
665
666 filename = getname(name);
667 error = PTR_ERR(filename);
668 if (IS_ERR(filename))
669 return error;
670 error = do_execve(filename, argv, envp, &regs);
671 if (error == 0) {
672 task_lock(current);
673 current->ptrace &= ~PT_DTRACE;
674 task_unlock(current);
675 }
676 putname(filename);
677 return error;
678 }
679
/*
 * Put the current thread into 64-bit mode: clear TIF_IA32 and drop
 * READ_IMPLIES_EXEC from the current task's personality.
 */
680 void set_personality_64bit(void)
681 {
682 /* inherit personality from parent */
683
684 /* Make sure to be in 64bit mode */
685 clear_thread_flag(TIF_IA32);
686
687 /* TBD: overwrites user setup. Should have two bits.
688 But 64bit processes have always behaved this way,
689 so it's not too bad. The main problem is just that
690 32bit childs are affected again. */
691 current->personality &= ~READ_IMPLIES_EXEC;
692 }
693
/*
 * fork(): calls do_fork() with SIGCHLD as the only flag and the caller's
 * saved stack pointer (regs->rsp); returns whatever do_fork() returns.
 */
694 asmlinkage long sys_fork(struct pt_regs *regs)
695 {
696 return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
697 }
698
699 asmlinkage long
700 sys_clone(unsigned long clone_flags, unsigned long newsp,
701 void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
702 {
703 if (!newsp)
704 newsp = regs->rsp;
705 return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
706 }
707
708 /*
709 * This is trivial, and on the face of it looks like it
710 * could equally well be done in user mode.
711 *
712 * Not so, for quite unobvious reasons - register pressure.
713 * In user mode vfork() cannot have a stack frame, and if
714 * done by calling the "clone()" system call directly, you
715 * do not have enough call-clobbered registers to hold all
716 * the information you need.
 *
 * Calls do_fork() with CLONE_VFORK | CLONE_VM | SIGCHLD and the caller's
 * saved stack pointer (regs->rsp).
717 */
718 asmlinkage long sys_vfork(struct pt_regs *regs)
719 {
720 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
721 NULL, NULL);
722 }
723
724 unsigned long get_wchan(struct task_struct *p)
725 {
726 unsigned long stack;
727 u64 fp,rip;
728 int count = 0;
729
730 if (!p || p == current || p->state==TASK_RUNNING)
731 return 0;
732 stack = (unsigned long)task_stack_page(p);
733 if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
734 return 0;
735 fp = *(u64 *)(p->thread.rsp);
736 do {
737 if (fp < (unsigned long)stack ||
738 fp > (unsigned long)stack+THREAD_SIZE)
739 return 0;
740 rip = *(u64 *)(fp+8);
741 if (!in_sched_functions(rip))
742 return rip;
743 fp = *(u64 *)fp;
744 } while (count++ < 16);
745 return 0;
746 }
747
/*
 * Get or set the FS/GS base of @task.
 *
 * @task: task to operate on; the hardware is only touched when it is the
 *        current task
 * @code: ARCH_SET_GS, ARCH_SET_FS, ARCH_GET_FS or ARCH_GET_GS
 * @addr: the new base for the SET codes; for the GET codes a user-space
 *        address where the base is stored with put_user()
 *
 * Returns 0 on success, -EPERM when a new base is >= TASK_SIZE_OF(task),
 * -EINVAL for an unknown @code, or the result of checking_wrmsrl() /
 * put_user().
 */
748 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
749 {
750 int ret = 0;
/* Only load segment registers / MSRs when operating on ourselves. */
751 int doit = task == current;
752 int cpu;
753
754 switch (code) {
755 case ARCH_SET_GS:
756 if (addr >= TASK_SIZE_OF(task))
757 return -EPERM;
758 cpu = get_cpu();
759 /* handle small bases via the GDT because that's faster to
760 switch. */
761 if (addr <= 0xffffffff) {
762 set_32bit_tls(task, GS_TLS, addr);
763 if (doit) {
764 load_TLS(&task->thread, cpu);
765 load_gs_index(GS_TLS_SEL);
766 }
767 task->thread.gsindex = GS_TLS_SEL;
768 task->thread.gs = 0;
769 } else {
/* Large base: null selector, base kept in thread.gs and written to the MSR. */
770 task->thread.gsindex = 0;
771 task->thread.gs = addr;
772 if (doit) {
773 load_gs_index(0);
774 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
775 }
776 }
777 put_cpu();
778 break;
779 case ARCH_SET_FS:
780 /* Not strictly needed for fs, but do it for symmetry
781 with gs */
782 if (addr >= TASK_SIZE_OF(task))
783 return -EPERM;
784 cpu = get_cpu();
785 /* handle small bases via the GDT because that's faster to
786 switch. */
787 if (addr <= 0xffffffff) {
788 set_32bit_tls(task, FS_TLS, addr);
789 if (doit) {
790 load_TLS(&task->thread, cpu);
791 asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
792 }
793 task->thread.fsindex = FS_TLS_SEL;
794 task->thread.fs = 0;
795 } else {
796 task->thread.fsindex = 0;
797 task->thread.fs = addr;
798 if (doit) {
799 /* set the selector to 0 to not confuse
800 __switch_to */
801 asm volatile("movl %0,%%fs" :: "r" (0));
802 ret = checking_wrmsrl(MSR_FS_BASE, addr);
803 }
804 }
805 put_cpu();
806 break;
807 case ARCH_GET_FS: {
808 unsigned long base;
/* TLS selector in use: the base lives in the TLS descriptor. */
809 if (task->thread.fsindex == FS_TLS_SEL)
810 base = read_32bit_tls(task, FS_TLS);
811 else if (doit)
812 rdmsrl(MSR_FS_BASE, base);
813 else
814 base = task->thread.fs;
815 ret = put_user(base, (unsigned long __user *)addr);
816 break;
817 }
818 case ARCH_GET_GS: {
819 unsigned long base;
820 unsigned gsindex;
821 if (task->thread.gsindex == GS_TLS_SEL)
822 base = read_32bit_tls(task, GS_TLS);
823 else if (doit) {
/* Current task: read the MSR only while a non-zero GS selector is loaded. */
824 asm("movl %%gs,%0" : "=r" (gsindex));
825 if (gsindex)
826 rdmsrl(MSR_KERNEL_GS_BASE, base);
827 else
828 base = task->thread.gs;
829 }
830 else
831 base = task->thread.gs;
832 ret = put_user(base, (unsigned long __user *)addr);
833 break;
834 }
835
836 default:
837 ret = -EINVAL;
838 break;
839 }
840
841 return ret;
842 }
843
/* arch_prctl(): do_arch_prctl() applied to the current task. */
844 long sys_arch_prctl(int code, unsigned long addr)
845 {
846 return do_arch_prctl(current, code, addr);
847 }
848
849 /*
850 * Capture the user space registers if the task is not running (in user space)
851 */
852 int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
853 {
854 struct pt_regs *pp, ptregs;
855
856 pp = task_pt_regs(tsk);
857
858 ptregs = *pp;
859 ptregs.cs &= 0xffff;
860 ptregs.ss &= 0xffff;
861
862 elf_core_copy_regs(regs, &ptregs);
863
864 return 1;
865 }
866
867 unsigned long arch_align_stack(unsigned long sp)
868 {
869 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
870 sp -= get_random_int() % 8192;
871 return sp & ~0xf;
872 }