mov ar.rsc=0 // place RSE in enforced lazy mode
;;
loadrs // clear the dirty partition
- mov IA64_KR(PER_CPU_DATA)=r0 // clear physical per-CPU base
+ movl r19=__phys_per_cpu_start
+ mov r18=PERCPU_PAGE_SIZE
+ ;;
+#ifndef CONFIG_SMP
+ add r19=r19,r18
+ ;;
+#else
+(isAP) br.few 2f // APs skip the copy; cpu_init() sets up their per-CPU state later
+ mov r20=r19 // r20 = copy source (__phys_per_cpu_start)
+ sub r19=r19,r18 // r19 = copy destination: the page reserved just below it for cpu0
+ ;;
+ shr.u r18=r18,3 // r18 = number of 8-byte words to copy
+1:
+ ld8 r21=[r20],8;;
+ st8 [r19]=r21,8
+ adds r18=-1,r18;;
+ cmp4.lt p7,p6=0,r18
+(p7) br.cond.dptk.few 1b
+2:
+#endif
+ tpa r19=r19 // convert to physical address
+ ;;
+ .pred.rel.mutex isBP,isAP
+(isBP) mov IA64_KR(PER_CPU_DATA)=r19 // per-CPU base for cpu0
+(isAP) mov IA64_KR(PER_CPU_DATA)=r0 // clear physical per-CPU base
;;
mov ar.bspstore=r2 // establish the new RSE stack
;;
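
For readers who do not speak ia64 assembly, here is a rough stand-alone C model of what the SMP branch above does on the bootstrap processor: it copies PERCPU_PAGE_SIZE bytes, eight at a time, from the per-cpu load image at __phys_per_cpu_start into the page the linker script now reserves just below it, leaving r19 back at __phys_per_cpu_start (the end of cpu0's page), which tpa then converts for ar.k3. The array names below are invented for the example; only PERCPU_PAGE_SIZE mirrors a real kernel constant.

/* Stand-alone model of the BP copy loop above (illustration only). */
#include <assert.h>
#include <stdint.h>
#include <string.h>

#define PERCPU_PAGE_SIZE (64 * 1024)

static uint64_t percpu_image[PERCPU_PAGE_SIZE / 8]; /* __phys_per_cpu_start */
static uint64_t cpu0_page[PERCPU_PAGE_SIZE / 8];    /* reserved page below  */

int main(void)
{
	uint64_t *src = percpu_image;        /* r20                        */
	uint64_t *dst = cpu0_page;           /* r19 after the sub          */
	long words = PERCPU_PAGE_SIZE >> 3;  /* r18 after shr.u r18=r18,3  */

	percpu_image[0] = 0x1234;            /* pretend initialised data   */

	while (words-- > 0)                  /* ld8 ...,8 / st8 ...,8 pair */
		*dst++ = *src++;

	/* after the loop, r19 has advanced to __phys_per_cpu_start, i.e.
	 * the end of cpu0's page; tpa of that value goes into ar.k3 */
	assert(dst == cpu0_page + PERCPU_PAGE_SIZE / 8);
	assert(memcmp(cpu0_page, percpu_image, PERCPU_PAGE_SIZE) == 0);
	return 0;
}
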
if (smp_processor_id() == 0) {
cpu_set(0, per_cpu(cpu_sibling_map, 0));
cpu_set(0, cpu_core_map[0]);
+ } else {
+ /*
+ * Set ar.k3 so that assembly code in MCA handler can compute
+ * physical addresses of per cpu variables with a simple:
+ *
+ *     phys = ar.k3 + &per_cpu_var
+ *
+ * and the alt-dtlb-miss handler can set per-cpu mapping into
+ * the TLB when needed. head.S already did this for cpu0.
+ */
+ ia64_set_kr(IA64_KR_PER_CPU_DATA,
+ ia64_tpa(cpu_data) - (long) __per_cpu_start);
}
#endif
- /*
- * We set ar.k3 so that assembly code in MCA handler can compute
- * physical addresses of per cpu variables with a simple:
- * phys = ar.k3 + &per_cpu_var
- */
- ia64_set_kr(IA64_KR_PER_CPU_DATA,
- ia64_tpa(cpu_data) - (long) __per_cpu_start);
-
get_max_cacheline_size();
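
The value written to ar.k3 makes per-cpu address translation a single add for physical-mode code such as the MCA handler. A stand-alone sketch of that arithmetic follows; all numbers are made up (the real __per_cpu_start is the link address of the .data.percpu section, and cpu_data_phys stands in for what ia64_tpa(cpu_data) returns).

/* Sketch of the "phys = ar.k3 + &per_cpu_var" convention (illustrative
 * addresses only, not real kernel values).
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t per_cpu_start = 0xffffffffffff0000ULL; /* link address of .data.percpu */
	uint64_t per_cpu_var   = per_cpu_start + 0x40;  /* &per_cpu_var (link address)  */
	uint64_t cpu_data_phys = 0x0000000004000000ULL; /* ia64_tpa(cpu_data)           */

	/* what the ia64_set_kr(IA64_KR_PER_CPU_DATA, ...) call stores */
	uint64_t k3 = cpu_data_phys - per_cpu_start;

	/* what physical-mode code (e.g. the MCA handler) computes */
	uint64_t phys = k3 + per_cpu_var;

	printf("phys=%#llx expected=%#llx\n",
	       (unsigned long long)phys,
	       (unsigned long long)(cpu_data_phys + 0x40));
	return 0;
}
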
/*
{
/* Early console may use I/O ports */
ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase));
+#ifndef CONFIG_PRINTK_TIME
Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id());
+#endif
efi_map_pal_code();
cpu_init();
preempt_disable();
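
The CONFIG_PRINTK_TIME guard is there because a timestamped printk ends up reading per-cpu data for its timestamp, and on an application processor that is only safe after the cpu_init() call a few lines below has installed this CPU's per-cpu base. A stand-alone illustration of the ordering constraint, with all names invented:

/* Illustration only: why a timestamped print must not run before the
 * per-cpu setup done in cpu_init() (names here are invented).
 */
#include <assert.h>
#include <stdbool.h>

static bool percpu_ready;                 /* set by the cpu_init() stand-in */

static void cpu_init_stub(void) { percpu_ready = true; }

static void plain_print(void)   { /* no per-cpu access: safe at any time */ }

static void timestamped_print(void)
{
	assert(percpu_ready);     /* timestamp path needs per-cpu clock state */
}

int main(void)
{
	plain_print();          /* the Dprintk kept when !CONFIG_PRINTK_TIME */
	cpu_init_stub();        /* cpu_init() in start_secondary()           */
	timestamped_print();    /* timestamped output is fine from here on   */
	return 0;
}
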
/* Per-cpu data: */
percpu : { } :percpu
. = ALIGN(PERCPU_PAGE_SIZE);
+#ifdef CONFIG_SMP
+ . = . + PERCPU_PAGE_SIZE; /* cpu0 per-cpu space */
+#endif
__phys_per_cpu_start = .;
.data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET)
{
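
The two added linker-script lines open a PERCPU_PAGE_SIZE hole immediately below __phys_per_cpu_start on SMP builds; that hole is the page head.S copies cpu0's per-cpu data into. A stand-alone model of the cursor arithmetic, with a made-up base address:

/* Model of the layout created by the #ifdef CONFIG_SMP lines above
 * (the base address is invented).
 */
#include <assert.h>
#include <stdio.h>

#define PERCPU_PAGE_SIZE (64 * 1024)

int main(void)
{
	unsigned long dot = 0x100000;            /* '.' after the ALIGN()      */
	unsigned long cpu0_space = dot;          /* start of the reserved page */

	dot += PERCPU_PAGE_SIZE;                 /* . = . + PERCPU_PAGE_SIZE   */
	unsigned long phys_per_cpu_start = dot;  /* __phys_per_cpu_start = .   */

	/* this is the address contig.c/discontig.c compute for cpu0 */
	assert(cpu0_space == phys_per_cpu_start - PERCPU_PAGE_SIZE);
	printf("cpu0 page: [%#lx, %#lx)\n", cpu0_space, phys_per_cpu_start);
	return 0;
}
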
* get_zeroed_page().
*/
if (first_time) {
+ void *cpu0_data = __phys_per_cpu_start - PERCPU_PAGE_SIZE;
+
first_time=0;
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
+
+ __per_cpu_offset[0] = (char *) cpu0_data - __per_cpu_start;
+ per_cpu(local_per_cpu_offset, 0) = __per_cpu_offset[0];
+
+ for (cpu = 1; cpu < NR_CPUS; cpu++) {
memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
__per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start;
cpu_data += PERCPU_PAGE_SIZE;
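
What per_cpu_init() now sets up can be modelled in ordinary C: cpu0's offset points at the page carried in the kernel image, while the loop starting at cpu 1 hands out pages from the bootmem block. A stand-alone sketch (array names and sizes are made up; only the copy and offset arithmetic mirror the code above):

/* Stand-alone model of the offsets set up above and of how per_cpu()
 * consumes them (everything here is illustrative, not kernel code).
 */
#include <stdio.h>
#include <string.h>

#define NR_CPUS          4
#define PERCPU_PAGE_SIZE 64

static char percpu_image[PERCPU_PAGE_SIZE] = "initial per-cpu data";
static char cpu0_page[PERCPU_PAGE_SIZE];                /* page in the image */
static char bootmem[(NR_CPUS - 1) * PERCPU_PAGE_SIZE];  /* cpus 1..NR_CPUS-1 */
static long __per_cpu_offset[NR_CPUS];

static long offset_of(const char *area, const char *image)
{
	return (long)area - (long)image;     /* what __per_cpu_offset holds */
}

int main(void)
{
	char *cpu_data = bootmem;
	int cpu;

	memcpy(cpu0_page, percpu_image, PERCPU_PAGE_SIZE);  /* head.S's copy */
	__per_cpu_offset[0] = offset_of(cpu0_page, percpu_image);

	for (cpu = 1; cpu < NR_CPUS; cpu++) {               /* loop above    */
		memcpy(cpu_data, percpu_image, PERCPU_PAGE_SIZE);
		__per_cpu_offset[cpu] = offset_of(cpu_data, percpu_image);
		cpu_data += PERCPU_PAGE_SIZE;
	}

	for (cpu = 0; cpu < NR_CPUS; cpu++)   /* per_cpu(var, cpu) analogue  */
		printf("cpu%d: %s\n", cpu,
		       (char *)((long)percpu_image + __per_cpu_offset[cpu]));
	return 0;
}
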
static inline void
alloc_per_cpu_data(void)
{
- cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS,
+ cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * (NR_CPUS - 1),
PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
}
#else
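
With cpu0's page carried in the kernel image, the boot-time allocation only has to cover cpus 1..NR_CPUS-1, and the block it returns is exactly what the cpu_data pointer in per_cpu_init() slices up. A quick stand-alone check of that sizing, using made-up constants:

/* Illustration of the alloc_per_cpu_data() sizing: NR_CPUS-1 per-cpu
 * pages, consumed one per CPU by the loop in per_cpu_init().
 */
#include <assert.h>

#define NR_CPUS          4
#define PERCPU_PAGE_SIZE (64 * 1024)

int main(void)
{
	long allocated = (long)PERCPU_PAGE_SIZE * (NR_CPUS - 1);
	long used = 0;
	int cpu;

	for (cpu = 1; cpu < NR_CPUS; cpu++)  /* per_cpu_init() loop */
		used += PERCPU_PAGE_SIZE;

	assert(used == allocated);           /* cpu0 needs no bootmem page */
	return 0;
}
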
int cpu;
for_each_possible_early_cpu(cpu) {
- if (node == node_cpuid[cpu].nid) {
+ if (cpu == 0) {
+ void *cpu0_data = __phys_per_cpu_start - PERCPU_PAGE_SIZE;
+ __per_cpu_offset[cpu] = (char*)cpu0_data -
+ __per_cpu_start;
+ } else if (node == node_cpuid[cpu].nid) {
memcpy(__va(cpu_data), __phys_per_cpu_start,
__per_cpu_end - __per_cpu_start);
__per_cpu_offset[cpu] = (char*)__va(cpu_data) -