[ARM] 3256/1: Make the function-returning ldm's use sp as the base register
/*
 *  linux/arch/arm/boot/compressed/head.S
 *
 *  Copyright (C) 1996-2002 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/config.h>
#include <linux/linkage.h>

/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */
#ifdef DEBUG

#if defined(CONFIG_DEBUG_ICEDCC)
		.macro	loadsp, rb
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c0, c1, 0
		.endm
#else

#include <asm/arch/debug-macro.S>

		.macro	writeb, ch, rb
		senduart \ch, \rb
		.endm

#if defined(CONFIG_ARCH_SA1100)
		.macro	loadsp, rb
		mov	\rb, #0x80000000	@ physical base address
#ifdef CONFIG_DEBUG_LL_SER3
		add	\rb, \rb, #0x00050000	@ Ser3
#else
		add	\rb, \rb, #0x00010000	@ Ser1
#endif
		.endm
#elif defined(CONFIG_ARCH_IOP331)
		.macro	loadsp, rb
		mov	\rb, #0xff000000
		orr	\rb, \rb, #0x00ff0000
		orr	\rb, \rb, #0x0000f700	@ location of the UART
		.endm
#elif defined(CONFIG_ARCH_S3C2410)
		.macro	loadsp, rb
		mov	\rb, #0x50000000
		add	\rb, \rb, #0x4000 * CONFIG_S3C2410_LOWLEVEL_UART_PORT
		.endm
#else
		.macro	loadsp, rb
		addruart \rb
		.endm
#endif
#endif
#endif

		.macro	kputc,val
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex,val,len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm
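/*
 * Usage sketch (illustrative): "kphex r6, 8" prints r6 as eight hex
 * digits via phex, and "kputc #':'" emits a single character, exactly
 * as the debug_reloc_start macro below does.
 */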

		.macro	debug_reloc_start
#ifdef DEBUG
		kputc	#'\n'
		kphex	r6, 8		/* processor id */
		kputc	#':'
		kphex	r7, 8		/* architecture id */
		kputc	#':'
		mrc	p15, 0, r0, c1, c0
		kphex	r0, 8		/* control reg */
		kputc	#'\n'
		kphex	r5, 8		/* decompressed kernel start */
		kputc	#'-'
		kphex	r8, 8		/* decompressed kernel end */
		kputc	#'>'
		kphex	r4, 8		/* kernel execution address */
		kputc	#'\n'
#endif
		.endm

		.macro	debug_reloc_end
#ifdef DEBUG
		kphex	r5, 8		/* end of kernel */
		kputc	#'\n'
		mov	r0, r4
		bl	memdump		/* dump 256 bytes at start of kernel */
#endif
		.endm

		.section ".start", #alloc, #execinstr
/*
 * sort out different calling conventions
 */
		.align
start:
		.type	start,#function
		.rept	8
		mov	r0, r0
		.endr
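/*
 * The eight NOPs above appear to give boot loaders a safe landing
 * pad: entering anywhere in the first eight words of the image still
 * falls through harmlessly to the branch below.
 */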

		b	1f
		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
1:		mov	r7, r1			@ save architecture ID
		mov	r8, #0			@ save r0

#ifndef __ARM_ARCH_2__
		/*
		 * Booting from Angel - need to enter SVC mode and disable
		 * FIQs/IRQs (numeric definitions from angel arm.h source).
		 * We only do this if we were in user mode on entry.
		 */
		mrs	r2, cpsr		@ get current mode
		tst	r2, #3			@ not user?
		bne	not_angel
		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
		swi	0x123456		@ angel_SWI_ARM
not_angel:
		mrs	r2, cpsr		@ turn off interrupts to
		orr	r2, r2, #0xc0		@ prevent angel from running
		msr	cpsr_c, r2
#else
		teqp	pc, #0x0c000003		@ turn off interrupts
#endif

		/*
		 * Note that some cache flushing and other stuff may
		 * be needed here - is there an Angel SWI call for this?
		 */

		/*
		 * some architecture specific code can be inserted
		 * by the linker here, but it should preserve r7 and r8.
		 */

		.text
		adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip, sp}
		subs	r0, r0, r1		@ calculate the delta offset

						@ if delta is zero, we are
		beq	not_relocated		@ running at the address we
						@ were linked at.
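/*
 * Worked example (illustrative addresses): if LC0 was linked at
 * 0x00000138 but "adr r0, LC0" yields 0x00200138 at run time, r0
 * becomes 0x00200000, and every linked address loaded above must be
 * adjusted upwards by that delta.
 */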

		/*
		 * We're running at a different address.  We need to fix
		 * up various pointers:
		 *   r5 - zImage base address
		 *   r6 - GOT start
		 *   ip - GOT end
		 */
		add	r5, r5, r0
		add	r6, r6, r0
		add	ip, ip, r0

#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
		 * we need to fix up pointers into the BSS region.
		 *   r2 - BSS start
		 *   r3 - BSS end
		 *   sp - stack pointer
		 */
		add	r2, r2, r0
		add	r3, r3, r0
		add	sp, sp, r0

		/*
		 * Relocate all entries in the GOT table.
		 */
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
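/*
 * Each GOT entry holds the link-time absolute address of a C object;
 * adding the delta in r0 turns it into the object's run-time address.
 * With the illustrative delta above, an entry of 0x00004000 would
 * become 0x00204000.
 */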
#else

		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries that are outside the (relocated) BSS region.
		 */
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
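/*
 * The conditional chain reads: "cmp r1, r2" tests entry - bss_start;
 * only if the entry is not below the BSS ("hs") does "cmphs r3, r1"
 * then test _end - entry.  "addlo" fires when whichever comparison
 * ran last came out lower, i.e. entry < bss_start || _end < entry,
 * so only entries outside the BSS are adjusted.
 */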
#endif

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b
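/*
 * The loop clears 16 bytes per iteration and only tests the limit
 * afterwards, so it can overshoot _end by up to 12 bytes; presumably
 * this is harmless because only the scratch stack area follows the
 * BSS in the linker script.
 */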

		/*
		 * The C runtime environment should now be setup
		 * sufficiently.  Turn the cache on, set up some
		 * pointers, and start decompressing.
		 */
		bl	cache_on

		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max

/*
 * Check to see if we will overwrite ourselves.
 *   r4 = final kernel address
 *   r5 = start of this image
 *   r2 = end of malloc space (and therefore this image)
 * We basically want:
 *   r4 >= r2 -> OK
 *   r4 + image length <= r5 -> OK
 */
		cmp	r4, r2
		bhs	wont_overwrite
		add	r0, r4, #4096*1024	@ 4MB largest kernel size
		cmp	r0, r5
		bls	wont_overwrite
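/*
 * Worked example (illustrative numbers): with r4 = 0x00008000 and
 * malloc space ending at r2 = 0x00c10000, the first test fails; the
 * second then checks r4 + 4MB = 0x00408000 against the image start.
 * If the zImage sits at r5 = 0x00800000, decompression at r4 cannot
 * reach it, so we branch to wont_overwrite.
 */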

		mov	r5, r2			@ decompress after malloc space
		mov	r0, r5
		mov	r3, r7
		bl	decompress_kernel

		add	r0, r0, #127
		bic	r0, r0, #127		@ align the kernel length
/*
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8-r14 = unused
 */
		add	r1, r5, r0		@ end of decompressed kernel
		adr	r2, reloc_start
		ldr	r3, LC1
		add	r3, r2, r3
1:		ldmia	r2!, {r8 - r13}		@ copy relocation code
		stmia	r1!, {r8 - r13}
		ldmia	r2!, {r8 - r13}
		stmia	r1!, {r8 - r13}
		cmp	r2, r3
		blo	1b

		bl	cache_clean_flush
		add	pc, r5, r0		@ call relocation code
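/*
 * The copy loop moves 48 bytes per iteration (two six-register
 * ldm/stm pairs), placing the relocation code just past the
 * decompressed kernel.  "add pc, r5, r0" then jumps to that copy:
 * r5 + r0 is the end of the kernel, which is exactly where
 * reloc_start now lives.
 */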

/*
 * We're not in danger of overwriting ourselves.  Do this the simple way.
 *
 * r4     = kernel execution address
 * r7     = architecture ID
 */
wont_overwrite:	mov	r0, r4
		mov	r3, r7
		bl	decompress_kernel
		b	call_kernel

		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	zreladdr		@ r4
		.word	_start			@ r5
		.word	_got_start		@ r6
		.word	_got_end		@ ip
		.word	user_stack+4096		@ sp
LC1:		.word	reloc_end - reloc_start
		.size	LC0, . - LC0

#ifdef CONFIG_ARCH_RPC
		.globl	params
params:		ldr	r0, =params_phys
		mov	pc, lr
		.ltorg
		.align
#endif

/*
 * Turn on the cache.  We need to setup some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r6 = processor ID
 *  r7 = architecture number
 *  r8 = run-time address of "start"
 * On exit,
 *  r1, r2, r3, r8, r9, r12 corrupted
 * This routine must preserve:
 *  r4, r5, r6, r7
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn

__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
		mov	r0, r3
		mov	r8, r0, lsr #18
		mov	r8, r8, lsl #18		@ start of RAM
		add	r9, r8, #0x10000000	@ a reasonable RAM size
		mov	r1, #0x12
		orr	r1, r1, #3 << 10
		add	r2, r3, #16384
1:		cmp	r1, r8			@ if virt > start of RAM
		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
		cmp	r1, r9			@ if virt > end of RAM
		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
/*
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for up to 1 MB compressed kernel.
 * If the execution is in RAM then we would only be duplicating the above.
 */
		mov	r1, #0x1e
		orr	r1, r1, #3 << 10
		mov	r2, pc, lsr #20
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr
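/*
 * Descriptor decode (for reference): 0x12 + (3 << 10) = 0xc12 is a
 * 1MB section descriptor (type 0b10) with bit 4 set and AP = 0b11
 * (read/write); the loop above ORs in or clears C+B (0x0c) depending
 * on whether the virtual address falls within RAM.  The flash mapping
 * uses 0x1e, i.e. the same descriptor with C+B already set.
 */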

__armv4_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x0030
		bl	__common_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mov	pc, r12

__arm6_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	r0, #0x30
		bl	__common_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, r12

__common_cache_on:
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mov	pc, lr

/*
 * All code following this line is relocatable.  It is relocated by
 * the above code to the end of the decompressed kernel image and
 * executed there.  During this time, we have no stacks.
 *
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8-r14 = unused
 */
		.align	5
reloc_start:	add	r8, r5, r0
		debug_reloc_start
		mov	r1, r4
1:
		.rept	4
		ldmia	r5!, {r0, r2, r3, r9 - r13}	@ relocate kernel
		stmia	r1!, {r0, r2, r3, r9 - r13}
		.endr

		cmp	r5, r8
		blo	1b
		debug_reloc_end

call_kernel:	bl	cache_clean_flush
		bl	cache_off
		mov	r0, #0
		mov	r1, r7			@ restore architecture number
		mov	pc, r4			@ call kernel

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 * r1  = corrupted
 * r2  = corrupted
 * r3  = block offset
 * r6  = corrupted
 * r12 = corrupted
 */

call_cache_fn:	adr	r12, proc_types
		mrc	p15, 0, r6, c0, c0	@ get processor ID
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r6		@ (real ^ match)
		tst	r1, r2			@       & mask
		addeq	pc, r12, r3		@ call cache function
		add	r12, r12, #4*5
		b	1b
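/*
 * Each proc_types entry is five words: ID match, ID mask, then three
 * branch instructions.  "addeq pc, r12, r3" with r3 = 8, 12 or 16
 * therefore lands on the 'on', 'off' or 'flush' slot respectively,
 * matching the offsets set up by cache_on, cache_off and
 * cache_clean_flush.
 */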

/*
 * Table for cache operations.  This is basically:
 *  - CPU ID match
 *  - CPU ID mask
 *  - 'cache on' method instruction
 *  - 'cache off' method instruction
 *  - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
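/*
 * For example, an SA-1100 reporting ID 0x4401a119 (revision bits are
 * illustrative) matches the 0x4401a100/0xffffffe0 entry below:
 * 0x4401a119 ^ 0x4401a100 = 0x19, and 0x19 & 0xffffffe0 = 0; the low
 * five revision bits are masked out.
 */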
		.type	proc_types,#object
proc_types:
		.word	0x41560600		@ ARM6/610
		.word	0xffffffe0
		b	__arm6_cache_off	@ works, but slow
		b	__arm6_cache_off
		mov	pc, lr
@		b	__arm6_cache_on		@ untested
@		b	__arm6_cache_off
@		b	__armv3_cache_flush

		.word	0x00000000		@ old ARM ID
		.word	0x0000f000
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		b	__arm7_cache_off
		b	__arm7_cache_off
		mov	pc, lr

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		b	__armv4_cache_on
		b	__armv4_cache_off
		mov	pc, lr

		.word	0x00007000		@ ARM7 IDs
		.word	0x0000f000
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		@ Everything from here on will be the new ID system.

		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		@ These match on the architecture ID

		.word	0x00020000		@ ARMv4T
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x00050000		@ ARMv5TE
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x00060000		@ ARMv5TEJ
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x00070000		@ ARMv6
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv6_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.size	proc_types, . - proc_types

/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On entry,  r6 = processor ID
 * On exit,   r0, r1, r2, r3, r12 corrupted
 * This routine must preserve: r4, r6, r7
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn

__armv4_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
		mov	pc, lr

__arm6_cache_off:
		mov	r0, #0x00000030		@ ARM6 control reg.
		b	__armv3_cache_off

__arm7_cache_off:
		mov	r0, #0x00000070		@ ARM7 control reg.
		b	__armv3_cache_off

__armv3_cache_off:
		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, lr

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On entry,
 *  r6 = processor ID
 * On exit,
 *  r1, r2, r3, r11, r12 corrupted
 * This routine must preserve:
 *  r0, r4, r5, r6, r7
 */
		.align	5
cache_clean_flush:
		mov	r3, #16
		b	call_cache_fn

__armv6_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv4_cache_flush:
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r6			@ cache ID register present?
		beq	no_cache_id
		mov	r1, r3, lsr #18
		and	r1, r1, #7
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		bic	r1, pc, #63		@ align to longest cache line
		add	r2, r1, r2
1:		ldr	r3, [r1], r11		@ s/w flush D cache
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
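/*
 * Cache type decode used above: bits [20:18] give the D-cache size
 * field (r2 = 1024 << field, i.e. twice the cache size, so the reads
 * in the loop evict every line), bit 14 is the M bit (adds half again
 * when set), and bits [13:12] give the line length (r11 = 8 << field
 * bytes).  E.g. a size field of 6 makes r2 = 64K, flushing a 32K
 * D-cache by reading twice its size.
 */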

__armv3_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b

puts:		loadsp	r3
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2, r3
		mov	r1, #0x00020000
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr
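/*
 * Two quirks above: the 0x20000-iteration subs/bne pair is a crude
 * busy-wait so a slow UART can drain between characters, and each
 * '\n' written is followed by a '\r', giving full newlines on the
 * serial console.
 */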
putc:
		mov	r2, r0
		mov	r0, #0
		loadsp	r3
		b	2b

memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
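/*
 * memdump prints 64 words (256 bytes) starting at r0, eight words per
 * line, roughly:
 *   00008000: 00000012 00000034 00000056 00000078  ...
 * with an extra space after the fourth word of each row.
 */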
#endif

reloc_end:

		.align
		.section ".stack", "w"
user_stack:	.space	4096