memcpy-y := memcpy.o
memcpy-$(CONFIG_CPU_SH4) := memcpy-sh4.o
-lib-y += $(memcpy-y)
+lib-$(CONFIG_MMU) += copy_page.o clear_page.o
+lib-y += $(memcpy-y)
EXTRA_CFLAGS += -Werror
--- /dev/null
+/*
+ * __clear_user_page, __clear_user, clear_page implementation for SuperH
+ *
+ * Copyright (C) 2001 Kaz Kojima
+ * Copyright (C) 2001, 2002 Niibe Yutaka
+ * Copyright (C) 2006 Paul Mundt
+ */
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+/*
+ * clear_page_slow
+ * @to: P1 address
+ *
+ * void clear_page_slow(void *to)
+ */
+
+/*
+ * r0 --- scratch
+ * r4 --- to
+ * r5 --- to + PAGE_SIZE
+ */
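+
+/*
+ * Each pass of the loop below clears one 32-byte cache line: the first
+ * store (movca.l on SH-4) allocates the line without reading it from
+ * memory, the seven pre-decrement stores fill in the rest and, on SH-4,
+ * ocbwb writes the line back.  A rough C-level sketch of the same loop:
+ *
+ *	unsigned long *p = to, *end = to + PAGE_SIZE;
+ *	while (p < end)
+ *		*p++ = 0;
+ */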
+ENTRY(clear_page_slow)
+ mov r4,r5
+ mov.l .Llimit,r0
+ add r0,r5
+ mov #0,r0
+ !
+1:
+#if defined(CONFIG_CPU_SH3)
+ mov.l r0,@r4
+#elif defined(CONFIG_CPU_SH4)
+ movca.l r0,@r4
+ mov r4,r1
+#endif
+ add #32,r4
+ mov.l r0,@-r4
+ mov.l r0,@-r4
+ mov.l r0,@-r4
+ mov.l r0,@-r4
+ mov.l r0,@-r4
+ mov.l r0,@-r4
+ mov.l r0,@-r4
+#if defined(CONFIG_CPU_SH4)
+ ocbwb @r1
+#endif
+ cmp/eq r5,r4
+ bf/s 1b
+ add #28,r4
+ !
+ rts
+ nop
+.Llimit: .long (PAGE_SIZE-28)
+
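+/*
+ * __clear_user
+ * r4 --- destination address (user space)
+ * r5 --- length in bytes
+ *
+ * Returns 0 on success, or the number of bytes left uncleared if one of
+ * the tagged stores faults (see .Lbad_clear_user and the __ex_table
+ * entries at the end of this file).
+ */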
+ENTRY(__clear_user)
+ !
+ mov #0, r0
+ mov #0xe0, r1 ! 0xffffffe0
+ !
+ ! r4..(r4+31)&~31 -------- not aligned [ Area 0 ]
+ ! (r4+31)&~31..(r4+r5)&~31 -------- aligned [ Area 1 ]
+ ! (r4+r5)&~31..r4+r5 -------- not aligned [ Area 2 ]
+ !
+ ! Clear area 0
+ mov r4, r2
+ !
+ tst r1, r5 ! length < 32
+ bt .Larea2 ! skip to remainder
+ !
+ add #31, r2
+ and r1, r2
+ cmp/eq r4, r2
+ bt .Larea1
+ mov r2, r3
+ sub r4, r3
+ mov r3, r7
+ mov r4, r2
+ !
+.L0: dt r3
+0: mov.b r0, @r2
+ bf/s .L0
+ add #1, r2
+ !
+ sub r7, r5
+ mov r2, r4
+.Larea1:
+ mov r4, r3
+ add r5, r3
+ and r1, r3
+ cmp/hi r2, r3
+ bf .Larea2
+ !
+ ! Clear area 1
+#if defined(CONFIG_CPU_SH4)
+1: movca.l r0, @r2
+#else
+1: mov.l r0, @r2
+#endif
+ add #4, r2
+2: mov.l r0, @r2
+ add #4, r2
+3: mov.l r0, @r2
+ add #4, r2
+4: mov.l r0, @r2
+ add #4, r2
+5: mov.l r0, @r2
+ add #4, r2
+6: mov.l r0, @r2
+ add #4, r2
+7: mov.l r0, @r2
+ add #4, r2
+8: mov.l r0, @r2
+ add #4, r2
+ cmp/hi r2, r3
+ bt/s 1b
+ nop
+ !
+ ! Clear area 2
+.Larea2:
+ mov r4, r3
+ add r5, r3
+ cmp/hs r3, r2
+ bt/s .Ldone
+ sub r2, r3
+.L2: dt r3
+9: mov.b r0, @r2
+ bf/s .L2
+ add #1, r2
+ !
+.Ldone: rts
+ mov #0, r0 ! return 0 as normal return
+
+ ! return the number of bytes remaining
+.Lbad_clear_user:
+ mov r4, r0
+ add r5, r0
+ rts
+ sub r2, r0
+
+.section __ex_table,"a"
+ .align 2
+ .long 0b, .Lbad_clear_user
+ .long 1b, .Lbad_clear_user
+ .long 2b, .Lbad_clear_user
+ .long 3b, .Lbad_clear_user
+ .long 4b, .Lbad_clear_user
+ .long 5b, .Lbad_clear_user
+ .long 6b, .Lbad_clear_user
+ .long 7b, .Lbad_clear_user
+ .long 8b, .Lbad_clear_user
+ .long 9b, .Lbad_clear_user
+.previous
--- /dev/null
+/*
+ * copy_page, __copy_user_page, __copy_user implementation for SuperH
+ *
+ * Copyright (C) 2001 Niibe Yutaka & Kaz Kojima
+ * Copyright (C) 2002 Toshinobu Sugioka
+ * Copyright (C) 2006 Paul Mundt
+ */
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+/*
+ * copy_page
+ * @to: P1 address
+ * @from: P1 address
+ *
+ * void copy_page(void *to, void *from)
+ */
+
+/*
+ * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch
+ * r8 --- from + PAGE_SIZE
+ * r9 --- not used
+ * r10 --- to
+ * r11 --- from
+ */
+ENTRY(copy_page)
+ mov.l r8,@-r15
+ mov.l r10,@-r15
+ mov.l r11,@-r15
+ mov r4,r10
+ mov r5,r11
+ mov r5,r8
+ mov.l .Lpsz,r0
+ add r0,r8
+ !
+1: mov.l @r11+,r0
+ mov.l @r11+,r1
+ mov.l @r11+,r2
+ mov.l @r11+,r3
+ mov.l @r11+,r4
+ mov.l @r11+,r5
+ mov.l @r11+,r6
+ mov.l @r11+,r7
+#if defined(CONFIG_CPU_SH3)
+ mov.l r0,@r10
+#elif defined(CONFIG_CPU_SH4)
+ movca.l r0,@r10
+ mov r10,r0
+#endif
+ add #32,r10
+ mov.l r7,@-r10
+ mov.l r6,@-r10
+ mov.l r5,@-r10
+ mov.l r4,@-r10
+ mov.l r3,@-r10
+ mov.l r2,@-r10
+ mov.l r1,@-r10
+#if defined(CONFIG_CPU_SH4)
+ ocbwb @r0
+#endif
+ cmp/eq r11,r8
+ bf/s 1b
+ add #28,r10
+ !
+ mov.l @r15+,r11
+ mov.l @r15+,r10
+ mov.l @r15+,r8
+ rts
+ nop
+
+ .align 2
+.Lpsz: .long PAGE_SIZE
+/*
+ * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n);
+ * Return the number of bytes NOT copied
+ */
+#define EX(...) \
+ 9999: __VA_ARGS__ ; \
+ .section __ex_table, "a"; \
+ .long 9999b, 6000f ; \
+ .previous
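+
+/*
+ * EX() tags a user access that may fault: label 9999 marks the
+ * instruction, and the __ex_table entry sends a fault on it to the
+ * local fixup at 6000, which computes the number of bytes not copied
+ * (r3 holds the last destination address, r4 the current one) and
+ * returns that count.
+ */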
+ENTRY(__copy_user)
+ ! Check if small number of bytes
+ mov #11,r0
+ mov r4,r3
+ cmp/gt r0,r6 ! r6 (len) > r0 (11)
+ bf/s .L_cleanup_loop_no_pop
+ add r6,r3 ! last destination address
+
+ ! Calculate bytes needed to longword-align src
+ mov.l r11,@-r15
+ neg r5,r0
+ mov.l r10,@-r15
+ add #4,r0
+ mov.l r9,@-r15
+ and #3,r0
+ mov.l r8,@-r15
+ tst r0,r0
+ bt 2f
+
+1:
+ ! Copy bytes to longword-align src
+EX( mov.b @r5+,r1 )
+ dt r0
+ add #-1,r6
+EX( mov.b r1,@r4 )
+ bf/s 1b
+ add #1,r4
+
+ ! Jump to appropriate routine depending on dest
+2: mov #3,r1
+ mov r6, r2
+ and r4,r1
+ shlr2 r2
+ shll2 r1
+ mova .L_jump_tbl,r0
+ mov.l @(r0,r1),r1
+ jmp @r1
+ nop
+
+ .align 2
+.L_jump_tbl:
+ .long .L_dest00
+ .long .L_dest01
+ .long .L_dest10
+ .long .L_dest11
+
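+/*
+ * On entry to the .L_dest* routines below, r5 (src) is longword
+ * aligned, the low two bits of the destination address selected the
+ * routine via .L_jump_tbl, and r2 = r6 >> 2 is the number of whole
+ * longwords still to copy.
+ */
+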
+/*
+ * Come here if there are less than 12 bytes to copy
+ *
+ * Keep the branch target close, so the bf/s displacement doesn't
+ * overflow and result in a more expensive branch being inserted. This
+ * is the fast path for small copies; copies dispatched through the
+ * jump table fall back to the default slow-path cleanup. -PFM.
+ */
+.L_cleanup_loop_no_pop:
+ tst r6,r6 ! Check explicitly for zero
+ bt 1f
+
+2:
+EX( mov.b @r5+,r0 )
+ dt r6
+EX( mov.b r0,@r4 )
+ bf/s 2b
+ add #1,r4
+
+1: mov #0,r0 ! normal return
+5000:
+
+# Exception handler:
+.section .fixup, "ax"
+6000:
+ mov.l 8000f,r1
+ mov r3,r0
+ jmp @r1
+ sub r4,r0
+ .align 2
+8000: .long 5000b
+
+.previous
+ rts
+ nop
+
+! Destination = 00
+
+.L_dest00:
+ ! Skip the large copy for small transfers
+ mov #(32+32-4), r0
+ cmp/gt r6, r0 ! r0 (60) > r6 (len)
+ bt 1f
+
+ ! Align dest to a 32 byte boundary
+ neg r4,r0
+ add #0x20, r0
+ and #0x1f, r0
+ tst r0, r0
+ bt 2f
+
+ sub r0, r6
+ shlr2 r0
+3:
+EX( mov.l @r5+,r1 )
+ dt r0
+EX( mov.l r1,@r4 )
+ bf/s 3b
+ add #4,r4
+
+2:
+EX( mov.l @r5+,r0 )
+EX( mov.l @r5+,r1 )
+EX( mov.l @r5+,r2 )
+EX( mov.l @r5+,r7 )
+EX( mov.l @r5+,r8 )
+EX( mov.l @r5+,r9 )
+EX( mov.l @r5+,r10 )
+EX( mov.l @r5+,r11 )
+#ifdef CONFIG_CPU_SH4
+EX( movca.l r0,@r4 )
+#else
+EX( mov.l r0,@r4 )
+#endif
+ add #-32, r6
+EX( mov.l r1,@(4,r4) )
+ mov #32, r0
+EX( mov.l r2,@(8,r4) )
+ cmp/gt r6, r0 ! r0 (32) > r6 (len)
+EX( mov.l r7,@(12,r4) )
+EX( mov.l r8,@(16,r4) )
+EX( mov.l r9,@(20,r4) )
+EX( mov.l r10,@(24,r4) )
+EX( mov.l r11,@(28,r4) )
+ bf/s 2b
+ add #32,r4
+
+1: mov r6, r0
+ shlr2 r0
+ tst r0, r0
+ bt .L_cleanup
+1:
+EX( mov.l @r5+,r1 )
+ dt r0
+EX( mov.l r1,@r4 )
+ bf/s 1b
+ add #4,r4
+
+ bra .L_cleanup
+ nop
+
+! Destination = 10
+
+.L_dest10:
+ mov r2,r7
+ shlr2 r7
+ shlr r7
+ tst r7,r7
+ mov #7,r0
+ bt/s 1f
+ and r0,r2
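+ !
+ ! r7 = number of 32-byte source blocks (longword count >> 3); r2 keeps
+ ! the leftover longwords for the loop at 1f below. xtrct splices the
+ ! halves of consecutive source longwords so that the word-aligned
+ ! destination can be written with longword stores.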
+2:
+ dt r7
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+EX( mov.l @r5+,r0 )
+EX( mov.l @r5+,r1 )
+EX( mov.l @r5+,r8 )
+EX( mov.l @r5+,r9 )
+EX( mov.l @r5+,r10 )
+EX( mov.w r0,@r4 )
+ add #2,r4
+ xtrct r1,r0
+ xtrct r8,r1
+ xtrct r9,r8
+ xtrct r10,r9
+
+EX( mov.l r0,@r4 )
+EX( mov.l r1,@(4,r4) )
+EX( mov.l r8,@(8,r4) )
+EX( mov.l r9,@(12,r4) )
+
+EX( mov.l @r5+,r1 )
+EX( mov.l @r5+,r8 )
+EX( mov.l @r5+,r0 )
+ xtrct r1,r10
+ xtrct r8,r1
+ xtrct r0,r8
+ shlr16 r0
+EX( mov.l r10,@(16,r4) )
+EX( mov.l r1,@(20,r4) )
+EX( mov.l r8,@(24,r4) )
+EX( mov.w r0,@(28,r4) )
+ bf/s 2b
+ add #30,r4
+#else
+EX( mov.l @(28,r5),r0 )
+EX( mov.l @(24,r5),r8 )
+EX( mov.l @(20,r5),r9 )
+EX( mov.l @(16,r5),r10 )
+EX( mov.w r0,@(30,r4) )
+ add #-2,r4
+ xtrct r8,r0
+ xtrct r9,r8
+ xtrct r10,r9
+EX( mov.l r0,@(28,r4) )
+EX( mov.l r8,@(24,r4) )
+EX( mov.l r9,@(20,r4) )
+
+EX( mov.l @(12,r5),r0 )
+EX( mov.l @(8,r5),r8 )
+ xtrct r0,r10
+EX( mov.l @(4,r5),r9 )
+EX(	mov.l	r10,@(16,r4)	)
+EX( mov.l @r5,r10 )
+ xtrct r8,r0
+ xtrct r9,r8
+ xtrct r10,r9
+EX( mov.l r0,@(12,r4) )
+EX( mov.l r8,@(8,r4) )
+ swap.w r10,r0
+EX( mov.l r9,@(4,r4) )
+EX( mov.w r0,@(2,r4) )
+
+ add #32,r5
+ bf/s 2b
+ add #34,r4
+#endif
+ tst r2,r2
+ bt .L_cleanup
+
+1: ! Read longword, write two words per iteration
+EX( mov.l @r5+,r0 )
+ dt r2
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+EX( mov.w r0,@r4 )
+ shlr16 r0
+EX( mov.w r0,@(2,r4) )
+#else
+EX( mov.w r0,@(2,r4) )
+ shlr16 r0
+EX( mov.w r0,@r4 )
+#endif
+ bf/s 1b
+ add #4,r4
+
+ bra .L_cleanup
+ nop
+
+! Destination = 01 or 11
+
+.L_dest01:
+.L_dest11:
+ ! Read longword, write byte, word, byte per iteration
+EX( mov.l @r5+,r0 )
+ dt r2
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+EX( mov.b r0,@r4 )
+ shlr8 r0
+ add #1,r4
+EX( mov.w r0,@r4 )
+ shlr16 r0
+EX( mov.b r0,@(2,r4) )
+ bf/s .L_dest01
+ add #3,r4
+#else
+EX( mov.b r0,@(3,r4) )
+ shlr8 r0
+ swap.w r0,r7
+EX( mov.b r7,@r4 )
+ add #1,r4
+EX( mov.w r0,@r4 )
+ bf/s .L_dest01
+ add #3,r4
+#endif
+
+! Cleanup last few bytes
+.L_cleanup:
+ mov r6,r0
+ and #3,r0
+ tst r0,r0
+ bt .L_exit
+ mov r0,r6
+
+.L_cleanup_loop:
+EX( mov.b @r5+,r0 )
+ dt r6
+EX( mov.b r0,@r4 )
+ bf/s .L_cleanup_loop
+ add #1,r4
+
+.L_exit:
+ mov #0,r0 ! normal return
+
+5000:
+
+# Exception handler:
+.section .fixup, "ax"
+6000:
+ mov.l 8000f,r1
+ mov r3,r0
+ jmp @r1
+ sub r4,r0
+ .align 2
+8000: .long 5000b
+
+.previous
+ mov.l @r15+,r8
+ mov.l @r15+,r9
+ mov.l @r15+,r10
+ rts
+ mov.l @r15+,r11
# Panic should really be compiled as PIC
lib-y := udelay.o c-checksum.o dbg.o panic.o memcpy.o copy_user_memcpy.o \
- page_copy.o page_clear.o
+ copy_page.o clear_page.o
--- /dev/null
+/*
+ Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
+
+ This file is subject to the terms and conditions of the GNU General Public
+ License. See the file "COPYING" in the main directory of this archive
+ for more details.
+
+ Tight version of memset for the case of just clearing a page. It turns out
+ that having the allocos spaced out slightly due to the increment/branch
+ pair causes them to contend less for access to the cache. Similarly,
+ keeping the stores apart from the allocos causes less contention. => Do two
+ separate loops. Do multiple stores per loop to amortise the
+ increment/branch cost a little.
+
+ Parameters:
+ r2 : destination effective address (start of page)
+
+ Always clears 4096 bytes.
+
+ Note: alloco is guarded by synco to avoid the TAKum03020 erratum.
+
+*/
+
+ .section .text..SHmedia32,"ax"
+ .little
+
+ .balign 8
+ .global clear_page
+clear_page:
+ pta/l 1f, tr1
+ pta/l 2f, tr2
+ ptabs/l r18, tr0
+
+ movi 4096, r7
+ add r2, r7, r7
+ add r2, r63, r6
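+
+ ! r7 = start + 4096 (end of page), r6 = running pointer.  The first
+ ! loop preallocates each cache line (alloco), the second fills it with
+ ! zeroes; r63 always reads as zero, so storing r63 stores zero.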
+1:
+ alloco r6, 0
+ synco ! TAKum03020
+ addi r6, 32, r6
+ bgt/l r7, r6, tr1
+
+ add r2, r63, r6
+2:
+ st.q r6, 0, r63
+ st.q r6, 8, r63
+ st.q r6, 16, r63
+ st.q r6, 24, r63
+ addi r6, 32, r6
+ bgt/l r7, r6, tr2
+
+ blink tr0, r63
+
+
--- /dev/null
+/*
+ Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
+
+ This file is subject to the terms and conditions of the GNU General Public
+ License. See the file "COPYING" in the main directory of this archive
+ for more details.
+
+ Tight version of memcpy for the case of just copying a page.
+ Prefetch strategy empirically optimised against RTL simulations
+ of SH5-101 cut2 eval chip with Cayman board DDR memory.
+
+ Parameters:
+ r2 : destination effective address (start of page)
+ r3 : source effective address (start of page)
+
+ Always copies 4096 bytes.
+
+ Points to review.
+ * Currently the prefetch is 4 lines ahead and the alloco is 2 lines ahead.
+ It seems like the prefetch needs to be at least 4 lines ahead to get
+ the data into the cache in time, and the allocos contend with outstanding
+ prefetches for the same cache set, so it's better to have the numbers
+ different.
+ */
+
+ .section .text..SHmedia32,"ax"
+ .little
+
+ .balign 8
+ .global copy_page
+copy_page:
+
+ /* Copy 4096 bytes worth of data from r3 to r2.
+ Do prefetches 4 lines ahead.
+ Do alloco 2 lines ahead */
+
+ pta 1f, tr1
+ pta 2f, tr2
+ pta 3f, tr3
+ ptabs r18, tr0
+
+#if 0
+ /* TAKum03020 */
+ ld.q r3, 0x00, r63
+ ld.q r3, 0x20, r63
+ ld.q r3, 0x40, r63
+ ld.q r3, 0x60, r63
+#endif
+ alloco r2, 0x00
+ synco ! TAKum03020
+ alloco r2, 0x20
+ synco ! TAKum03020
+
+ movi 3968, r6
+ add r2, r6, r6
+ addi r6, 64, r7
+ addi r7, 64, r8
+ sub r3, r2, r60
+ addi r60, 8, r61
+ addi r61, 8, r62
+ addi r62, 8, r23
+ addi r60, 0x80, r22
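+
+ /* After the setup above:
+      r6  = dst + 4096 - 128  (last 4 lines: no prefetch)
+      r7  = dst + 4096 - 64   (last 2 lines: no alloco)
+      r8  = dst + 4096        (end of page)
+      r60/r61/r62/r23 = (src - dst) + 0/8/16/24  (ldx.q load offsets)
+      r22 = (src - dst) + 0x80 (prefetch 4 lines ahead) */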
+
+/* Minimal code size. The extra branches inside the loop don't cost much
+ because they overlap with the time spent waiting for prefetches to
+ complete. */
+1:
+#if 0
+ /* TAKum03020 */
+ bge/u r2, r6, tr2 ! skip prefetch for last 4 lines
+ ldx.q r2, r22, r63 ! prefetch 4 lines hence
+#endif
+2:
+ bge/u r2, r7, tr3 ! skip alloco for last 2 lines
+ alloco r2, 0x40 ! alloc destination line 2 lines ahead
+ synco ! TAKum03020
+3:
+ ldx.q r2, r60, r36
+ ldx.q r2, r61, r37
+ ldx.q r2, r62, r38
+ ldx.q r2, r23, r39
+ st.q r2, 0, r36
+ st.q r2, 8, r37
+ st.q r2, 16, r38
+ st.q r2, 24, r39
+ addi r2, 32, r2
+ bgt/l r8, r2, tr1
+
+ blink tr0, r63 ! return
+++ /dev/null
-/*
- Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
-
- This file is subject to the terms and conditions of the GNU General Public
- License. See the file "COPYING" in the main directory of this archive
- for more details.
-
- Tight version of memset for the case of just clearing a page. It turns out
- that having the alloco's spaced out slightly due to the increment/branch
- pair causes them to contend less for access to the cache. Similarly,
- keeping the stores apart from the allocos causes less contention. => Do two
- separate loops. Do multiple stores per loop to amortise the
- increment/branch cost a little.
-
- Parameters:
- r2 : source effective address (start of page)
-
- Always clears 4096 bytes.
-
- Note : alloco guarded by synco to avoid TAKum03020 erratum
-
-*/
-
- .section .text..SHmedia32,"ax"
- .little
-
- .balign 8
- .global clear_page
-clear_page:
- pta/l 1f, tr1
- pta/l 2f, tr2
- ptabs/l r18, tr0
-
- movi 4096, r7
- add r2, r7, r7
- add r2, r63, r6
-1:
- alloco r6, 0
- synco ! TAKum03020
- addi r6, 32, r6
- bgt/l r7, r6, tr1
-
- add r2, r63, r6
-2:
- st.q r6, 0, r63
- st.q r6, 8, r63
- st.q r6, 16, r63
- st.q r6, 24, r63
- addi r6, 32, r6
- bgt/l r7, r6, tr2
-
- blink tr0, r63
-
-
+++ /dev/null
-/*
- Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
-
- This file is subject to the terms and conditions of the GNU General Public
- License. See the file "COPYING" in the main directory of this archive
- for more details.
-
- Tight version of mempy for the case of just copying a page.
- Prefetch strategy empirically optimised against RTL simulations
- of SH5-101 cut2 eval chip with Cayman board DDR memory.
-
- Parameters:
- r2 : destination effective address (start of page)
- r3 : source effective address (start of page)
-
- Always copies 4096 bytes.
-
- Points to review.
- * Currently the prefetch is 4 lines ahead and the alloco is 2 lines ahead.
- It seems like the prefetch needs to be at at least 4 lines ahead to get
- the data into the cache in time, and the allocos contend with outstanding
- prefetches for the same cache set, so it's better to have the numbers
- different.
- */
-
- .section .text..SHmedia32,"ax"
- .little
-
- .balign 8
- .global copy_page
-copy_page:
-
- /* Copy 4096 bytes worth of data from r3 to r2.
- Do prefetches 4 lines ahead.
- Do alloco 2 lines ahead */
-
- pta 1f, tr1
- pta 2f, tr2
- pta 3f, tr3
- ptabs r18, tr0
-
-#if 0
- /* TAKum03020 */
- ld.q r3, 0x00, r63
- ld.q r3, 0x20, r63
- ld.q r3, 0x40, r63
- ld.q r3, 0x60, r63
-#endif
- alloco r2, 0x00
- synco ! TAKum03020
- alloco r2, 0x20
- synco ! TAKum03020
-
- movi 3968, r6
- add r2, r6, r6
- addi r6, 64, r7
- addi r7, 64, r8
- sub r3, r2, r60
- addi r60, 8, r61
- addi r61, 8, r62
- addi r62, 8, r23
- addi r60, 0x80, r22
-
-/* Minimal code size. The extra branches inside the loop don't cost much
- because they overlap with the time spent waiting for prefetches to
- complete. */
-1:
-#if 0
- /* TAKum03020 */
- bge/u r2, r6, tr2 ! skip prefetch for last 4 lines
- ldx.q r2, r22, r63 ! prefetch 4 lines hence
-#endif
-2:
- bge/u r2, r7, tr3 ! skip alloco for last 2 lines
- alloco r2, 0x40 ! alloc destination line 2 lines ahead
- synco ! TAKum03020
-3:
- ldx.q r2, r60, r36
- ldx.q r2, r61, r37
- ldx.q r2, r62, r38
- ldx.q r2, r23, r39
- st.q r2, 0, r36
- st.q r2, 8, r37
- st.q r2, 16, r38
- st.q r2, 24, r39
- addi r2, 32, r2
- bgt/l r8, r2, tr1
-
- blink tr0, r63 ! return
endif
mmu-y := tlb-nommu.o pg-nommu.o
-mmu-$(CONFIG_MMU) := fault_32.o clear_page.o copy_page.o tlbflush_32.o \
- ioremap_32.o
+mmu-$(CONFIG_MMU) := fault_32.o tlbflush_32.o ioremap_32.o
obj-y += $(mmu-y)
+++ /dev/null
-/*
- * __clear_user_page, __clear_user, clear_page implementation of SuperH
- *
- * Copyright (C) 2001 Kaz Kojima
- * Copyright (C) 2001, 2002 Niibe Yutaka
- * Copyright (C) 2006 Paul Mundt
- */
-#include <linux/linkage.h>
-#include <asm/page.h>
-
-/*
- * clear_page_slow
- * @to: P1 address
- *
- * void clear_page_slow(void *to)
- */
-
-/*
- * r0 --- scratch
- * r4 --- to
- * r5 --- to + PAGE_SIZE
- */
-ENTRY(clear_page_slow)
- mov r4,r5
- mov.l .Llimit,r0
- add r0,r5
- mov #0,r0
- !
-1:
-#if defined(CONFIG_CPU_SH3)
- mov.l r0,@r4
-#elif defined(CONFIG_CPU_SH4)
- movca.l r0,@r4
- mov r4,r1
-#endif
- add #32,r4
- mov.l r0,@-r4
- mov.l r0,@-r4
- mov.l r0,@-r4
- mov.l r0,@-r4
- mov.l r0,@-r4
- mov.l r0,@-r4
- mov.l r0,@-r4
-#if defined(CONFIG_CPU_SH4)
- ocbwb @r1
-#endif
- cmp/eq r5,r4
- bf/s 1b
- add #28,r4
- !
- rts
- nop
-.Llimit: .long (PAGE_SIZE-28)
-
-ENTRY(__clear_user)
- !
- mov #0, r0
- mov #0xe0, r1 ! 0xffffffe0
- !
- ! r4..(r4+31)&~32 -------- not aligned [ Area 0 ]
- ! (r4+31)&~32..(r4+r5)&~32 -------- aligned [ Area 1 ]
- ! (r4+r5)&~32..r4+r5 -------- not aligned [ Area 2 ]
- !
- ! Clear area 0
- mov r4, r2
- !
- tst r1, r5 ! length < 32
- bt .Larea2 ! skip to remainder
- !
- add #31, r2
- and r1, r2
- cmp/eq r4, r2
- bt .Larea1
- mov r2, r3
- sub r4, r3
- mov r3, r7
- mov r4, r2
- !
-.L0: dt r3
-0: mov.b r0, @r2
- bf/s .L0
- add #1, r2
- !
- sub r7, r5
- mov r2, r4
-.Larea1:
- mov r4, r3
- add r5, r3
- and r1, r3
- cmp/hi r2, r3
- bf .Larea2
- !
- ! Clear area 1
-#if defined(CONFIG_CPU_SH4)
-1: movca.l r0, @r2
-#else
-1: mov.l r0, @r2
-#endif
- add #4, r2
-2: mov.l r0, @r2
- add #4, r2
-3: mov.l r0, @r2
- add #4, r2
-4: mov.l r0, @r2
- add #4, r2
-5: mov.l r0, @r2
- add #4, r2
-6: mov.l r0, @r2
- add #4, r2
-7: mov.l r0, @r2
- add #4, r2
-8: mov.l r0, @r2
- add #4, r2
- cmp/hi r2, r3
- bt/s 1b
- nop
- !
- ! Clear area 2
-.Larea2:
- mov r4, r3
- add r5, r3
- cmp/hs r3, r2
- bt/s .Ldone
- sub r2, r3
-.L2: dt r3
-9: mov.b r0, @r2
- bf/s .L2
- add #1, r2
- !
-.Ldone: rts
- mov #0, r0 ! return 0 as normal return
-
- ! return the number of bytes remained
-.Lbad_clear_user:
- mov r4, r0
- add r5, r0
- rts
- sub r2, r0
-
-.section __ex_table,"a"
- .align 2
- .long 0b, .Lbad_clear_user
- .long 1b, .Lbad_clear_user
- .long 2b, .Lbad_clear_user
- .long 3b, .Lbad_clear_user
- .long 4b, .Lbad_clear_user
- .long 5b, .Lbad_clear_user
- .long 6b, .Lbad_clear_user
- .long 7b, .Lbad_clear_user
- .long 8b, .Lbad_clear_user
- .long 9b, .Lbad_clear_user
-.previous
+++ /dev/null
-/*
- * copy_page, __copy_user_page, __copy_user implementation of SuperH
- *
- * Copyright (C) 2001 Niibe Yutaka & Kaz Kojima
- * Copyright (C) 2002 Toshinobu Sugioka
- * Copyright (C) 2006 Paul Mundt
- */
-#include <linux/linkage.h>
-#include <asm/page.h>
-
-/*
- * copy_page
- * @to: P1 address
- * @from: P1 address
- *
- * void copy_page(void *to, void *from)
- */
-
-/*
- * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch
- * r8 --- from + PAGE_SIZE
- * r9 --- not used
- * r10 --- to
- * r11 --- from
- */
-ENTRY(copy_page)
- mov.l r8,@-r15
- mov.l r10,@-r15
- mov.l r11,@-r15
- mov r4,r10
- mov r5,r11
- mov r5,r8
- mov.l .Lpsz,r0
- add r0,r8
- !
-1: mov.l @r11+,r0
- mov.l @r11+,r1
- mov.l @r11+,r2
- mov.l @r11+,r3
- mov.l @r11+,r4
- mov.l @r11+,r5
- mov.l @r11+,r6
- mov.l @r11+,r7
-#if defined(CONFIG_CPU_SH3)
- mov.l r0,@r10
-#elif defined(CONFIG_CPU_SH4)
- movca.l r0,@r10
- mov r10,r0
-#endif
- add #32,r10
- mov.l r7,@-r10
- mov.l r6,@-r10
- mov.l r5,@-r10
- mov.l r4,@-r10
- mov.l r3,@-r10
- mov.l r2,@-r10
- mov.l r1,@-r10
-#if defined(CONFIG_CPU_SH4)
- ocbwb @r0
-#endif
- cmp/eq r11,r8
- bf/s 1b
- add #28,r10
- !
- mov.l @r15+,r11
- mov.l @r15+,r10
- mov.l @r15+,r8
- rts
- nop
-
- .align 2
-.Lpsz: .long PAGE_SIZE
-/*
- * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n);
- * Return the number of bytes NOT copied
- */
-#define EX(...) \
- 9999: __VA_ARGS__ ; \
- .section __ex_table, "a"; \
- .long 9999b, 6000f ; \
- .previous
-ENTRY(__copy_user)
- ! Check if small number of bytes
- mov #11,r0
- mov r4,r3
- cmp/gt r0,r6 ! r6 (len) > r0 (11)
- bf/s .L_cleanup_loop_no_pop
- add r6,r3 ! last destination address
-
- ! Calculate bytes needed to align to src
- mov.l r11,@-r15
- neg r5,r0
- mov.l r10,@-r15
- add #4,r0
- mov.l r9,@-r15
- and #3,r0
- mov.l r8,@-r15
- tst r0,r0
- bt 2f
-
-1:
- ! Copy bytes to long word align src
-EX( mov.b @r5+,r1 )
- dt r0
- add #-1,r6
-EX( mov.b r1,@r4 )
- bf/s 1b
- add #1,r4
-
- ! Jump to appropriate routine depending on dest
-2: mov #3,r1
- mov r6, r2
- and r4,r1
- shlr2 r2
- shll2 r1
- mova .L_jump_tbl,r0
- mov.l @(r0,r1),r1
- jmp @r1
- nop
-
- .align 2
-.L_jump_tbl:
- .long .L_dest00
- .long .L_dest01
- .long .L_dest10
- .long .L_dest11
-
-/*
- * Come here if there are less than 12 bytes to copy
- *
- * Keep the branch target close, so the bf/s callee doesn't overflow
- * and result in a more expensive branch being inserted. This is the
- * fast-path for small copies, the jump via the jump table will hit the
- * default slow-path cleanup. -PFM.
- */
-.L_cleanup_loop_no_pop:
- tst r6,r6 ! Check explicitly for zero
- bt 1f
-
-2:
-EX( mov.b @r5+,r0 )
- dt r6
-EX( mov.b r0,@r4 )
- bf/s 2b
- add #1,r4
-
-1: mov #0,r0 ! normal return
-5000:
-
-# Exception handler:
-.section .fixup, "ax"
-6000:
- mov.l 8000f,r1
- mov r3,r0
- jmp @r1
- sub r4,r0
- .align 2
-8000: .long 5000b
-
-.previous
- rts
- nop
-
-! Destination = 00
-
-.L_dest00:
- ! Skip the large copy for small transfers
- mov #(32+32-4), r0
- cmp/gt r6, r0 ! r0 (60) > r6 (len)
- bt 1f
-
- ! Align dest to a 32 byte boundary
- neg r4,r0
- add #0x20, r0
- and #0x1f, r0
- tst r0, r0
- bt 2f
-
- sub r0, r6
- shlr2 r0
-3:
-EX( mov.l @r5+,r1 )
- dt r0
-EX( mov.l r1,@r4 )
- bf/s 3b
- add #4,r4
-
-2:
-EX( mov.l @r5+,r0 )
-EX( mov.l @r5+,r1 )
-EX( mov.l @r5+,r2 )
-EX( mov.l @r5+,r7 )
-EX( mov.l @r5+,r8 )
-EX( mov.l @r5+,r9 )
-EX( mov.l @r5+,r10 )
-EX( mov.l @r5+,r11 )
-#ifdef CONFIG_CPU_SH4
-EX( movca.l r0,@r4 )
-#else
-EX( mov.l r0,@r4 )
-#endif
- add #-32, r6
-EX( mov.l r1,@(4,r4) )
- mov #32, r0
-EX( mov.l r2,@(8,r4) )
- cmp/gt r6, r0 ! r0 (32) > r6 (len)
-EX( mov.l r7,@(12,r4) )
-EX( mov.l r8,@(16,r4) )
-EX( mov.l r9,@(20,r4) )
-EX( mov.l r10,@(24,r4) )
-EX( mov.l r11,@(28,r4) )
- bf/s 2b
- add #32,r4
-
-1: mov r6, r0
- shlr2 r0
- tst r0, r0
- bt .L_cleanup
-1:
-EX( mov.l @r5+,r1 )
- dt r0
-EX( mov.l r1,@r4 )
- bf/s 1b
- add #4,r4
-
- bra .L_cleanup
- nop
-
-! Destination = 10
-
-.L_dest10:
- mov r2,r7
- shlr2 r7
- shlr r7
- tst r7,r7
- mov #7,r0
- bt/s 1f
- and r0,r2
-2:
- dt r7
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-EX( mov.l @r5+,r0 )
-EX( mov.l @r5+,r1 )
-EX( mov.l @r5+,r8 )
-EX( mov.l @r5+,r9 )
-EX( mov.l @r5+,r10 )
-EX( mov.w r0,@r4 )
- add #2,r4
- xtrct r1,r0
- xtrct r8,r1
- xtrct r9,r8
- xtrct r10,r9
-
-EX( mov.l r0,@r4 )
-EX( mov.l r1,@(4,r4) )
-EX( mov.l r8,@(8,r4) )
-EX( mov.l r9,@(12,r4) )
-
-EX( mov.l @r5+,r1 )
-EX( mov.l @r5+,r8 )
-EX( mov.l @r5+,r0 )
- xtrct r1,r10
- xtrct r8,r1
- xtrct r0,r8
- shlr16 r0
-EX( mov.l r10,@(16,r4) )
-EX( mov.l r1,@(20,r4) )
-EX( mov.l r8,@(24,r4) )
-EX( mov.w r0,@(28,r4) )
- bf/s 2b
- add #30,r4
-#else
-EX( mov.l @(28,r5),r0 )
-EX( mov.l @(24,r5),r8 )
-EX( mov.l @(20,r5),r9 )
-EX( mov.l @(16,r5),r10 )
-EX( mov.w r0,@(30,r4) )
- add #-2,r4
- xtrct r8,r0
- xtrct r9,r8
- xtrct r10,r9
-EX( mov.l r0,@(28,r4) )
-EX( mov.l r8,@(24,r4) )
-EX( mov.l r9,@(20,r4) )
-
-EX( mov.l @(12,r5),r0 )
-EX( mov.l @(8,r5),r8 )
- xtrct r0,r10
-EX( mov.l @(4,r5),r9 )
- mov.l r10,@(16,r4)
-EX( mov.l @r5,r10 )
- xtrct r8,r0
- xtrct r9,r8
- xtrct r10,r9
-EX( mov.l r0,@(12,r4) )
-EX( mov.l r8,@(8,r4) )
- swap.w r10,r0
-EX( mov.l r9,@(4,r4) )
-EX( mov.w r0,@(2,r4) )
-
- add #32,r5
- bf/s 2b
- add #34,r4
-#endif
- tst r2,r2
- bt .L_cleanup
-
-1: ! Read longword, write two words per iteration
-EX( mov.l @r5+,r0 )
- dt r2
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-EX( mov.w r0,@r4 )
- shlr16 r0
-EX( mov.w r0,@(2,r4) )
-#else
-EX( mov.w r0,@(2,r4) )
- shlr16 r0
-EX( mov.w r0,@r4 )
-#endif
- bf/s 1b
- add #4,r4
-
- bra .L_cleanup
- nop
-
-! Destination = 01 or 11
-
-.L_dest01:
-.L_dest11:
- ! Read longword, write byte, word, byte per iteration
-EX( mov.l @r5+,r0 )
- dt r2
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-EX( mov.b r0,@r4 )
- shlr8 r0
- add #1,r4
-EX( mov.w r0,@r4 )
- shlr16 r0
-EX( mov.b r0,@(2,r4) )
- bf/s .L_dest01
- add #3,r4
-#else
-EX( mov.b r0,@(3,r4) )
- shlr8 r0
- swap.w r0,r7
-EX( mov.b r7,@r4 )
- add #1,r4
-EX( mov.w r0,@r4 )
- bf/s .L_dest01
- add #3,r4
-#endif
-
-! Cleanup last few bytes
-.L_cleanup:
- mov r6,r0
- and #3,r0
- tst r0,r0
- bt .L_exit
- mov r0,r6
-
-.L_cleanup_loop:
-EX( mov.b @r5+,r0 )
- dt r6
-EX( mov.b r0,@r4 )
- bf/s .L_cleanup_loop
- add #1,r4
-
-.L_exit:
- mov #0,r0 ! normal return
-
-5000:
-
-# Exception handler:
-.section .fixup, "ax"
-6000:
- mov.l 8000f,r1
- mov r3,r0
- jmp @r1
- sub r4,r0
- .align 2
-8000: .long 5000b
-
-.previous
- mov.l @r15+,r8
- mov.l @r15+,r9
- mov.l @r15+,r10
- rts
- mov.l @r15+,r11