Merge branch 'for-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetoot...
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / arch / metag / lib / memcpy.S
1 ! Copyright (C) 2008-2012 Imagination Technologies Ltd.
2
3 .text
4 .global _memcpy
5 .type _memcpy,function
! _memcpy: copy D1Ar3 bytes from the source buffer to the destination buffer
! and return the original destination pointer in D0Re0.
!
! Strategy, as implemented below:
!  - fewer than 16 bytes: copy a byte at a time;
!  - otherwise copy single bytes until the destination is 8 byte aligned, then
!    move data in 8 byte GETL/SETL units: 32 bytes per pass when the source is
!    also 8 byte aligned, or 8 bytes per pass through one of three
!    shift-and-merge loops when it is not. Any tail goes to the byte copy loop.
!
! Registers written by this routine: D0Re0, D1Re0, D1Ar1, D0Ar2, D1Ar3, D0Ar4,
! D1Ar5, D0Ar6, D0FrT, A0.2, A0.3, A1.2 and TXRPT. The argument registers
! D1Ar1/D0Ar2 are reused as scratch once the pointers are held in A0.2/A1.2.
!
! NOTE(review): the behaviour attributed below to BR/TXRPT (counted repeat
! loop), to [++A1.2] versus [A1.2++] (pre- versus post-increment) and to ANDMB
! is what the surrounding code and comments imply - confirm against the Meta
! instruction set reference.
! NOTE(review): BGE/BLT look like signed conditions, so a count with bit 31
! set would reach $Lend without copying anything - confirm this is acceptable.
6 ! D1Ar1 dst (in: destination pointer)
7 ! D0Ar2 src (in: source pointer)
8 ! D1Ar3 cnt (in: number of bytes to copy)
9 ! D0Re0 dst (out: destination pointer, returned unchanged)
10 _memcpy:
11 CMP D1Ar3, #16 ! result is consumed by the BGE after the three MOVs
12 MOV A1.2, D0Ar2 ! A1.2 = working source pointer
13 MOV A0.2, D1Ar1 ! A0.2 = working destination pointer
14 MOV A0.3, D1Ar1 ! untouched copy of dst, used for the return value
15 ! If there are fewer than 16 bytes to copy use the byte copy loop
16 BGE $Llong_copy
17
18 $Lbyte_copy:
19 ! Simply copy a byte at a time
! Entered with D1Ar3 = bytes still to copy and A1.2/A0.2 = current
! source/destination; also used as the tail copy for the longer paths.
20 SUBS TXRPT, D1Ar3, #1 ! repeat counter = bytes - 1
21 BLT $Lend ! bytes - 1 < 0: nothing (left) to copy
22 $Lloop_byte:
23 GETB D1Re0, [A1.2++]
24 SETB [A0.2++], D1Re0
25 BR $Lloop_byte ! repeat under control of TXRPT
26
27 $Lend:
28 ! Finally set return value and return
29 MOV D0Re0, A0.3 ! return the destination pointer saved at entry
30 MOV PC, D1RtP ! return by loading PC from D1RtP
31
32 $Llong_copy:
33 ANDS D1Ar5, D1Ar1, #7 ! D1Ar5 = dst & 7; test destination alignment
34 BZ $Laligned_dst
35
36 ! The destination address is not 8 byte aligned. We will copy bytes from
37 ! the source to the destination until the remaining data has an 8 byte
38 ! destination address alignment (i.e. we should never copy more than 7
39 ! bytes here).
40 $Lalign_dst:
41 GETB D0Re0, [A1.2++]
42 ADD D1Ar5, D1Ar5, #1 ! dest is aligned when D1Ar5 reaches #8
43 SUB D1Ar3, D1Ar3, #1 ! decrement count of remaining bytes
44 SETB [A0.2++], D0Re0
45 CMP D1Ar5, #8
46 BNE $Lalign_dst
47
48 ! We have at least (16 - 7) = 9 bytes to copy - calculate the number of 8 byte
49 ! blocks, then jump to the unaligned copy loop or fall through to the aligned
50 ! copy loop as appropriate.
51 $Laligned_dst:
52 MOV D0Ar4, A1.2 ! current source address, for the alignment test
53 LSR D1Ar5, D1Ar3, #3 ! D1Ar5 = number of 8 byte blocks (used only by the unaligned path)
54 ANDS D0Ar4, D0Ar4, #7 ! test source alignment
55 BNZ $Lunaligned_copy ! if unaligned, use unaligned copy loop
56
57 ! Both source and destination are 8 byte aligned - the easy case.
58 $Laligned_copy:
59 LSRS D1Ar5, D1Ar3, #5 ! D1Ar5 = number of 32 byte blocks
60 BZ $Lbyte_copy ! fewer than 32 bytes: byte copy handles all of them
61 SUB TXRPT, D1Ar5, #1 ! repeat counter = 32 byte blocks - 1
62
63 $Laligned_32:
! Each pass moves 32 bytes as two load-16/store-16 groups. D1Ar5 is free to
! hold data here because the block count has already been placed in TXRPT.
64 GETL D0Re0, D1Re0, [A1.2++]
65 GETL D0Ar6, D1Ar5, [A1.2++]
66 SETL [A0.2++], D0Re0, D1Re0
67 SETL [A0.2++], D0Ar6, D1Ar5
68 GETL D0Re0, D1Re0, [A1.2++]
69 GETL D0Ar6, D1Ar5, [A1.2++]
70 SETL [A0.2++], D0Re0, D1Re0
71 SETL [A0.2++], D0Ar6, D1Ar5
72 BR $Laligned_32
73
74 ! If there are any remaining bytes use the byte copy loop, otherwise we are done
75 ANDS D1Ar3, D1Ar3, #0x1f ! bytes left over after the 32 byte blocks
76 BNZ $Lbyte_copy
77 B $Lend
78
79 ! The destination is 8 byte aligned but the source is not, and there are 8
80 ! or more bytes to be copied.
81 $Lunaligned_copy:
82 ! Adjust the source pointer (A1.2) to the 8 byte boundary before its
83 ! current value
84 MOV D0Ar4, A1.2 ! reload: D0Ar4 was reduced to (src & 7) by the test above
85 MOV D0Ar6, A1.2
86 ANDMB D0Ar4, D0Ar4, #0xfff8 ! clear the low 3 address bits (assumes ANDMB keeps the upper 16 bits - confirm)
87 MOV A1.2, D0Ar4
88 ! Save the number of bytes of mis-alignment in D0Ar4 for use later
89 SUBS D0Ar6, D0Ar6, D0Ar4 ! D0Ar6 = original address - aligned address
90 MOV D0Ar4, D0Ar6 ! kept until $Lunaligned_end; D0Ar6 is reworked below
91 ! if there is no mis-alignment after all, use the aligned copy loop
92 BZ $Laligned_copy ! tests the SUBS result (the MOV sits in between)
93
94 ! prefetch 8 bytes
95 GETL D0Re0, D1Re0, [A1.2] ! first aligned block; A1.2 is not advanced here
96
97 SUB TXRPT, D1Ar5, #1 ! repeat counter = 8 byte blocks - 1 (count from $Laligned_dst)
98
99 ! There are 3 mis-alignment cases to be considered. Less than 4 bytes, exactly
100 ! 4 bytes, and more than 4 bytes.
101 CMP D0Ar6, #4 ! both branches below test this one comparison
102 BLT $Lunaligned_1_2_3 ! use 1-3 byte mis-alignment loop
103 BZ $Lunaligned_4 ! use 4 byte mis-alignment loop
104
105 ! The mis-alignment is more than 4 bytes
106 $Lunaligned_5_6_7:
107 SUB D0Ar6, D0Ar6, #4 ! mis-alignment relative to the second 32-bit word
108 ! Calculate the bit offsets required for the shift operations necessary
109 ! to align the data.
110 ! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
111 MULW D0Ar6, D0Ar6, #8 ! bytes -> bits
112 MOV D1Ar5, #32
113 SUB D1Ar5, D1Ar5, D0Ar6
114 ! Move data 4 bytes before we enter the main loop
115 MOV D0Re0, D1Re0 ! second prefetched word becomes the carried word; the first is dropped
116
117 $Lloop_5_6_7:
! Per pass: D0Re0 holds the word carried from the previous block; the next
! block is loaded into D0Ar2 (first word) and D1Ar1 (second word).
118 GETL D0Ar2, D1Ar1, [++A1.2]
119 ! form 64-bit data in D0Re0, D1Re0
120 LSR D0Re0, D0Re0, D0Ar6
121 MOV D1Re0, D0Ar2
122 LSL D1Re0, D1Re0, D1Ar5
123 ADD D0Re0, D0Re0, D1Re0 ! first output word = carried word merged with new first word
124
125 LSR D0Ar2, D0Ar2, D0Ar6
126 LSL D1Re0, D1Ar1, D1Ar5
127 ADD D1Re0, D1Re0, D0Ar2 ! second output word = new first word merged with new second word
128
129 SETL [A0.2++], D0Re0, D1Re0
130 MOV D0Re0, D1Ar1 ! carry the new second word into the next pass
131 BR $Lloop_5_6_7
132
133 B $Lunaligned_end
134
135 $Lunaligned_1_2_3:
136 ! Calculate the bit offsets required for the shift operations necessary
137 ! to align the data.
138 ! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
139 MULW D0Ar6, D0Ar6, #8 ! bytes -> bits
140 MOV D1Ar5, #32
141 SUB D1Ar5, D1Ar5, D0Ar6
142
143 $Lloop_1_2_3:
! Per pass: D0Re0/D1Re0 hold the current block (first/second word); the
! next block is loaded into D0Ar2/D1Ar1 part-way through.
144 ! form 64-bit data in D0Re0,D1Re0
145 LSR D0Re0, D0Re0, D0Ar6
146 LSL D1Ar1, D1Re0, D1Ar5
147 ADD D0Re0, D0Re0, D1Ar1 ! first output word = current first word merged with current second word
148 MOV D0Ar2, D1Re0
149 LSR D0FrT, D0Ar2, D0Ar6 ! keep the shifted second word in D0FrT across the load
150 GETL D0Ar2, D1Ar1, [++A1.2]
151
152 MOV D1Re0, D0Ar2
153 LSL D1Re0, D1Re0, D1Ar5
154 ADD D1Re0, D1Re0, D0FrT ! second output word = current second word merged with next first word
155
156 SETL [A0.2++], D0Re0, D1Re0
157 MOV D0Re0, D0Ar2 ! the block just loaded becomes the current block
158 MOV D1Re0, D1Ar1
159 BR $Lloop_1_2_3
160
161 B $Lunaligned_end
162
163 ! The 4 byte mis-alignment case - this does not require any shifting, just a
164 ! shuffling of registers.
165 $Lunaligned_4:
166 MOV D0Re0, D1Re0 ! second prefetched word becomes the first output word
167 $Lloop_4:
168 GETL D0Ar2, D1Ar1, [++A1.2]
169 MOV D1Re0, D0Ar2 ! second output word = first word of the new block
170 SETL [A0.2++], D0Re0, D1Re0
171 MOV D0Re0, D1Ar1 ! carry the new second word into the next pass
172 BR $Lloop_4
173
174 $Lunaligned_end:
175 ! If there are no remaining bytes to copy, we are done.
176 ANDS D1Ar3, D1Ar3, #7 ! bytes left over after the 8 byte blocks
177 BZ $Lend
178 ! Re-adjust the source pointer (A1.2) back to the actual (unaligned) byte
179 ! address of the remaining bytes, and fall through to the byte copy loop.
180 MOV D0Ar6, A1.2
181 ADD D1Ar5, D0Ar4, D0Ar6 ! aligned address + saved mis-alignment (D0Ar4)
182 MOV A1.2, D1Ar5
183 B $Lbyte_copy
184
185 .size _memcpy,.-_memcpy