Linux-2.6.12-rc2
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / arch / m32r / lib / checksum.S
1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IP/TCP/UDP checksumming routines
7 *
8 * Authors: Jorge Cwik, <jorge@laser.satlink.net>
9 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
10 * Tom May, <ftom@netcom.com>
11 * Pentium Pro/II routines:
12 * Alexander Kjeldaas <astor@guardian.no>
13 * Finn Arne Gangstad <finnag@guardian.no>
14 * Lots of code moved from tcp.c and ip.c; see those files
15 * for more names.
16 *
17 * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception
18 * handling.
19 * Andi Kleen, add zeroing on error
20 * converted to pure assembler
21 * Hirokazu Takata,Hiroyuki Kondo rewrite for the m32r architecture.
22 *
23 * This program is free software; you can redistribute it and/or
24 * modify it under the terms of the GNU General Public License
25 * as published by the Free Software Foundation; either version
26 * 2 of the License, or (at your option) any later version.
27 */
28 /* $Id$ */
29
30
31 #include <linux/config.h>
32 #include <linux/linkage.h>
33 #include <asm/assembler.h>
34 #include <asm/errno.h>
35
36 /*
37 * computes a partial checksum, e.g. for TCP/UDP fragments
38 */
39
40 /*
41 unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
42 */
43
44
45 #ifdef CONFIG_ISA_DUAL_ISSUE
46
47 /*
48 * Experiments with Ethernet and SLIP connections show that buff
49 * is aligned on either a 2-byte or 4-byte boundary. We get at
50 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
51 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
52 * alignment for the unrolled loop.
53 */
54
55 .text
ENTRY(csum_partial)
	/*
	 * unsigned int csum_partial(const unsigned char *buff, int len,
	 *                           unsigned int sum)
	 *
	 * Dual-issue variant ("a || b" executes a and b in parallel).
	 *
	 * In:    r0 = buff, r1 = len, r2 = sum
	 * Out:   r0 = sum + 16-bit folded ones-complement sum of buff[0..len)
	 *             (with end-around carry)
	 * Uses:  r2 = running 32-bit sum, r3-r5 = loaded data,
	 *        r6 = loop counter, r7 = 1 if buff started on an odd address,
	 *        condition bit as the carry chained through addx,
	 *        one stack slot holding the incoming sum.
	 *
	 * Layout: align to 2 bytes, align to 4 bytes, 32-byte unrolled loop,
	 * remaining whole words, 1-3 trailing bytes, fold to 16 bits,
	 * byte-swap the folded value when the start address was odd,
	 * then add the incoming sum.
	 *
	 * Fixes relative to the previous revision:
	 *  - an odd start address with len <= 0 no longer reads the buffer
	 *    (len used to go negative and drive the loops);
	 *  - on little-endian the leading odd byte is placed in the high
	 *    lane before summing so that the final byte swap puts it in the
	 *    low lane, matching the generic do_csum in lib/checksum.c.
	 */

	push	r2		    ||	ldi	r2, #0	; save sum, start from 0
	and3	r7, r0, #1		; r7 = odd-start flag, reused at the end
	beqz	r7, 1f			; even address: no byte fix-up needed
	; Odd start address.
	blez	r1, 7f			; nothing to sum: never touch *buff
	ldub	r4, @r0		    ||	addi	r0, #1
#ifdef __LITTLE_ENDIAN__
	slli	r4, #8			; pre-position for the final byte swap
#endif
	; clear c-bit || the alignment byte has been consumed
	cmp	r0, r0		    ||	addi	r1, #-1
	ldi	r3, #0		    ||	addx	r2, r4
	addx	r2, r3			; fold the carry back in
	.fillinsn
1:
	and3	r4, r0, #2		; 2-byte but not 4-byte aligned?
	beqz	r4, 2f			; already 4-byte aligned
	; clear c-bit || tentatively consume one halfword
	cmp	r0, r0		    ||	addi	r1, #-2
	bgtz	r1, 1f			; more than two bytes were left
	; len was <= 2: undo the subtraction, let the tail code handle it
	bra	4f		    ||	addi	r1, #2
	.fillinsn
1:
	; 2-byte aligned: sum one halfword to reach 4-byte alignment
	lduh	r4, @r0		    ||	ldi	r3, #0
	addx	r2, r4		    ||	addi	r0, #2
	addx	r2, r3			; fold the carry back in
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0			; clear c-bit
	srl3	r6, r1, #5		; r6 = number of 32-byte chunks
	beqz	r6, 2f
	.fillinsn

	; 32 bytes per iteration; loads run ahead of the addx chain and the
	; carry is kept live across iterations (addi/bnez leave it alone).
1:	ld	r3, @r0+
	ld	r4, @r0+					; +4
	ld	r5, @r0+					; +8
	ld	r3, @r0+	    ||	addx	r2, r3		; +12
	ld	r4, @r0+	    ||	addx	r2, r4		; +16
	ld	r5, @r0+	    ||	addx	r2, r5		; +20
	ld	r3, @r0+	    ||	addx	r2, r3		; +24
	ld	r4, @r0+	    ||	addx	r2, r4		; +28
	addx	r2, r5		    ||	addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b

	addx	r2, r6			; r6 = 0: adds only the pending carry
	cmp	r0, r0			; clear c-bit
	.fillinsn
2:	and3	r6, r1, #0x1c		; bytes left in whole words (0..28)
	beqz	r6, 4f
	srli	r6, #2			; r6 = number of whole words
	.fillinsn

3:	ld	r4, @r0+	    ||	addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b

	addx	r2, r6			; r6 = 0: adds only the pending carry
	cmp	r0, r0			; clear c-bit
	.fillinsn
4:	and3	r1, r1, #3		; r1 = trailing bytes (0..3)
	beqz	r1, 7f			; none: go fold
	and3	r6, r1, #2
	beqz	r6, 5f			; only one byte left
	lduh	r4, @r0		    ||	addi	r0, #2
	addi	r1, #-2		    ||	slli	r4, #16	; halfword into upper lane
	addx	r2, r4
	beqz	r1, 6f			; no odd byte after the halfword
	.fillinsn
5:	ldub	r4, @r0		    ||	ldi	r1, #0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8			; last byte is the high byte of its word
#endif
	addx	r2, r4
	.fillinsn
6:	addx	r2, r1			; r1 = 0: adds only the pending carry
	.fillinsn
7:
	; Fold the 32-bit sum to 16 bits with end-around carry.
	and3	r0, r2, #0xffff
	srli	r2, #16
	add	r0, r2
	srl3	r2, r0, #16
	beqz	r2, 1f
	addi	r0, #1
	and3	r0, r0, #0xffff
	.fillinsn
1:
	beqz	r7, 1f			; even start: lanes are already right
	; odd start: every byte sat in the opposite lane, swap them back
	and3	r2, r0, #0xff
	srl3	r0, r0, #8
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
	pop	r2		    ||	cmp	r0, r0	; incoming sum, clear c-bit
	addx	r0, r2		    ||	ldi	r2, #0
	addx	r0, r2			; end-around carry
	jmp	r14
159
160 #else /* not CONFIG_ISA_DUAL_ISSUE */
161
162 /*
163 * Experiments with Ethernet and SLIP connections show that buff
164 * is aligned on either a 2-byte or 4-byte boundary. We get at
165 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
166 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
167 * alignment for the unrolled loop.
168 */
169
170 .text
ENTRY(csum_partial)
	/*
	 * unsigned int csum_partial(const unsigned char *buff, int len,
	 *                           unsigned int sum)
	 *
	 * Single-issue variant (no parallel "||" instruction pairs).
	 *
	 * In:    r0 = buff, r1 = len, r2 = sum
	 * Out:   r0 = sum + 16-bit folded ones-complement sum of buff[0..len)
	 *             (with end-around carry)
	 * Uses:  r2 = running 32-bit sum, r3-r5 = loaded data,
	 *        r6 = loop counter, r7 = 1 if buff started on an odd address,
	 *        condition bit as the carry chained through addx,
	 *        one stack slot holding the incoming sum.
	 *
	 * Layout: align to 2 bytes, align to 4 bytes, 32-byte unrolled loop,
	 * remaining whole words, 1-3 trailing bytes, fold to 16 bits,
	 * byte-swap the folded value when the start address was odd,
	 * then add the incoming sum.
	 *
	 * Fixes relative to the previous revision:
	 *  - an odd start address with len <= 0 no longer reads the buffer
	 *    (len used to go negative and drive the loops);
	 *  - on little-endian the leading odd byte is placed in the high
	 *    lane before summing so that the final byte swap puts it in the
	 *    low lane, matching the generic do_csum in lib/checksum.c.
	 */

	push	r2			; save the incoming sum
	ldi	r2, #0			; running sum starts from 0
	and3	r7, r0, #1		; r7 = odd-start flag, reused at the end
	beqz	r7, 1f			; even address: no byte fix-up needed
	; Odd start address.
	blez	r1, 7f			; nothing to sum: never touch *buff
	ldub	r4, @r0
	addi	r0, #1
#ifdef __LITTLE_ENDIAN__
	slli	r4, #8			; pre-position for the final byte swap
#endif
	addi	r1, #-1			; the alignment byte has been consumed
	cmp	r0, r0			; clear c-bit
	ldi	r3, #0
	addx	r2, r4
	addx	r2, r3			; fold the carry back in
	.fillinsn
1:
	and3	r4, r0, #2		; 2-byte but not 4-byte aligned?
	beqz	r4, 2f			; already 4-byte aligned
	addi	r1, #-2			; tentatively consume one halfword
	cmp	r0, r0			; clear c-bit
	bgtz	r1, 1f			; more than two bytes were left
	addi	r1, #2			; len was <= 2: undo, use the tail code
	bra	4f
	.fillinsn
1:
	; 2-byte aligned: sum one halfword to reach 4-byte alignment
	lduh	r4, @r0
	addi	r0, #2
	ldi	r3, #0
	addx	r2, r4
	addx	r2, r3			; fold the carry back in
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0			; clear c-bit
	srl3	r6, r1, #5		; r6 = number of 32-byte chunks
	beqz	r6, 2f
	.fillinsn

	; 32 bytes per iteration; the carry is kept live across iterations
	; (ld/addi/bnez leave the condition bit alone).
1:	ld	r3, @r0+
	ld	r4, @r0+		; +4
	ld	r5, @r0+		; +8
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +12
	ld	r4, @r0+		; +16
	ld	r5, @r0+		; +20
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +24
	ld	r4, @r0+		; +28
	addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b
	addx	r2, r6			; r6 = 0: adds only the pending carry
	cmp	r0, r0			; clear c-bit
	.fillinsn

2:	and3	r6, r1, #0x1c		; bytes left in whole words (0..28)
	beqz	r6, 4f
	srli	r6, #2			; r6 = number of whole words
	.fillinsn

3:	ld	r4, @r0+
	addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b
	addx	r2, r6			; r6 = 0: adds only the pending carry
	cmp	r0, r0			; clear c-bit
	.fillinsn

4:	and3	r1, r1, #3		; r1 = trailing bytes (0..3)
	beqz	r1, 7f			; none: go fold
	and3	r6, r1, #2
	beqz	r6, 5f			; only one byte left

	lduh	r4, @r0
	addi	r0, #2
	addi	r1, #-2
	slli	r4, #16			; halfword into upper lane
	addx	r2, r4
	beqz	r1, 6f			; no odd byte after the halfword
	.fillinsn
5:	ldub	r4, @r0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8			; last byte is the high byte of its word
#endif
	addx	r2, r4
	.fillinsn
6:	ldi	r5, #0
	addx	r2, r5			; adds only the pending carry
	.fillinsn
7:
	; Fold the 32-bit sum to 16 bits with end-around carry.
	and3	r0, r2, #0xffff
	srli	r2, #16
	add	r0, r2
	srl3	r2, r0, #16
	beqz	r2, 1f
	addi	r0, #1
	and3	r0, r0, #0xffff
	.fillinsn
1:
	beqz	r7, 1f			; even start: lanes are already right
	; odd start: every byte sat in the opposite lane, swap them back
	mv	r2, r0
	srl3	r0, r2, #8
	and3	r2, r2, #0xff
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
	pop	r2			; incoming sum
	cmp	r0, r0			; clear c-bit
	addx	r0, r2
	ldi	r2, #0
	addx	r0, r2			; end-around carry
	jmp	r14
294
295 #endif /* not CONFIG_ISA_DUAL_ISSUE */
296
297 /*
298 unsigned int csum_partial_copy_generic (const char *src, char *dst,
299 int len, int sum, int *src_err_ptr, int *dst_err_ptr)
300 */
301
302 /*
303 * Copy from ds while checksumming, otherwise like csum_partial
304 *
305 * The macros SRC and DST specify the type of access for the instruction.
306 * thus we can call a custom exception handler for all access types.
307 *
308 * FIXME: could someone double-check whether I haven't mixed up some SRC and
309 * DST definitions? It's damn hard to trigger all cases. I hope I got
310 * them all but there's no guarantee.
311 */
312
313 ENTRY(csum_partial_copy_generic)
	; Placeholder body: it contains no load, store or arithmetic
	; instruction, so nothing is copied and no checksum is computed.
	; No register is written before the jump below.
	; NOTE(review): presumably a caller would therefore see r0 still
	; holding its first argument (src) as the "checksum" - confirm that
	; nothing actually calls this symbol, or provide a real
	; implementation elsewhere.
314 nop
315 nop
316 nop
317 nop
	; jmp r14 is the same instruction csum_partial uses as its final
	; return to the caller.
318 jmp r14
	; The nops below sit after the jump and are not reached by falling
	; through. NOTE(review): purpose not visible here, possibly padding.
319 nop
320 nop
321 nop
321 nop
322