arch/powerpc/lib/copyuser_64.S

/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
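/*
 * For reference, the C-level view of this routine (it backs the
 * copy_to_user()/copy_from_user() paths):
 *
 *	unsigned long __copy_tofrom_user(void __user *to,
 *			const void __user *from, unsigned long size);
 *
 * Arguments arrive as r3 = dest, r4 = src, r5 = byte count; the
 * return value in r3 is the number of bytes NOT copied, so 0 means
 * complete success.
 */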

	.align	7
_GLOBAL(__copy_tofrom_user)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
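/*
 * The feature section above is resolved at patch time: on CPUs that
 * advertise CPU_FTR_VMX_COPY, the nop is replaced with the branch to
 * the VMX-accelerated __copy_tofrom_user_power7; on all other CPUs
 * execution falls through to the base implementation below.
 */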
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4		/* prefetch the source */
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF(0x01,r5)
	blt	cr1,.Lshort_copy
	/* Below we want to nop out the bne if we're on a CPU that has the
	 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
	 * cleared.
	 * At the time of writing the only CPU with this combination is
	 * Power6.
	 */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail	/* if < 16 bytes to copy */
	srdi	r0,r5,5
	cmpdi	cr1,r0,0
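/*
 * Main aligned copy: 32 bytes per iteration.  The loop is software
 * pipelined, with each iteration's trailing loads feeding the
 * leading stores of the next iteration.
 */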
20:	ld	r7,0(r4)
220:	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10
	beq	22f
	addi	r3,r3,16
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f
21:	ld	r7,16(r4)
221:	ld	r6,24(r4)
	addi	r4,r4,32
70:	std	r9,0(r3)
270:	std	r8,8(r3)
22:	ld	r9,0(r4)
222:	ld	r8,8(r4)
71:	std	r7,16(r3)
271:	std	r6,24(r3)
	addi	r3,r3,32
	bdnz	21b
72:	std	r9,0(r3)
272:	std	r8,8(r3)
	andi.	r5,r5,0xf
	beq+	3f
	addi	r4,r4,16
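/*
 * Copy the remaining 0-15 bytes.  PPC_MTOCRF(0x01,r5) earlier loaded
 * the low four bits of the count into cr7, so the bf tests below
 * peel off an 8-, 4-, 2- and 1-byte copy in turn.
 */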
.Ldo_tail:
	addi	r3,r3,16
	bf	cr7*4+0,246f
244:	ld	r9,0(r4)
	addi	r4,r4,8
245:	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
23:	lwz	r9,0(r4)
	addi	r4,r4,4
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
44:	lhz	r9,0(r4)
	addi	r4,r4,2
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
45:	lbz	r9,0(r4)
75:	stb	r9,0(r3)
3:	li	r3,0
	blr

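/*
 * Unaligned source: fetch aligned doublewords and reassemble each
 * destination doubleword from two of them, using a shift pair
 * (sld by r10 = 8 * misalignment, srd by r11 = 64 - r10) plus an or.
 */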
.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sld	r6,r9,r10
26:	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sld	r9,r9,r10
	ble	cr1,7f
34:	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
7:
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
94:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
95:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
96:	stb	r9,0(r3)
3:	li	r3,0
	blr

.Ldst_unaligned:
	PPC_MTOCRF(0x01,r6)	/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

.Lshort_copy:
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 */
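/*
 * Each numbered label below is a fixup entry point.  The __ex_table
 * section near the end of this file pairs every faulting load/store
 * label above (e.g. 20:) with its fixup (e.g. 120:); on an access
 * fault the exception handler looks the faulting address up in the
 * table and resumes at the fixup, which works out how much was left
 * uncopied.
 */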

136:
137:
	add	r3,r3,r7
	b	1f
130:
131:
	addi	r3,r3,8
120:
320:
122:
322:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
132:
	addi	r3,r3,8
121:
321:
344:
134:
135:
138:
139:
140:
141:
142:
123:
144:
145:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
143:	mfctr	r5
	li	r0,0
	mr	r4,r3
	mr	r3,r5		/* return the number of bytes not copied */
1:	andi.	r9,r4,7
	beq	3f
90:	stb	r0,0(r4)
	addic.	r5,r5,-1
	addi	r4,r4,1
	bne	1b
	blr
3:	cmpldi	cr1,r5,8
	srdi	r9,r5,3
	andi.	r5,r5,7
	blt	cr1,93f
	mtctr	r9
91:	std	r0,0(r4)
	addi	r4,r4,8
	bdnz	91b
93:	beqlr
	mtctr	r5
92:	stb	r0,0(r4)
	addi	r4,r4,1
	bdnz	92b
	blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
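/*
 * The arithmetic used below: the original dest and count were saved
 * at -24(r1) and -8(r1) on entry, so bytes not copied is
 * (orig_dest + count) - r3, with the small addi fixups first
 * advancing r3 past any bytes the faulting sequence had already
 * stored.
 */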
182:
183:
	add	r3,r3,r7
	b	1f
371:
180:
	addi	r3,r3,8
171:
177:
	addi	r3,r3,8
370:
372:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
170:
172:
345:
173:
174:
175:
179:
181:
184:
186:
187:
188:
189:
194:
195:
196:
1:
	ld	r6,-24(r1)
	ld	r5,-8(r1)
	add	r6,r6,r5
	subf	r3,r3,r6	/* #bytes not copied */
190:
191:
192:
	blr			/* #bytes not copied in r3 */

	.section __ex_table,"a"
	.align	3
	.llong	20b,120b
	.llong	220b,320b
	.llong	21b,121b
	.llong	221b,321b
	.llong	70b,170b
	.llong	270b,370b
	.llong	22b,122b
	.llong	222b,322b
	.llong	71b,171b
	.llong	271b,371b
	.llong	72b,172b
	.llong	272b,372b
	.llong	244b,344b
	.llong	245b,345b
	.llong	23b,123b
	.llong	73b,173b
	.llong	44b,144b
	.llong	74b,174b
	.llong	45b,145b
	.llong	75b,175b
	.llong	24b,124b
	.llong	25b,125b
	.llong	26b,126b
	.llong	27b,127b
	.llong	28b,128b
	.llong	29b,129b
	.llong	30b,130b
	.llong	31b,131b
	.llong	32b,132b
	.llong	76b,176b
	.llong	33b,133b
	.llong	77b,177b
	.llong	78b,178b
	.llong	79b,179b
	.llong	80b,180b
	.llong	34b,134b
	.llong	94b,194b
	.llong	95b,195b
	.llong	96b,196b
	.llong	35b,135b
	.llong	81b,181b
	.llong	36b,136b
	.llong	82b,182b
	.llong	37b,137b
	.llong	83b,183b
	.llong	38b,138b
	.llong	39b,139b
	.llong	84b,184b
	.llong	85b,185b
	.llong	40b,140b
	.llong	86b,186b
	.llong	41b,141b
	.llong	87b,187b
	.llong	42b,142b
	.llong	88b,188b
	.llong	43b,143b
	.llong	89b,189b
	.llong	90b,190b
	.llong	91b,191b
	.llong	92b,192b

	.text

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label) but it runs slightly
 * slower on POWER3.
 */
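/*
 * The striding below is deliberate: loads at offsets 0, 128, 256,
 * 384, 512 and 640 walk six streams spaced one 128-byte POWER4
 * cache line apart, presumably to keep several hardware prefetch
 * streams busy at once.
 */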
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
20:	ld	r22,640(4)
21:	ld	r21,512(4)
22:	ld	r20,384(4)
23:	ld	r11,256(4)
24:	ld	r9,128(4)
25:	ld	r7,0(4)
26:	ld	r25,648(4)
27:	ld	r24,520(4)
28:	ld	r23,392(4)
29:	ld	r10,264(4)
30:	ld	r8,136(4)
31:	ldu	r6,8(4)
	cmpwi	r5,24
1:
32:	std	r22,648(3)
33:	std	r21,520(3)
34:	std	r20,392(3)
35:	std	r11,264(3)
36:	std	r9,136(3)
37:	std	r7,8(3)
38:	ld	r28,648(4)
39:	ld	r27,520(4)
40:	ld	r26,392(4)
41:	ld	r31,264(4)
42:	ld	r30,136(4)
43:	ld	r29,8(4)
44:	std	r25,656(3)
45:	std	r24,528(3)
46:	std	r23,400(3)
47:	std	r10,272(3)
48:	std	r8,144(3)
49:	std	r6,16(3)
50:	ld	r22,656(4)
51:	ld	r21,528(4)
52:	ld	r20,400(4)
53:	ld	r11,272(4)
54:	ld	r9,144(4)
55:	ld	r7,16(4)
56:	std	r28,664(3)
57:	std	r27,536(3)
58:	std	r26,408(3)
59:	std	r31,280(3)
60:	std	r30,152(3)
61:	stdu	r29,24(3)
62:	ld	r25,664(4)
63:	ld	r24,536(4)
64:	ld	r23,408(4)
65:	ld	r10,280(4)
66:	ld	r8,152(4)
67:	ldu	r6,24(4)
	bdnz	1b
68:	std	r22,648(3)
69:	std	r21,520(3)
70:	std	r20,392(3)
71:	std	r11,264(3)
72:	std	r9,136(3)
73:	std	r7,8(3)
74:	addi	r4,r4,640
75:	addi	r3,r3,648
	bge	0b
	mtctr	r5
76:	ld	r7,0(4)
77:	ld	r8,8(4)
78:	ldu	r9,16(4)
3:
79:	ld	r10,8(4)
80:	std	r7,8(3)
81:	ld	r7,16(4)
82:	std	r8,16(3)
83:	ld	r8,24(4)
84:	std	r9,24(3)
85:	ldu	r9,32(4)
86:	stdu	r10,32(3)
	bdnz	3b
4:
87:	ld	r10,8(4)
88:	std	r7,8(3)
89:	std	r8,16(3)
90:	std	r9,24(3)
91:	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
100:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned

	.section __ex_table,"a"
	.align	3
	.llong	20b,100b
	.llong	21b,100b
	.llong	22b,100b
	.llong	23b,100b
	.llong	24b,100b
	.llong	25b,100b
	.llong	26b,100b
	.llong	27b,100b
	.llong	28b,100b
	.llong	29b,100b
	.llong	30b,100b
	.llong	31b,100b
	.llong	32b,100b
	.llong	33b,100b
	.llong	34b,100b
	.llong	35b,100b
	.llong	36b,100b
	.llong	37b,100b
	.llong	38b,100b
	.llong	39b,100b
	.llong	40b,100b
	.llong	41b,100b
	.llong	42b,100b
	.llong	43b,100b
	.llong	44b,100b
	.llong	45b,100b
	.llong	46b,100b
	.llong	47b,100b
	.llong	48b,100b
	.llong	49b,100b
	.llong	50b,100b
	.llong	51b,100b
	.llong	52b,100b
	.llong	53b,100b
	.llong	54b,100b
	.llong	55b,100b
	.llong	56b,100b
	.llong	57b,100b
	.llong	58b,100b
	.llong	59b,100b
	.llong	60b,100b
	.llong	61b,100b
	.llong	62b,100b
	.llong	63b,100b
	.llong	64b,100b
	.llong	65b,100b
	.llong	66b,100b
	.llong	67b,100b
	.llong	68b,100b
	.llong	69b,100b
	.llong	70b,100b
	.llong	71b,100b
	.llong	72b,100b
	.llong	73b,100b
	.llong	74b,100b
	.llong	75b,100b
	.llong	76b,100b
	.llong	77b,100b
	.llong	78b,100b
	.llong	79b,100b
	.llong	80b,100b
	.llong	81b,100b
	.llong	82b,100b
	.llong	83b,100b
	.llong	84b,100b
	.llong	85b,100b
	.llong	86b,100b
	.llong	87b,100b
	.llong	88b,100b
	.llong	89b,100b
	.llong	90b,100b
	.llong	91b,100b