/*
 * User address space access functions.
 * The non-inlined parts of asm-cris/uaccess.h are here.
 *
 * Copyright (C) 2000, Axis Communications AB.
 *
 * Written by Hans-Peter Nilsson.
 * Pieces used from memcpy, originally by Kenny Ranerup long time ago.
 */

#include <linux/uaccess.h>

/* Asm:s have been tweaked (within the domain of correctness) to give
   satisfactory results for "gcc version 2.96 20000427 (experimental)".

   Check regularly...

   Note that the PC saved at a bus-fault is the address *after* the
   faulting instruction, which means the branch-target for instructions in
   delay-slots for taken branches.  Note also that the postincrement in
   the instruction is performed regardless of bus-fault; the register is
   seen updated in fault handlers.

   Oh, and on the code formatting issue, to whoever feels like "fixing
   it" to Conformity: I'm too "lazy", but why don't you go ahead and "fix"
   string.c too.  I just don't think too many people will hack this file
   for the code format to be an issue.  */
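
/* A concrete reading of the delay-slot note above (editorial sketch):
   if the "movem $r10,[$r13+]" sitting in the delay slot of a taken
   "bge 0b" faults, the PC recorded for the exception-table lookup is
   the branch target 0b, not the address following the movem.  The
   __ex_table entries in the asm blocks below rely on exactly this.  */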

/* Copy to userspace.  This is based on the memcpy used for
   kernel-to-kernel copying; see "string.c".  */
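
/* Hypothetical usage sketch (illustrative names only; the real inline
   wrappers live in asm-cris/uaccess.h):

	if (access_ok(VERIFY_WRITE, uptr, len))
		left = __copy_user(uptr, kbuf, len);

   A nonzero LEFT is the number of trailing bytes that could not be
   written to userspace.  */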

unsigned long __copy_user(void __user *pdst, const void *psrc, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;


  /* When src is aligned but not dst, this makes a few extra needless
     cycles.  I believe it would take as many to check that the
     re-alignment was unnecessary.  */
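
  /* The __asm_copy_to_user_N helpers used below come from
     asm-cris/uaccess.h; as used here, each appears to copy N bytes,
     advance dst and src, and account any uncopied bytes in retn on a
     write fault.  (Description inferred from this file; see the header
     for the authoritative definitions.)  */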
  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes; so we
	 don't have to check further for overflows.  */
      && n >= 3)
  {
    if ((unsigned long) dst & 1)
    {
      __asm_copy_to_user_1 (dst, src, retn);
      n--;
    }

    if ((unsigned long) dst & 2)
    {
      __asm_copy_to_user_2 (dst, src, retn);
      n -= 2;
    }
  }

  /* Decide which copying method to use. */
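  /* Note on the magic 44: "movem ...,$r10" transfers registers
     $r0..$r10, i.e. 11 words of 4 bytes = 44 bytes per movem line;
     that is also why 11*4 bytes of stack are reserved below.  */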
  if (n >= 44*2)		/* Break even between movem and
				   move16 is at 38.7*2, but modulo 44. */
  {
    /* For large copies we use 'movem'.  */

    /* It is not optimal to tell the compiler about clobbering any
       registers; that will move the saving/restoring of those registers
       to the function prologue/epilogue, and make non-movem sizes
       suboptimal.

       This method is not foolproof; it assumes that the "asm reg"
       declarations at the beginning of the function really are used
       here (beware: they may be moved to temporary registers).
       This way, we do not have to save/move the registers around into
       temporaries; we can safely use them straight away.

       If you want to check that the allocation was right; then
       check the equalities in the first comment.  It should say
       "r13=r13, r11=r11, r12=r12".  */
    __asm__ volatile ("\
	.ifnc %0%1%2%3,$r13$r11$r12$r10				\n\
	.err							\n\
	.endif							\n\
								\n\
	;; Save the registers we'll use in the movem process	\n\
	;; on the stack.					\n\
	subq	11*4,$sp					\n\
	movem	$r10,[$sp]					\n\
								\n\
	;; Now we've got this:					\n\
	;; r11 - src						\n\
	;; r13 - dst						\n\
	;; r12 - n						\n\
								\n\
	;; Update n for the first loop				\n\
	subq	44,$r12						\n\
								\n\
; Since the noted PC of a faulting instruction in a delay-slot of a taken \n\
; branch is that of the branch target, we actually point at the from-movem \n\
; for this case.  There is no ambiguity here; if there was a fault in that \n\
; instruction (meaning a kernel oops), the faulted PC would be the address \n\
; after *that* movem.						\n\
								\n\
0:								\n\
	movem	[$r11+],$r10					\n\
	subq	44,$r12						\n\
	bge	0b						\n\
	movem	$r10,[$r13+]					\n\
1:								\n\
	addq	44,$r12	;; compensate for last loop underflowing n \n\
								\n\
	;; Restore registers from stack				\n\
	movem	[$sp+],$r10					\n\
2:								\n\
	.section .fixup,\"ax\"					\n\
								\n\
; To provide a correct count in r10 of bytes that failed to be copied,	\n\
; we jump back into the loop if the loop-branch was taken.  There is no \n\
; performance penalty for sane use; the program will segfault soon enough. \n\
								\n\
3:								\n\
	move.d	[$sp],$r10					\n\
	addq	44,$r10						\n\
	move.d	$r10,[$sp]					\n\
	jump	0b						\n\
4:								\n\
	movem	[$sp+],$r10					\n\
	addq	44,$r10						\n\
	addq	44,$r12						\n\
	jump	2b						\n\
								\n\
	.previous						\n\
	.section __ex_table,\"a\"				\n\
	.dword	0b,3b						\n\
	.dword	1b,4b						\n\
	.previous"

     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));

  }
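
  /* Each ".dword FAULT,FIXUP" pair above is an exception-table entry:
     on a faulting userspace access, the kernel looks up the recorded PC
     in __ex_table and resumes at the matching fixup address instead of
     oopsing.  (The generic mechanism; the CRIS-specific lookup lives in
     the arch fault handler.)  */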

  /* Either we directly start copying, using dword copying in a loop, or
     we copy as much as possible with 'movem' and then the last block (<44
     bytes) is copied here.  This will work since 'movem' will have
     updated SRC, DST and N.  */

  while (n >= 16)
  {
    __asm_copy_to_user_16 (dst, src, retn);
    n -= 16;
  }

  /* Having a separate by-four loop cuts down on cache footprint.
     FIXME: Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
  {
    __asm_copy_to_user_4 (dst, src, retn);
    n -= 4;
  }

  switch (n)
  {
    case 0:
      break;
    case 1:
      __asm_copy_to_user_1 (dst, src, retn);
      break;
    case 2:
      __asm_copy_to_user_2 (dst, src, retn);
      break;
    case 3:
      __asm_copy_to_user_3 (dst, src, retn);
      break;
  }

  return retn;
}
EXPORT_SYMBOL(__copy_user);

/* Copy from user to kernel, zeroing the bytes that were inaccessible in
   userland.  The return-value is the number of bytes that were
   inaccessible.  */
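
/* Hypothetical usage sketch (illustrative names only):

	left = __copy_user_zeroing(kbuf, uptr, len);

   On return, the first (len - left) bytes of kbuf hold the copied data
   and the remaining LEFT bytes have been cleared to zero.  */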

unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
				  unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;

  /* The best reason to align src is that we then know that a read-fault
     was for aligned bytes; there's no 1..3 remaining good bytes to
     pickle.  */
  if (((unsigned long) src & 3) != 0)
  {
    if (((unsigned long) src & 1) && n != 0)
    {
      __asm_copy_from_user_1 (dst, src, retn);
      n--;
    }

    if (((unsigned long) src & 2) && n >= 2)
    {
      __asm_copy_from_user_2 (dst, src, retn);
      n -= 2;
    }

    /* We only need one check after the unalignment-adjustments, because
       if both adjustments were done, either both or neither reference
       had an exception.  */
    if (retn != 0)
      goto copy_exception_bytes;
  }

  /* Decide which copying method to use. */
  if (n >= 44*2)		/* Break even between movem and
				   move16 is at 38.7*2, but modulo 44.
				   FIXME: We use move4 now.  */
  {
    /* For large copies we use 'movem' */

    /* It is not optimal to tell the compiler about clobbering any
       registers; that will move the saving/restoring of those registers
       to the function prologue/epilogue, and make non-movem sizes
       suboptimal.

       This method is not foolproof; it assumes that the "asm reg"
       declarations at the beginning of the function really are used
       here (beware: they may be moved to temporary registers).
       This way, we do not have to save/move the registers around into
       temporaries; we can safely use them straight away.

       If you want to check that the allocation was right; then
       check the equalities in the first comment.  It should say
       "r13=r13, r11=r11, r12=r12".  */
    __asm__ volatile ("\n\
	.ifnc %0%1%2%3,$r13$r11$r12$r10				\n\
	.err							\n\
	.endif							\n\
								\n\
	;; Save the registers we'll use in the movem process	\n\
	;; on the stack.					\n\
	subq	11*4,$sp					\n\
	movem	$r10,[$sp]					\n\
								\n\
	;; Now we've got this:					\n\
	;; r11 - src						\n\
	;; r13 - dst						\n\
	;; r12 - n						\n\
								\n\
	;; Update n for the first loop				\n\
	subq	44,$r12						\n\
0:								\n\
	movem	[$r11+],$r10					\n\
1:								\n\
	subq	44,$r12						\n\
	bge	0b						\n\
	movem	$r10,[$r13+]					\n\
								\n\
	addq	44,$r12	;; compensate for last loop underflowing n \n\
								\n\
	;; Restore registers from stack				\n\
	movem	[$sp+],$r10					\n\
4:								\n\
	.section .fixup,\"ax\"					\n\
								\n\
;; Do not jump back into the loop if we fail.  For some uses, we get a	\n\
;; page fault somewhere on the line.  Without checking for page limits, \n\
;; we don't know where, but we need to copy accurately and keep an	\n\
;; accurate count; not just clear the whole line.  To do that, we fall	\n\
;; down in the code below, proceeding with smaller amounts.  It should	\n\
;; be kept in mind that we have to cater to code like what at one time	\n\
;; was in fs/super.c:						\n\
;;  i = size - copy_from_user((void *)page, data, size);	\n\
;; which would cause repeated faults while clearing the remainder of	\n\
;; the SIZE bytes at PAGE after the first fault.		\n\
;; A caveat here is that we must not fall through from a failing page	\n\
;; to a valid page.						\n\
								\n\
3:								\n\
	movem	[$sp+],$r10					\n\
	addq	44,$r12 ;; Get back count before faulting point. \n\
	subq	44,$r11 ;; Get back pointer to faulting movem-line. \n\
	jump	4b	;; Fall through, pretending the fault didn't happen.\n\
								\n\
	.previous						\n\
	.section __ex_table,\"a\"				\n\
	.dword	1b,3b						\n\
	.previous"

     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));

  }

  /* Either we directly start copying here, using dword copying in a loop,
     or we copy as much as possible with 'movem' and then the last block
     (<44 bytes) is copied here.  This will work since 'movem' will have
     updated src, dst and n.  (Except with failing src.)

     Since we want to keep src accurate, we can't use
     __asm_copy_from_user_N with N != (1, 2, 4); it updates dst and
     retn, but not src (by design; its value is ignored elsewhere).  */

  while (n >= 4)
  {
    __asm_copy_from_user_4 (dst, src, retn);
    n -= 4;

    if (retn)
      goto copy_exception_bytes;
  }

  /* If we get here, there were no memory read faults.  */
  switch (n)
  {
    /* These copies are at least "naturally aligned" (so we don't have
       to check each byte), due to the src alignment code before the
       movem loop.  The *_3 case *will* get the correct count for retn.  */
    case 0:
      /* This case deliberately left in (if you have doubts check the
	 generated assembly code).  */
      break;
    case 1:
      __asm_copy_from_user_1 (dst, src, retn);
      break;
    case 2:
      __asm_copy_from_user_2 (dst, src, retn);
      break;
    case 3:
      __asm_copy_from_user_3 (dst, src, retn);
      break;
  }

  /* If we get here, retn correctly reflects the number of failing
     bytes.  */
  return retn;

copy_exception_bytes:
  /* We already have "retn" bytes cleared, and need to clear the
     remaining "n" bytes.  A non-optimized simple byte-for-byte in-line
     memset is preferred here, since this isn't speed-critical code and
     we'd rather have this a leaf-function than calling memset.  */
  {
    char *endp;
    for (endp = dst + n; dst < endp; dst++)
      *dst = 0;
  }

  return retn + n;
}
EXPORT_SYMBOL(__copy_user_zeroing);

/* Zero userspace.  */
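
/* Hypothetical usage sketch (illustrative names only; the clear_user
   wrapper in asm-cris/uaccess.h is the usual entry point):

	unsigned long left = __do_clear_user(uptr, len);

   A nonzero LEFT is the number of bytes that could not be cleared.  */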
unsigned long __do_clear_user(void __user *pto, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pto;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;


  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes.  */
      && n >= 3)
  {
    if ((unsigned long) dst & 1)
    {
      __asm_clear_1 (dst, retn);
      n--;
    }

    if ((unsigned long) dst & 2)
    {
      __asm_clear_2 (dst, retn);
      n -= 2;
    }
  }

  /* Decide which copying method to use.
     FIXME: This number is from the "ordinary" kernel memset.  */
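  /* Note on the 48: the movem below stores $r0..$r11, i.e. 12 words of
     4 bytes = 48 bytes cleared per iteration, matching the 12*4
     adjustments of n in the loop.  */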
  if (n >= (1*48))
  {
    /* For large clears we use 'movem' */

    /* It is not optimal to tell the compiler about clobbering any
       call-saved registers; that will move the saving/restoring of
       those registers to the function prologue/epilogue, and make
       non-movem sizes suboptimal.

       This method is not foolproof; it assumes that the "asm reg"
       declarations at the beginning of the function really are used
       here (beware: they may be moved to temporary registers).
       This way, we do not have to save/move the registers around into
       temporaries; we can safely use them straight away.

       If you want to check that the allocation was right; then
       check the equalities in the first comment.  It should say
       something like "r13=r13, r11=r11, r12=r12".  */
    __asm__ volatile ("\n\
	.ifnc %0%1%2,$r13$r12$r10				\n\
	.err							\n\
	.endif							\n\
								\n\
	;; Save the registers we'll clobber in the movem process \n\
	;; on the stack.  Don't mention them to gcc, it will only be \n\
	;; upset.						\n\
	subq	11*4,$sp					\n\
	movem	$r10,[$sp]					\n\
								\n\
	clear.d	$r0						\n\
	clear.d	$r1						\n\
	clear.d	$r2						\n\
	clear.d	$r3						\n\
	clear.d	$r4						\n\
	clear.d	$r5						\n\
	clear.d	$r6						\n\
	clear.d	$r7						\n\
	clear.d	$r8						\n\
	clear.d	$r9						\n\
	clear.d	$r10						\n\
	clear.d	$r11						\n\
								\n\
	;; Now we've got this:					\n\
	;; r13 - dst						\n\
	;; r12 - n						\n\
								\n\
	;; Update n for the first loop				\n\
	subq	12*4,$r12					\n\
0:								\n\
	subq	12*4,$r12					\n\
	bge	0b						\n\
	movem	$r11,[$r13+]					\n\
1:								\n\
	addq	12*4,$r12 ;; compensate for last loop underflowing n \n\
								\n\
	;; Restore registers from stack				\n\
	movem	[$sp+],$r10					\n\
2:								\n\
	.section .fixup,\"ax\"					\n\
3:								\n\
	move.d	[$sp],$r10					\n\
	addq	12*4,$r10					\n\
	move.d	$r10,[$sp]					\n\
	clear.d	$r10						\n\
	jump	0b						\n\
								\n\
4:								\n\
	movem	[$sp+],$r10					\n\
	addq	12*4,$r10					\n\
	addq	12*4,$r12					\n\
	jump	2b						\n\
								\n\
	.previous						\n\
	.section __ex_table,\"a\"				\n\
	.dword	0b,3b						\n\
	.dword	1b,4b						\n\
	.previous"

     /* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (n), "2" (retn)
     /* Clobber */ : "r11");
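
    /* Only r11 needs to be named as clobbered: $r0..$r10 are saved and
       restored around the movem sequence (with the saved $r10 slot
       doubling as the fault-count accumulator in the fixup path), while
       $r11 is cleared and never restored.  */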
  }

  while (n >= 16)
  {
    __asm_clear_16 (dst, retn);
    n -= 16;
  }

  /* Having a separate by-four loop cuts down on cache footprint.
     FIXME: Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
  {
    __asm_clear_4 (dst, retn);
    n -= 4;
  }

  switch (n)
  {
    case 0:
      break;
    case 1:
      __asm_clear_1 (dst, retn);
      break;
    case 2:
      __asm_clear_2 (dst, retn);
      break;
    case 3:
      __asm_clear_3 (dst, retn);
      break;
  }

  return retn;
}
EXPORT_SYMBOL(__do_clear_user);