/*
 * User address space access functions.
 * The non-inlined parts of asm-cris/uaccess.h are here.
 *
 * Copyright (C) 2000, Axis Communications AB.
 *
 * Written by Hans-Peter Nilsson.
 * Pieces used from memcpy, originally by Kenny Ranerup long time ago.
 */

#include <linux/uaccess.h>

/* Asm:s have been tweaked (within the domain of correctness) to give
   satisfactory results for "gcc version 2.96 20000427 (experimental)".

   Check regularly...

   Note that the PC saved at a bus-fault is the address *after* the
   faulting instruction, which means the branch-target for instructions in
   delay-slots for taken branches.  Note also that the postincrement in
   the instruction is performed regardless of bus-fault; the register is
   seen updated in fault handlers.

   Oh, and on the code formatting issue, to whoever feels like "fixing
   it" to Conformity: I'm too "lazy", but why don't you go ahead and "fix"
   string.c too.  I just don't think too many people will hack this file
   for the code format to be an issue.  */
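
/* A concrete reading of the delay-slot note above (editorial sketch):
   if the "movem $r10,[$r13+]" sitting in the delay slot of a taken
   "bge 0b" faults, the PC recorded for the exception-table lookup is
   the branch target 0b, not the address following the movem.  The
   __ex_table entries in the asm blocks below rely on exactly this.  */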

/* Copy to userspace.  This is based on the memcpy used for
   kernel-to-kernel copying; see "string.c".  */
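
/* Hypothetical usage sketch (illustrative names only; the real inline
   wrappers live in asm-cris/uaccess.h):

	if (access_ok(VERIFY_WRITE, uptr, len))
		left = __copy_user(uptr, kbuf, len);

   A nonzero LEFT is the number of trailing bytes that could not be
   written to userspace.  */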

unsigned long __copy_user(void __user *pdst, const void *psrc, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;


  /* When src is aligned but not dst, this makes a few extra needless
     cycles.  I believe it would take as many to check that the
     re-alignment was unnecessary.  */
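
  /* The __asm_copy_to_user_N helpers used below come from
     asm-cris/uaccess.h; as used here, each appears to copy N bytes,
     advance dst and src, and account any uncopied bytes in retn on a
     write fault.  (Description inferred from this file; see the header
     for the authoritative definitions.)  */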
  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes; so we
	 don't have to check further for overflows.  */
      && n >= 3)
  {
    if ((unsigned long) dst & 1)
    {
      __asm_copy_to_user_1 (dst, src, retn);
      n--;
    }

    if ((unsigned long) dst & 2)
    {
      __asm_copy_to_user_2 (dst, src, retn);
      n -= 2;
    }
  }

  /* Decide which copying method to use. */
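  /* Note on the magic 44: "movem ...,$r10" transfers registers
     $r0..$r10, i.e. 11 words of 4 bytes = 44 bytes per movem line;
     that is also why 11*4 bytes of stack are reserved below.  */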
  if (n >= 44*2)		/* Break even between movem and
				   move16 is at 38.7*2, but modulo 44. */
  {
    /* For large copies we use 'movem'.  */

    /* It is not optimal to tell the compiler about clobbering any
       registers; that will move the saving/restoring of those registers
       to the function prologue/epilogue, and make non-movem sizes
       suboptimal.

       This method is not foolproof; it assumes that the "asm reg"
       declarations at the beginning of the function really are used
       here (beware: they may be moved to temporary registers).
       This way, we do not have to save/move the registers around into
       temporaries; we can safely use them straight away.

       If you want to check that the allocation was right; then
       check the equalities in the first comment.  It should say
       "r13=r13, r11=r11, r12=r12".  */
    __asm__ volatile ("\
	.ifnc %0%1%2%3,$r13$r11$r12$r10				\n\
	.err							\n\
	.endif							\n\
								\n\
	;; Save the registers we'll use in the movem process	\n\
	;; on the stack.					\n\
	subq	11*4,$sp					\n\
	movem	$r10,[$sp]					\n\
								\n\
	;; Now we've got this:					\n\
	;; r11 - src						\n\
	;; r13 - dst						\n\
	;; r12 - n						\n\
								\n\
	;; Update n for the first loop				\n\
	subq	44,$r12						\n\
								\n\
; Since the noted PC of a faulting instruction in a delay-slot of a taken \n\
; branch is that of the branch target, we actually point at the from-movem \n\
; for this case.  There is no ambiguity here; if there was a fault in that \n\
; instruction (meaning a kernel oops), the faulted PC would be the address \n\
; after *that* movem.						\n\
								\n\
0:								\n\
	movem	[$r11+],$r10					\n\
	subq	44,$r12						\n\
	bge	0b						\n\
	movem	$r10,[$r13+]					\n\
1:								\n\
	addq	44,$r12	;; compensate for last loop underflowing n \n\
								\n\
	;; Restore registers from stack				\n\
	movem	[$sp+],$r10					\n\
2:								\n\
	.section .fixup,\"ax\"					\n\
								\n\
; To provide a correct count in r10 of bytes that failed to be copied,	\n\
; we jump back into the loop if the loop-branch was taken.  There is no \n\
; performance penalty for sane use; the program will segfault soon enough. \n\
								\n\
3:								\n\
	move.d	[$sp],$r10					\n\
	addq	44,$r10						\n\
	move.d	$r10,[$sp]					\n\
	jump	0b						\n\
4:								\n\
	movem	[$sp+],$r10					\n\
	addq	44,$r10						\n\
	addq	44,$r12						\n\
	jump	2b						\n\
								\n\
	.previous						\n\
	.section __ex_table,\"a\"				\n\
	.dword	0b,3b						\n\
	.dword	1b,4b						\n\
	.previous"

     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));

  }
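
  /* Each ".dword FAULT,FIXUP" pair above is an exception-table entry:
     on a faulting userspace access, the kernel looks up the recorded PC
     in __ex_table and resumes at the matching fixup address instead of
     oopsing.  (The generic mechanism; the CRIS-specific lookup lives in
     the arch fault handler.)  */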

  /* Either we directly start copying, using dword copying in a loop, or
     we copy as much as possible with 'movem' and then the last block (<44
     bytes) is copied here.  This will work since 'movem' will have
     updated SRC, DST and N.  */

  while (n >= 16)
  {
    __asm_copy_to_user_16 (dst, src, retn);
    n -= 16;
  }

  /* Having a separate by-four loop cuts down on cache footprint.
     FIXME: Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
  {
    __asm_copy_to_user_4 (dst, src, retn);
    n -= 4;
  }

  switch (n)
  {
    case 0:
      break;
    case 1:
      __asm_copy_to_user_1 (dst, src, retn);
      break;
    case 2:
      __asm_copy_to_user_2 (dst, src, retn);
      break;
    case 3:
      __asm_copy_to_user_3 (dst, src, retn);
      break;
  }

  return retn;
}
EXPORT_SYMBOL(__copy_user);

/* Copy from user to kernel, zeroing the bytes that were inaccessible in
   userland.  The return-value is the number of bytes that were
   inaccessible.  */
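
/* Hypothetical usage sketch (illustrative names only):

	left = __copy_user_zeroing(kbuf, uptr, len);

   On return, the first (len - left) bytes of kbuf hold the copied data
   and the remaining LEFT bytes have been cleared to zero.  */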

unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
				  unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;

  /* The best reason to align src is that we then know that a read-fault
     was for aligned bytes; there's no 1..3 remaining good bytes to
     pickle.  */
  if (((unsigned long) src & 3) != 0)
  {
    if (((unsigned long) src & 1) && n != 0)
    {
      __asm_copy_from_user_1 (dst, src, retn);
      n--;
    }

    if (((unsigned long) src & 2) && n >= 2)
    {
      __asm_copy_from_user_2 (dst, src, retn);
      n -= 2;
    }

    /* We only need one check after the unalignment-adjustments, because
       if both adjustments were done, either both or neither reference
       had an exception.  */
    if (retn != 0)
      goto copy_exception_bytes;
  }

  /* Decide which copying method to use. */
  if (n >= 44*2)		/* Break even between movem and
				   move16 is at 38.7*2, but modulo 44.
				   FIXME: We use move4 now.  */
  {
    /* For large copies we use 'movem' */

    /* It is not optimal to tell the compiler about clobbering any
       registers; that will move the saving/restoring of those registers
       to the function prologue/epilogue, and make non-movem sizes
       suboptimal.

       This method is not foolproof; it assumes that the "asm reg"
       declarations at the beginning of the function really are used
       here (beware: they may be moved to temporary registers).
       This way, we do not have to save/move the registers around into
       temporaries; we can safely use them straight away.

       If you want to check that the allocation was right; then
       check the equalities in the first comment.  It should say
       "r13=r13, r11=r11, r12=r12".  */
    __asm__ volatile ("\n\
	.ifnc %0%1%2%3,$r13$r11$r12$r10				\n\
	.err							\n\
	.endif							\n\
								\n\
	;; Save the registers we'll use in the movem process	\n\
	;; on the stack.					\n\
	subq	11*4,$sp					\n\
	movem	$r10,[$sp]					\n\
								\n\
	;; Now we've got this:					\n\
	;; r11 - src						\n\
	;; r13 - dst						\n\
	;; r12 - n						\n\
								\n\
	;; Update n for the first loop				\n\
	subq	44,$r12						\n\
0:								\n\
	movem	[$r11+],$r10					\n\
1:								\n\
	subq	44,$r12						\n\
	bge	0b						\n\
	movem	$r10,[$r13+]					\n\
								\n\
	addq	44,$r12	;; compensate for last loop underflowing n \n\
								\n\
	;; Restore registers from stack				\n\
	movem	[$sp+],$r10					\n\
4:								\n\
	.section .fixup,\"ax\"					\n\
								\n\
;; Do not jump back into the loop if we fail.  For some uses, we get a	\n\
;; page fault somewhere on the line.  Without checking for page limits, \n\
;; we don't know where, but we need to copy accurately and keep an	\n\
;; accurate count; not just clear the whole line.  To do that, we fall	\n\
;; down in the code below, proceeding with smaller amounts.  It should	\n\
;; be kept in mind that we have to cater to code like what at one time	\n\
;; was in fs/super.c:						\n\
;;  i = size - copy_from_user((void *)page, data, size);	\n\
;; which would cause repeated faults while clearing the remainder of	\n\
;; the SIZE bytes at PAGE after the first fault.		\n\
;; A caveat here is that we must not fall through from a failing page	\n\
;; to a valid page.						\n\
								\n\
3:								\n\
	movem	[$sp+],$r10					\n\
	addq	44,$r12 ;; Get back count before faulting point. \n\
	subq	44,$r11 ;; Get back pointer to faulting movem-line. \n\
	jump	4b	;; Fall through, pretending the fault didn't happen.\n\
								\n\
	.previous						\n\
	.section __ex_table,\"a\"				\n\
	.dword	1b,3b						\n\
	.previous"

     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));

  }

  /* Either we directly start copying here, using dword copying in a loop,
     or we copy as much as possible with 'movem' and then the last block
     (<44 bytes) is copied here.  This will work since 'movem' will have
     updated src, dst and n.  (Except with failing src.)

     Since we want to keep src accurate, we can't use
     __asm_copy_from_user_N with N != (1, 2, 4); it updates dst and
     retn, but not src (by design; its value is ignored elsewhere).  */

  while (n >= 4)
  {
    __asm_copy_from_user_4 (dst, src, retn);
    n -= 4;

    if (retn)
      goto copy_exception_bytes;
  }

  /* If we get here, there were no memory read faults.  */
  switch (n)
  {
    /* These copies are at least "naturally aligned" (so we don't have
       to check each byte), due to the src alignment code before the
       movem loop.  The *_3 case *will* get the correct count for retn.  */
    case 0:
      /* This case deliberately left in (if you have doubts check the
	 generated assembly code).  */
      break;
    case 1:
      __asm_copy_from_user_1 (dst, src, retn);
      break;
    case 2:
      __asm_copy_from_user_2 (dst, src, retn);
      break;
    case 3:
      __asm_copy_from_user_3 (dst, src, retn);
      break;
  }

  /* If we get here, retn correctly reflects the number of failing
     bytes.  */
  return retn;

copy_exception_bytes:
  /* We already have "retn" bytes cleared, and need to clear the
     remaining "n" bytes.  A non-optimized simple byte-for-byte in-line
     memset is preferred here, since this isn't speed-critical code and
     we'd rather have this a leaf-function than calling memset.  */
  {
    char *endp;
    for (endp = dst + n; dst < endp; dst++)
      *dst = 0;
  }

  return retn + n;
}
EXPORT_SYMBOL(__copy_user_zeroing);

/* Zero userspace.  */
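
/* Hypothetical usage sketch (illustrative names only; the clear_user
   wrapper in asm-cris/uaccess.h is the usual entry point):

	unsigned long left = __do_clear_user(uptr, len);

   A nonzero LEFT is the number of bytes that could not be cleared.  */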
unsigned long __do_clear_user(void __user *pto, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was alright, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pto;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;


  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes.  */
      && n >= 3)
  {
    if ((unsigned long) dst & 1)
    {
      __asm_clear_1 (dst, retn);
      n--;
    }

    if ((unsigned long) dst & 2)
    {
      __asm_clear_2 (dst, retn);
      n -= 2;
    }
  }

  /* Decide which copying method to use.
     FIXME: This number is from the "ordinary" kernel memset.  */
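  /* Note on the 48: the movem below stores $r0..$r11, i.e. 12 words of
     4 bytes = 48 bytes cleared per iteration, matching the 12*4
     adjustments of n in the loop.  */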
  if (n >= (1*48))
  {
    /* For large clears we use 'movem' */

    /* It is not optimal to tell the compiler about clobbering any
       call-saved registers; that will move the saving/restoring of
       those registers to the function prologue/epilogue, and make
       non-movem sizes suboptimal.

       This method is not foolproof; it assumes that the "asm reg"
       declarations at the beginning of the function really are used
       here (beware: they may be moved to temporary registers).
       This way, we do not have to save/move the registers around into
       temporaries; we can safely use them straight away.

       If you want to check that the allocation was right; then
       check the equalities in the first comment.  It should say
       something like "r13=r13, r11=r11, r12=r12".  */
    __asm__ volatile ("\n\
	.ifnc %0%1%2,$r13$r12$r10				\n\
	.err							\n\
	.endif							\n\
								\n\
	;; Save the registers we'll clobber in the movem process \n\
	;; on the stack.  Don't mention them to gcc, it will only be \n\
	;; upset.						\n\
	subq	11*4,$sp					\n\
	movem	$r10,[$sp]					\n\
								\n\
	clear.d	$r0						\n\
	clear.d	$r1						\n\
	clear.d	$r2						\n\
	clear.d	$r3						\n\
	clear.d	$r4						\n\
	clear.d	$r5						\n\
	clear.d	$r6						\n\
	clear.d	$r7						\n\
	clear.d	$r8						\n\
	clear.d	$r9						\n\
	clear.d	$r10						\n\
	clear.d	$r11						\n\
								\n\
	;; Now we've got this:					\n\
	;; r13 - dst						\n\
	;; r12 - n						\n\
								\n\
	;; Update n for the first loop				\n\
	subq	12*4,$r12					\n\
0:								\n\
	subq	12*4,$r12					\n\
	bge	0b						\n\
	movem	$r11,[$r13+]					\n\
1:								\n\
	addq	12*4,$r12 ;; compensate for last loop underflowing n \n\
								\n\
	;; Restore registers from stack				\n\
	movem	[$sp+],$r10					\n\
2:								\n\
	.section .fixup,\"ax\"					\n\
3:								\n\
	move.d	[$sp],$r10					\n\
	addq	12*4,$r10					\n\
	move.d	$r10,[$sp]					\n\
	clear.d	$r10						\n\
	jump	0b						\n\
								\n\
4:								\n\
	movem	[$sp+],$r10					\n\
	addq	12*4,$r10					\n\
	addq	12*4,$r12					\n\
	jump	2b						\n\
								\n\
	.previous						\n\
	.section __ex_table,\"a\"				\n\
	.dword	0b,3b						\n\
	.dword	1b,4b						\n\
	.previous"

     /* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
     /* Inputs */ : "0" (dst), "1" (n), "2" (retn)
     /* Clobber */ : "r11");
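
    /* Only r11 needs to be named as clobbered: $r0..$r10 are saved and
       restored around the movem sequence (with the saved $r10 slot
       doubling as the fault-count accumulator in the fixup path), while
       $r11 is cleared and never restored.  */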
  }

  while (n >= 16)
  {
    __asm_clear_16 (dst, retn);
    n -= 16;
  }

  /* Having a separate by-four loop cuts down on cache footprint.
     FIXME: Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
  {
    __asm_clear_4 (dst, retn);
    n -= 4;
  }

  switch (n)
  {
    case 0:
      break;
    case 1:
      __asm_clear_1 (dst, retn);
      break;
    case 2:
      __asm_clear_2 (dst, retn);
      break;
    case 3:
      __asm_clear_3 (dst, retn);
      break;
  }

  return retn;
}
EXPORT_SYMBOL(__do_clear_user);