/*
- * Copyright 2002,2003 Andi Kleen, SuSE Labs.
- *
+ * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
+ *
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of this archive
* for more details. No warranty for anything given at all.
/*
* Checksum copy with exception handling.
- * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
+ * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
* destination is zeroed.
- *
+ *
* Input
* rdi source
* rsi destination
* edx len (32bit)
- * ecx sum (32bit)
+ * ecx sum (32bit)
* r8 src_err_ptr (int)
* r9 dst_err_ptr (int)
*
* Output
* eax 64bit sum. undefined in case of exception.
- *
- * Wrappers need to take care of valid exception sum and zeroing.
+ *
+ * Wrappers need to take care of valid exception sum and zeroing.
* They also should align source or destination to 8 bytes.
*/
.macro source
10:
- .section __ex_table,"a"
+ .section __ex_table, "a"
.align 8
- .quad 10b,.Lbad_source
+ .quad 10b, .Lbad_source
.previous
.endm
-
+
.macro dest
20:
- .section __ex_table,"a"
+ .section __ex_table, "a"
.align 8
- .quad 20b,.Lbad_dest
+ .quad 20b, .Lbad_dest
.previous
.endm
-
+
.macro ignore L=.Lignore
30:
- .section __ex_table,"a"
+ .section __ex_table, "a"
.align 8
- .quad 30b,\L
+ .quad 30b, \L
.previous
.endm
-
-
+
+
ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
- cmpl $3*64,%edx
- jle .Lignore
+ cmpl $3*64, %edx
+ jle .Lignore
-.Lignore:
- subq $7*8,%rsp
+.Lignore:
+ subq $7*8, %rsp
CFI_ADJUST_CFA_OFFSET 7*8
- movq %rbx,2*8(%rsp)
+ movq %rbx, 2*8(%rsp)
CFI_REL_OFFSET rbx, 2*8
- movq %r12,3*8(%rsp)
+ movq %r12, 3*8(%rsp)
CFI_REL_OFFSET r12, 3*8
- movq %r14,4*8(%rsp)
+ movq %r14, 4*8(%rsp)
CFI_REL_OFFSET r14, 4*8
- movq %r13,5*8(%rsp)
+ movq %r13, 5*8(%rsp)
CFI_REL_OFFSET r13, 5*8
- movq %rbp,6*8(%rsp)
+ movq %rbp, 6*8(%rsp)
CFI_REL_OFFSET rbp, 6*8
- movq %r8,(%rsp)
- movq %r9,1*8(%rsp)
-
- movl %ecx,%eax
- movl %edx,%ecx
+ movq %r8, (%rsp)
+ movq %r9, 1*8(%rsp)
- xorl %r9d,%r9d
- movq %rcx,%r12
+ movl %ecx, %eax
+ movl %edx, %ecx
- shrq $6,%r12
- jz .Lhandle_tail /* < 64 */
+ xorl %r9d, %r9d
+ movq %rcx, %r12
+
+ shrq $6, %r12
+ jz .Lhandle_tail /* < 64 */
clc
-
+
/* main loop. clear in 64 byte blocks */
/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
/* r11: temp3, rdx: temp4, r12 loopcnt */
.p2align 4
.Lloop:
source
- movq (%rdi),%rbx
+ movq (%rdi), %rbx
source
- movq 8(%rdi),%r8
+ movq 8(%rdi), %r8
source
- movq 16(%rdi),%r11
+ movq 16(%rdi), %r11
source
- movq 24(%rdi),%rdx
+ movq 24(%rdi), %rdx
source
- movq 32(%rdi),%r10
+ movq 32(%rdi), %r10
source
- movq 40(%rdi),%rbp
+ movq 40(%rdi), %rbp
source
- movq 48(%rdi),%r14
+ movq 48(%rdi), %r14
source
- movq 56(%rdi),%r13
-
+ movq 56(%rdi), %r13
+
ignore 2f
prefetcht0 5*64(%rdi)
-2:
- adcq %rbx,%rax
- adcq %r8,%rax
- adcq %r11,%rax
- adcq %rdx,%rax
- adcq %r10,%rax
- adcq %rbp,%rax
- adcq %r14,%rax
- adcq %r13,%rax
+2:
+ adcq %rbx, %rax
+ adcq %r8, %rax
+ adcq %r11, %rax
+ adcq %rdx, %rax
+ adcq %r10, %rax
+ adcq %rbp, %rax
+ adcq %r14, %rax
+ adcq %r13, %rax
decl %r12d
-
+
dest
- movq %rbx,(%rsi)
+ movq %rbx, (%rsi)
dest
- movq %r8,8(%rsi)
+ movq %r8, 8(%rsi)
dest
- movq %r11,16(%rsi)
+ movq %r11, 16(%rsi)
dest
- movq %rdx,24(%rsi)
+ movq %rdx, 24(%rsi)
dest
- movq %r10,32(%rsi)
+ movq %r10, 32(%rsi)
dest
- movq %rbp,40(%rsi)
+ movq %rbp, 40(%rsi)
dest
- movq %r14,48(%rsi)
+ movq %r14, 48(%rsi)
dest
- movq %r13,56(%rsi)
-
+ movq %r13, 56(%rsi)
+
3:
-
- leaq 64(%rdi),%rdi
- leaq 64(%rsi),%rsi
- jnz .Lloop
+ leaq 64(%rdi), %rdi
+ leaq 64(%rsi), %rsi
- adcq %r9,%rax
+ jnz .Lloop
+
+ adcq %r9, %rax
/* do last up to 56 bytes */
.Lhandle_tail:
/* ecx: count */
- movl %ecx,%r10d
- andl $63,%ecx
- shrl $3,%ecx
- jz .Lfold
+ movl %ecx, %r10d
+ andl $63, %ecx
+ shrl $3, %ecx
+ jz .Lfold
clc
.p2align 4
-.Lloop_8:
+.Lloop_8:
source
- movq (%rdi),%rbx
- adcq %rbx,%rax
+ movq (%rdi), %rbx
+ adcq %rbx, %rax
decl %ecx
dest
- movq %rbx,(%rsi)
- leaq 8(%rsi),%rsi /* preserve carry */
- leaq 8(%rdi),%rdi
+ movq %rbx, (%rsi)
+ leaq 8(%rsi), %rsi /* preserve carry */
+ leaq 8(%rdi), %rdi
jnz .Lloop_8
- adcq %r9,%rax /* add in carry */
+ adcq %r9, %rax /* add in carry */
.Lfold:
/* reduce checksum to 32bits */
- movl %eax,%ebx
- shrq $32,%rax
- addl %ebx,%eax
- adcl %r9d,%eax
+ movl %eax, %ebx
+ shrq $32, %rax
+ addl %ebx, %eax
+ adcl %r9d, %eax
- /* do last up to 6 bytes */
+ /* do last up to 6 bytes */
.Lhandle_7:
- movl %r10d,%ecx
- andl $7,%ecx
- shrl $1,%ecx
+ movl %r10d, %ecx
+ andl $7, %ecx
+ shrl $1, %ecx
jz .Lhandle_1
- movl $2,%edx
- xorl %ebx,%ebx
- clc
+ movl $2, %edx
+ xorl %ebx, %ebx
+ clc
.p2align 4
-.Lloop_1:
+.Lloop_1:
source
- movw (%rdi),%bx
- adcl %ebx,%eax
+ movw (%rdi), %bx
+ adcl %ebx, %eax
decl %ecx
dest
- movw %bx,(%rsi)
- leaq 2(%rdi),%rdi
- leaq 2(%rsi),%rsi
+ movw %bx, (%rsi)
+ leaq 2(%rdi), %rdi
+ leaq 2(%rsi), %rsi
jnz .Lloop_1
- adcl %r9d,%eax /* add in carry */
-
+ adcl %r9d, %eax /* add in carry */
+
/* handle last odd byte */
.Lhandle_1:
- testl $1,%r10d
+ testl $1, %r10d
jz .Lende
- xorl %ebx,%ebx
+ xorl %ebx, %ebx
source
- movb (%rdi),%bl
+ movb (%rdi), %bl
dest
- movb %bl,(%rsi)
- addl %ebx,%eax
- adcl %r9d,%eax /* carry */
-
+ movb %bl, (%rsi)
+ addl %ebx, %eax
+ adcl %r9d, %eax /* carry */
+
CFI_REMEMBER_STATE
.Lende:
- movq 2*8(%rsp),%rbx
+ movq 2*8(%rsp), %rbx
CFI_RESTORE rbx
- movq 3*8(%rsp),%r12
+ movq 3*8(%rsp), %r12
CFI_RESTORE r12
- movq 4*8(%rsp),%r14
+ movq 4*8(%rsp), %r14
CFI_RESTORE r14
- movq 5*8(%rsp),%r13
+ movq 5*8(%rsp), %r13
CFI_RESTORE r13
- movq 6*8(%rsp),%rbp
+ movq 6*8(%rsp), %rbp
CFI_RESTORE rbp
- addq $7*8,%rsp
+ addq $7*8, %rsp
CFI_ADJUST_CFA_OFFSET -7*8
ret
CFI_RESTORE_STATE
/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
- movq (%rsp),%rax
- testq %rax,%rax
+ movq (%rsp), %rax
+ testq %rax, %rax
jz .Lende
- movl $-EFAULT,(%rax)
+ movl $-EFAULT, (%rax)
jmp .Lende
-
+
.Lbad_dest:
- movq 8(%rsp),%rax
- testq %rax,%rax
- jz .Lende
- movl $-EFAULT,(%rax)
+ movq 8(%rsp), %rax
+ testq %rax, %rax
+ jz .Lende
+ movl $-EFAULT, (%rax)
jmp .Lende
CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)