/*
 * Linux-2.6.12-rc2
 * [GitHub/mt8127/android_kernel_alcatel_ttab.git] arch/cris/arch-v10/lib/checksumcopy.S
 */
/* $Id: checksumcopy.S,v 1.1 2001/12/17 13:59:27 bjornw Exp $
 * A fast checksum+copy routine using movem
 * Copyright (c) 1998, 2001 Axis Communications AB
 *
 * Authors:	Bjorn Wesen
 *
 * csum_partial_copy_nocheck(const char *src, char *dst,
 *			     int len, unsigned int sum)
 *
 * Copies len bytes from src to dst and, in the same pass, adds the
 * copied data to the running checksum "sum".  The value returned is
 * the updated running sum; it is not complemented, and it is not
 * folded to 16 bits after the tail words/byte have been added.
 *
 * In:		r10 = src, r11 = dst, r12 = len, r13 = sum
 * Out:		r10 = updated checksum
 * Clobbers:	r9, r11, r12, r13, flags
 *		(r0-r8 are used by the movem loop but saved/restored)
 * Stack:	9*4 bytes, only while the movem loop runs
 *
 * Branches and ret are each followed by one instruction that is
 * executed before control transfers (delay slot); those instructions
 * are marked "delay slot" below and must not be moved.
 */

	.globl	csum_partial_copy_nocheck
csum_partial_copy_nocheck:

	;; r10 - src
	;; r11 - dst
	;; r12 - length
	;; r13 - checksum

	;; Buffers shorter than 80 bytes go straight to the word loop: that
	;; is the breakeven length between the movem and the plain word
	;; looping versions.  The movem loop always consumes 10*4 = 40 bytes
	;; per iteration, so it must never be entered with fewer than 40
	;; bytes left, or it would copy and checksum past the actual length;
	;; the threshold of 80 guarantees that as well.

	cmpu.w	80, $r12
	blo	_word_loop
	nop			; delay slot

	;; need to save the registers we use below in the movem loop
	;; this overhead is why we have a check above for breakeven length
	;; only r0 - r8 have to be saved, the other ones are clobber-able
	;; according to the ABI

	subq	9*4, $sp
	movem	$r8, [$sp]	; store r0..r8

	;; do a movem copy and checksum

	subq	10*4, $r12	; update length for the first loop
				; (r12 is biased by -40 inside the loop)

_mloop:	movem	[$r10+],$r9	; read 10 longwords into r0..r9
1:	;; A failing userspace access will have this as PC.
	;; NOTE(review): presumably this label is referenced from outside
	;; this file (fault fixup); keep it directly after the read.
	movem	$r9,[$r11+]	; write 10 longwords

	;; perform dword checksumming on the 10 longwords
	;; (each "ax" makes the following add include the carry from the
	;; previous one)

	add.d	$r0,$r13
	ax
	add.d	$r1,$r13
	ax
	add.d	$r2,$r13
	ax
	add.d	$r3,$r13
	ax
	add.d	$r4,$r13
	ax
	add.d	$r5,$r13
	ax
	add.d	$r6,$r13
	ax
	add.d	$r7,$r13
	ax
	add.d	$r8,$r13
	ax
	add.d	$r9,$r13

	;; fold the carry into the checksum, to avoid having to loop the carry
	;; back into the top

	ax
	addq	0,$r13
	ax			; do it again, since we might have generated a carry
	addq	0,$r13

	subq	10*4,$r12
	bge	_mloop		; at least 40 more bytes remain
	nop			; delay slot

	addq	10*4,$r12	; compensate for last loop underflowing length
				; r12 = bytes still to copy (0..39)

	movem	[$sp+],$r8	; restore regs r0..r8 and pop the save area

_word_loop:
	;; only fold if there is anything to fold.

	cmpq	0,$r13
	beq	_no_fold

	;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below
	;; r9 can be used as temporary.
	;;
	;; There is no nop after the beq above: the first move.d below sits
	;; in its delay slot and is executed even when the branch is taken.
	;; That is harmless, since r9 is scratch and is reloaded before it
	;; is read again on every path from _no_fold.

	move.d	$r13,$r9	; delay slot of beq _no_fold
	lsrq	16,$r9		; r9 = checksum >> 16
	and.d	0xffff,$r13	; checksum = checksum & 0xffff
	add.d	$r9,$r13	; checksum += r9
	move.d	$r13,$r9	; do the same again, maybe we got a carry last add
	lsrq	16,$r9
	and.d	0xffff,$r13
	add.d	$r9,$r13

_no_fold:
	cmpq	2,$r12
	blt	_no_words	; fewer than 2 bytes left
	nop			; delay slot

	;; copy and checksum the rest of the words
	;; (fewer than 40 words can remain here and the sum was folded to
	;; 16 bits above, so these adds cannot carry out of 32 bits)

	subq	2,$r12		; bias length by -2 for the loop test

_wloop:	move.w	[$r10+],$r9
2:	;; A failing userspace access will have this as PC.
	addu.w	$r9,$r13	; add the word zero-extended
	subq	2,$r12
	bge	_wloop
	move.w	$r9,[$r11+]	; delay slot: store the word, runs on every
				; iteration including the last

	addq	2,$r12		; undo the bias: r12 = 0 or 1

_no_words:
	;; see if we have one odd byte more
	cmpq	1,$r12
	beq	_do_byte
	nop			; delay slot
	ret
	move.d	$r13, $r10	; delay slot: return the checksum in r10

_do_byte:
	;; copy and checksum the last byte
	move.b	[$r10],$r9
3:	;; A failing userspace access will have this as PC.
	addu.b	$r9,$r13	; add the byte zero-extended
	move.b	$r9,[$r11]
	ret
	move.d	$r13, $r10	; delay slot: return the checksum in r10