Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Optimized version of the ip_fast_csum() function | |
3 | * Used for calculating IP header checksum | |
4 | * | |
5 | * Return: 16bit checksum, complemented | |
6 | * | |
7 | * Inputs: | |
8 | * in0: address of buffer to checksum (char *) | |
9 | * in1: length of the buffer in 32-bit words (int) | |
10 | * | |
007d77d0 KC |
11 | * Copyright (C) 2002, 2006 Intel Corp. |
12 | * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com> | |
1da177e4 LT |
13 | */ |
14 | ||
15 | #include <asm/asmmacro.h> | |
16 | ||
17 | /* | |
18 | * Since we know that most likely this function is called with buf aligned | |
19 | * on 4-byte boundary and 20 bytes in length, we can execute rather quickly | |
20 | * versus calling generic version of do_csum, which has lots of overhead in | |
21 | * handling various alignments and sizes. However, due to lack of constraints | |
22 | * put on the function input argument, cases with alignment not on 4-byte or | |
23 | * size not equal to 20 bytes will be handled by the generic do_csum function. | |
24 | */ | |
25 | ||
26 | #define in0 r32 | |
27 | #define in1 r33 | |
007d77d0 KC |
28 | #define in2 r34 |
29 | #define in3 r35 | |
30 | #define in4 r36 | |
1da177e4 LT |
31 | #define ret0 r8 |
32 | ||
33 | GLOBAL_ENTRY(ip_fast_csum) | |
34 | .prologue | |
35 | .body | |
36 | cmp.ne p6,p7=5,in1 // size other than 20 byte? | |
37 | and r14=3,in0 // is it aligned on 4-byte? | |
38 | add r15=4,in0 // second source pointer | |
39 | ;; | |
40 | cmp.ne.or.andcm p6,p7=r14,r0 | |
41 | ;; | |
42 | (p7) ld4 r20=[in0],8 | |
43 | (p7) ld4 r21=[r15],8 | |
44 | (p6) br.spnt .generic | |
45 | ;; | |
46 | ld4 r22=[in0],8 | |
47 | ld4 r23=[r15],8 | |
48 | ;; | |
49 | ld4 r24=[in0] | |
50 | add r20=r20,r21 | |
51 | add r22=r22,r23 | |
52 | ;; | |
53 | add r20=r20,r22 | |
54 | ;; | |
55 | add r20=r20,r24 | |
56 | ;; | |
57 | shr.u ret0=r20,16 // now need to add the carry | |
58 | zxt2 r20=r20 | |
59 | ;; | |
60 | add r20=ret0,r20 | |
61 | ;; | |
62 | shr.u ret0=r20,16 // add carry again | |
63 | zxt2 r20=r20 | |
64 | ;; | |
65 | add r20=ret0,r20 | |
66 | ;; | |
67 | shr.u ret0=r20,16 | |
68 | zxt2 r20=r20 | |
69 | ;; | |
70 | add r20=ret0,r20 | |
6dbfc19b | 71 | mov r9=0xffff |
1da177e4 | 72 | ;; |
6dbfc19b | 73 | andcm ret0=r9,r20 |
1da177e4 LT |
74 | .restore sp // reset frame state |
75 | br.ret.sptk.many b0 | |
76 | ;; | |
77 | ||
78 | .generic: | |
79 | .prologue | |
80 | .save ar.pfs, r35 | |
81 | alloc r35=ar.pfs,2,2,2,0 | |
82 | .save rp, r34 | |
83 | mov r34=b0 | |
84 | .body | |
85 | dep.z out1=in1,2,30 | |
86 | mov out0=in0 | |
87 | ;; | |
88 | br.call.sptk.many b0=do_csum | |
89 | ;; | |
90 | andcm ret0=-1,ret0 | |
91 | mov ar.pfs=r35 | |
92 | mov b0=r34 | |
93 | br.ret.sptk.many b0 | |
94 | END(ip_fast_csum) | |
007d77d0 KC |
95 | |
96 | GLOBAL_ENTRY(csum_ipv6_magic) | |
97 | ld4 r20=[in0],4 | |
98 | ld4 r21=[in1],4 | |
5afe18d2 | 99 | zxt4 in2=in2 |
007d77d0 KC |
100 | ;; |
101 | ld4 r22=[in0],4 | |
102 | ld4 r23=[in1],4 | |
5afe18d2 | 103 | dep r15=in3,in2,32,16 |
007d77d0 KC |
104 | ;; |
105 | ld4 r24=[in0],4 | |
106 | ld4 r25=[in1],4 | |
5afe18d2 | 107 | mux1 r15=r15,@rev |
007d77d0 KC |
108 | add r16=r20,r21 |
109 | add r17=r22,r23 | |
5afe18d2 | 110 | zxt4 in4=in4 |
007d77d0 KC |
111 | ;; |
112 | ld4 r26=[in0],4 | |
113 | ld4 r27=[in1],4 | |
5afe18d2 | 114 | shr.u r15=r15,16 |
007d77d0 KC |
115 | add r18=r24,r25 |
116 | add r8=r16,r17 | |
117 | ;; | |
118 | add r19=r26,r27 | |
119 | add r8=r8,r18 | |
120 | ;; | |
121 | add r8=r8,r19 | |
122 | add r15=r15,in4 | |
123 | ;; | |
124 | add r8=r8,r15 | |
125 | ;; | |
126 | shr.u r10=r8,32 // now fold sum into short | |
127 | zxt4 r11=r8 | |
128 | ;; | |
129 | add r8=r10,r11 | |
130 | ;; | |
131 | shr.u r10=r8,16 // yeah, keep it rolling | |
132 | zxt2 r11=r8 | |
133 | ;; | |
134 | add r8=r10,r11 | |
135 | ;; | |
136 | shr.u r10=r8,16 // three times lucky | |
137 | zxt2 r11=r8 | |
138 | ;; | |
139 | add r8=r10,r11 | |
140 | mov r9=0xffff | |
141 | ;; | |
142 | andcm r8=r9,r8 | |
143 | br.ret.sptk.many b0 | |
144 | END(csum_ipv6_magic) |