Merge tag 'disintegrate-fbdev-20121220' of git://git.infradead.org/users/dhowells...
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / arch / powerpc / lib / string_64.S
1 /*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15 *
16 * Copyright (C) IBM Corporation, 2012
17 *
18 * Author: Anton Blanchard <anton@au.ibm.com>
19 */
20
21 #include <asm/ppc_asm.h>
22 #include <asm/asm-offsets.h>
23
/*
 * TOC entry holding the address of ppc64_caches. __clear_user loads it with
 * PPC64_CACHES@toc(r2) and then reads the DCACHEL1* fields through it.
 */
24 .section ".toc","aw"
25 PPC64_CACHES:
26 .tc ppc64_caches[TC],ppc64_caches
27 .section ".text" /* switch back to .text for the code below */
28
29 /**
30 * __clear_user: - Zero a block of memory in user space, with less checking.
31 * @to: Destination address, in user space.
32 * @n: Number of bytes to zero.
33 *
34 * Zero a block of memory in user space. Caller must check
35 * the specified block with access_ok() before calling this function.
36 *
37 * Returns number of bytes that could not be cleared.
38 * On success, this will be zero.
39 */
40
/*
 * err1: written in front of a store ("err1; stb ..."). It drops local label
 * 100 at the address of that store and emits the pair (100b, .Ldo_err1) into
 * the __ex_table section. Presumably the kernel fault handler uses that pair
 * to resume at .Ldo_err1 when the store faults - confirm against the
 * exception-table code.
 */
41 .macro err1
42 100:
43 .section __ex_table,"a"
44 .align 3 /* 2^3 = 8 byte alignment, the size of one .llong */
45 .llong 100b,.Ldo_err1 /* (address of marked instruction, fixup address) */
46 .previous /* return to the section that was active before */
47 .endm
48
/*
 * err2: written in front of a store ("err2; std ..."). It drops local label
 * 200 at the address of that store and emits the pair (200b, .Ldo_err2) into
 * the __ex_table section. Presumably the kernel fault handler uses that pair
 * to resume at .Ldo_err2 when the store faults - confirm against the
 * exception-table code.
 */
49 .macro err2
50 200:
51 .section __ex_table,"a"
52 .align 3 /* 2^3 = 8 byte alignment, the size of one .llong */
53 .llong 200b,.Ldo_err2 /* (address of marked instruction, fixup address) */
54 .previous /* return to the section that was active before */
55 .endm
56
/*
 * err3: written in front of a store ("err3; stb ..."). It drops local label
 * 300 at the address of that store and emits the pair (300b, .Ldo_err3) into
 * the __ex_table section. Presumably the kernel fault handler uses that pair
 * to resume at .Ldo_err3 when the store faults - confirm against the
 * exception-table code.
 */
57 .macro err3
58 300:
59 .section __ex_table,"a"
60 .align 3 /* 2^3 = 8 byte alignment, the size of one .llong */
61 .llong 300b,.Ldo_err3 /* (address of marked instruction, fixup address) */
62 .previous /* return to the section that was active before */
63 .endm
64
/*
 * Fixup chain for stores in __clear_user that are marked err1/err2/err3.
 * The three labels fall through into each other:
 *   .Ldo_err1 - rewind r3 to the restart address saved in r8, then
 *   .Ldo_err2 - store r4 zero bytes one at a time starting at r3, then
 *   .Ldo_err3 - return the number of bytes still uncleared (r4) in r3.
 * Expects r0 = 0 and r4 = bytes remaining counted from the address the byte
 * loop starts at (r8 for err1, r3 for err2); every err1/err2 site in
 * __clear_user runs with r4 != 0, which the CTR loop below needs.
 */
65 .Ldo_err1:
66 mr r3,r8 /* restart from the address saved before the failing sequence */
67
68 .Ldo_err2:
69 mtctr r4 /* loop count = bytes remaining */
70 1:
71 err3; stb r0,0(r3) /* a fault on this byte store ends the retry at .Ldo_err3 */
72 addi r3,r3,1
73 addi r4,r4,-1 /* r4 always equals the bytes not yet cleared */
74 bdnz 1b
75
76 .Ldo_err3:
77 mr r3,r4 /* return value: uncleared byte count (0 if the loop ran to completion) */
78 blr
79
/*
 * __clear_user: zero r4 bytes starting at address r3.
 *
 * Register use, as established by the code below:
 *   r3  - current destination pointer (on entry: start of the block)
 *   r4  - bytes still to clear (on entry: total length)
 *   r0  - constant 0, the source operand of every store
 *   r6  - scratch: alignment byte count, then loop counts
 *   r8  - address the .Ldo_err1 fixup restarts from
 *   r5, r7, r9, r10 - long path only: ppc64_caches pointer / bytes to the
 *         next cache line, log2 line size, line size, size threshold / mask
 *   cr7 - low four bits of the value being split into 8/4/2/1 byte stores
 *
 * The normal path returns 0 in r3. Stores are tagged err1/err2 so that a
 * fault is redirected to the .Ldo_err* fixups, which return r4 instead.
 */
80 _GLOBAL(__clear_user)
81 cmpdi r4,32 /* cr0: fewer than 32 bytes in total? (consumed by the blt below) */
82 neg r6,r3 /* low bits of -r3 = bytes needed to reach an aligned address */
83 li r0,0 /* value written by every store */
84 blt .Lshort_clear /* short request: skip the alignment code entirely */
85 mr r8,r3 /* err1 restart address for the alignment stores */
86 mtocrf 0x01,r6 /* cr7 = low 4 bits of -r3 */
87 clrldi r6,r6,(64-3) /* r6 = (-r3) & 7 = bytes up to the next 8 byte boundary */
88
89 /* Get the destination 8 byte aligned */
90 bf cr7*4+3,1f /* bit of value 1 clear: no single byte needed */
91 err1; stb r0,0(r3)
92 addi r3,r3,1
93
94 1: bf cr7*4+2,2f /* bit of value 2 */
95 err1; sth r0,0(r3)
96 addi r3,r3,2
97
98 2: bf cr7*4+1,3f /* bit of value 4 */
99 err1; stw r0,0(r3)
100 addi r3,r3,4
101
102 3: sub r4,r4,r6 /* account for the alignment bytes just written */
103
104 cmpdi r4,32
105 cmpdi cr1,r4,512
106 blt .Lshort_clear /* fewer than 32 bytes left */
107 bgt cr1,.Llong_clear /* more than 512 bytes left: try the dcbz path */
108
109 .Lmedium_clear:
110 srdi r6,r4,5 /* r6 = r4 / 32 = number of 32 byte chunks */
111 mtctr r6
112
113 /* Do 32 byte chunks */
114 4:
115 err2; std r0,0(r3)
116 err2; std r0,8(r3)
117 err2; std r0,16(r3)
118 err2; std r0,24(r3)
119 addi r3,r3,32
120 addi r4,r4,-32 /* r3/r4 stay current so the err2 fixup can resume from r3 */
121 bdnz 4b
122
123 .Lshort_clear:
124 /* up to 31 bytes to go */
125 cmpdi r4,16
126 blt 6f
127 err2; std r0,0(r3)
128 err2; std r0,8(r3)
129 addi r3,r3,16
130 addi r4,r4,-16
131
132 /* Up to 15 bytes to go */
133 6: mr r8,r3 /* err1 restart address for the tail stores */
134 clrldi r4,r4,(64-4) /* r4 &= 15; not decremented below, so it counts from r8 */
135 mtocrf 0x01,r4 /* cr7 = those 4 bits: values 8, 4, 2, 1 */
136 bf cr7*4+0,7f /* bit of value 8 */
137 err1; std r0,0(r3)
138 addi r3,r3,8
139
140 7: bf cr7*4+1,8f /* bit of value 4 */
141 err1; stw r0,0(r3)
142 addi r3,r3,4
143
144 8: bf cr7*4+2,9f /* bit of value 2 */
145 err1; sth r0,0(r3)
146 addi r3,r3,2
147
148 9: bf cr7*4+3,10f /* bit of value 1 */
149 err1; stb r0,0(r3)
150
151 10: li r3,0 /* everything cleared: return 0 */
152 blr
153
154 .Llong_clear:
155 ld r5,PPC64_CACHES@toc(r2) /* r5 = address of ppc64_caches, via the TOC entry */
156
157 bf cr7*4+0,11f /* cr7 still holds -r3 from entry; bit of value 8 set = 8 more bytes to reach 16 byte alignment */
158 err2; std r0,0(r3)
159 addi r3,r3,8
160 addi r4,r4,-8
161
162 /* Destination is 16 byte aligned, need to get it cacheline aligned */
163 11: lwz r7,DCACHEL1LOGLINESIZE(r5) /* r7 = log2 of the line size, going by the offset name */
164 lwz r9,DCACHEL1LINESIZE(r5) /* r9 = line size in bytes, going by the offset name */
165
166 /*
167 * With worst case alignment the long clear loop takes a minimum
168 * of 1 byte less than 2 cachelines.
169 */
170 sldi r10,r9,2 /* r10 = 4 * r9: the threshold actually applied here */
171 cmpd r4,r10
172 blt .Lmedium_clear /* too little left for the dcbz loop */
173
174 neg r6,r3
175 addi r10,r9,-1 /* r10 = line size - 1, used as a mask */
176 and. r5,r6,r10 /* r5 = bytes from r3 up to the next cache line boundary */
177 beq 13f /* already cache line aligned */
178
179 srdi r6,r5,4 /* r6 = r5 / 16 iterations of two doubleword stores */
180 mtctr r6
181 mr r8,r3 /* err1 restart address; r4 is left untouched inside the loop */
182 12:
183 err1; std r0,0(r3)
184 err1; std r0,8(r3)
185 addi r3,r3,16
186 bdnz 12b
187
188 sub r4,r4,r5 /* account for the bytes written to reach the boundary */
189
190 13: srd r6,r4,r7 /* r6 = r4 >> r7 = number of whole cache lines left */
191 mtctr r6
192 mr r8,r3 /* err1 restart address for the dcbz loop */
193 14:
194 err1; dcbz r0,r3 /* zero one cache block at r3 (an RA field of 0 means base 0) */
195 add r3,r3,r9 /* advance by one line */
196 bdnz 14b
197
198 and r4,r4,r10 /* r4 = bytes left over after the whole lines */
199
200 cmpdi r4,32
201 blt .Lshort_clear
202 b .Lmedium_clear