Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Routines to emulate some Altivec/VMX instructions, specifically | |
3 | * those that can trap when given denormalized operands in Java mode. | |
4 | */ | |
5 | #include <linux/kernel.h> | |
6 | #include <linux/errno.h> | |
7 | #include <linux/sched.h> | |
8 | #include <asm/ptrace.h> | |
9 | #include <asm/processor.h> | |
10 | #include <asm/uaccess.h> | |
11 | ||
12 | /* Functions in vector.S */ | |
13 | extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); | |
14 | extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b); | |
15 | extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); | |
16 | extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); | |
17 | extern void vrefp(vector128 *dst, vector128 *src); | |
18 | extern void vrsqrtefp(vector128 *dst, vector128 *src); | |
19 | extern void vexptep(vector128 *dst, vector128 *src); | |
20 | ||
/*
 * Significands of 2^(i/8) for i = 0..7, scaled by 2^23 so that the
 * implicit leading one of a single-precision number sits in bit 23.
 */
static const unsigned int exp2s[8] = {
	0x800000,	/* 2^(0/8) */
	0x8b95c2,	/* 2^(1/8) */
	0x9837f0,	/* 2^(2/8) */
	0xa5fed7,	/* 2^(3/8) */
	0xb504f3,	/* 2^(4/8) */
	0xc5672a,	/* 2^(5/8) */
	0xd744fd,	/* 2^(6/8) */
	0xeac0c7	/* 2^(7/8) */
};

/*
 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
 * single-precision floating-point representation of x, and the
 * return value is the single-precision representation of the result.
 *
 * Special cases: NaN -> QNaN, |x| >= 256 -> +0 or +Inf depending on
 * the sign, |x| < 2^-23 -> 1.0.  Results too small for a normalized
 * number are returned as (rounded) denormals.
 */
static unsigned int eexp2(unsigned int s)
{
	int exp, pwr;
	unsigned int mant, frac;

	/* extract exponent field from input */
	exp = ((s >> 23) & 0xff) - 127;
	if (exp > 7) {
		/* check for NaN input */
		if (exp == 128 && (s & 0x7fffff) != 0)
			return s | 0x400000;	/* return QNaN */
		/* 2^-big = 0, 2^+big = +Inf */
		return (s & 0x80000000) ? 0 : 0x7f800000;
	}
	if (exp < -23)
		return 0x3f800000;	/* 1.0 */

	/* convert to fixed point integer in 9.23 representation */
	pwr = (s & 0x7fffff) | 0x800000;
	if (exp > 0)
		pwr <<= exp;
	else
		pwr >>= -exp;
	if (s & 0x80000000)
		pwr = -pwr;

	/*
	 * extract integer part, which becomes exponent part of result;
	 * the bias is 126 rather than 127 because the table mantissa
	 * still carries its leading one in bit 23.
	 */
	exp = (pwr >> 23) + 126;
	if (exp >= 254)
		return 0x7f800000;
	if (exp < -23)
		return 0;

	/* table lookup on top 3 bits of fraction to get mantissa */
	mant = exp2s[(pwr >> 20) & 7];

	/*
	 * linear interpolation using remaining 20 bits of fraction:
	 * mant *= 1 + f * (2^0.125 - 1), with f in [0, 1).  Each step
	 * keeps the high 32 bits of a 32x32 unsigned product.  The
	 * shift is done on the unsigned bit pattern because pwr may be
	 * negative here.
	 */
	frac = (unsigned int)(((unsigned long long)((unsigned int)pwr << 12)
			       * 0x172b83ffULL) >> 32);
	frac = (unsigned int)(((unsigned long long)frac * mant) >> 32);
	mant += frac;

	if (exp >= 0)
		return mant + (exp << 23);

	/* denormalized result: shift right with rounding */
	exp = -exp;
	mant += 1 << (exp - 1);
	return mant >> exp;
}
86 | ||
/* High 32 bits of the unsigned 64-bit product a * b. */
static unsigned int elog2_mulhi(unsigned int a, unsigned int b)
{
	return (unsigned int)(((unsigned long long)a * b) >> 32);
}

/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x, and the
 * return value is the single-precision representation of the result.
 *
 * Special cases: NaN -> QNaN, +Inf -> +Inf, +/-0 -> -Inf, and any
 * negative non-zero input (including -Inf) -> QNaN, since the
 * logarithm of a negative number is undefined.
 */
static unsigned int elog2(unsigned int s)
{
	int exp, mant, lz;

	exp = s & 0x7f800000;
	mant = s & 0x7fffff;
	if (exp == 0x7f800000) {	/* Inf or NaN */
		if (mant != 0)
			return s | 0x400000;	/* turn NaN into QNaN */
		/* log2(+Inf) = +Inf, log2(-Inf) is invalid */
		return (s & 0x80000000) ? 0x7fc00000 : s;
	}
	if ((exp | mant) == 0)		/* +0 or -0 */
		return 0xff800000;	/* return -Inf */
	if (s & 0x80000000)
		return 0x7fc00000;	/* x < 0 -> QNaN */

	if (exp == 0) {
		/* denormalized: mant is non-zero here, so clz is defined */
		lz = __builtin_clz((unsigned int)mant);
		mant <<= lz - 8;
		/* multiply instead of shifting a negative value */
		exp = (-118 - lz) * (1 << 23);
	} else {
		mant |= 0x800000;
		exp -= 127 << 23;
	}

	/*
	 * exp now holds the integer part of the result in signed 8.23
	 * fixed point and mant is in [1.0, 2.0) * 2^23.  Peel off
	 * factors of 2^0.5, 2^0.25 and 2^0.125, recording each in the
	 * fraction bits of exp (which are still zero, so |= adds).
	 */
	if (mant >= 0xb504f3) {				/* 2^0.5 * 2^23 */
		exp |= 0x400000;			/* 0.5 * 2^23 */
		mant = elog2_mulhi(mant, 0xb504f334);	/* 2^-0.5 * 2^32 */
	}
	if (mant >= 0x9837f0) {				/* 2^0.25 * 2^23 */
		exp |= 0x200000;			/* 0.25 * 2^23 */
		mant = elog2_mulhi(mant, 0xd744fccb);	/* 2^-0.25 * 2^32 */
	}
	if (mant >= 0x8b95c2) {				/* 2^0.125 * 2^23 */
		exp |= 0x100000;			/* 0.125 * 2^23 */
		mant = elog2_mulhi(mant, 0xeac0c6e8);	/* 2^-0.125 * 2^32 */
	}
	if (mant > 0x800000) {				/* 1.0 * 2^23 */
		/* calculate (mant - 1) * 1.381097463 */
		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
		exp += elog2_mulhi((mant - 0x800000) << 1, 0xb0c7cd3a);
	}

	/* convert the signed 8.23 fixed-point value back to a float */
	s = exp & 0x80000000;
	if (exp != 0) {
		if (s)
			exp = -exp;
		/* normalize so that the leading one lands in bit 23 */
		lz = 8 - __builtin_clz((unsigned int)exp);
		if (lz > 0)
			exp >>= lz;
		else if (lz < 0)
			exp <<= -lz;
		/* bias 126: the leading one in bit 23 supplies the +1 */
		s += ((lz + 126) << 23) + exp;
	}
	return s;
}
151 | ||
#define VSCR_SAT	1

/*
 * Convert the single-precision value whose bit pattern is `x',
 * multiplied by 2^scale, to a signed 32-bit integer, truncating
 * towards zero.  NaN converts to 0.  Out-of-range values clamp to
 * 0x7fffffff or 0x80000000 and set VSCR_SAT in *vscrp, except when
 * the scaled value is exactly -2^31, which is representable.
 */
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
	const unsigned int negative = x & 0x80000000;
	int biased = (x >> 23) & 0xff;
	int fraction = x & 0x7fffff;
	int e, magnitude;

	if (biased == 255 && fraction != 0)
		return 0;		/* NaN -> 0 */

	e = biased - 127 + scale;
	if (e < 0)
		return 0;		/* |value| < 1 truncates to 0 */

	if (e < 31) {
		/* put the leading one in bit 30, then drop the fraction */
		magnitude = ((fraction | 0x800000) << 7) >> (30 - e);
		return negative ? -magnitude : magnitude;
	}

	/* too large: saturate, unless the result would be -2^31 */
	if (x + (scale << 23) != 0xcf000000)
		*vscrp |= VSCR_SAT;
	return negative ? 0x80000000 : 0x7fffffff;
}
175 | ||
176 | static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp) | |
177 | { | |
178 | int exp; | |
179 | unsigned int mant; | |
180 | ||
181 | exp = (x >> 23) & 0xff; | |
182 | mant = x & 0x7fffff; | |
183 | if (exp == 255 && mant != 0) | |
184 | return 0; /* NaN -> 0 */ | |
185 | exp = exp - 127 + scale; | |
186 | if (exp < 0) | |
187 | return 0; /* round towards zero */ | |
188 | if (x & 0x80000000) { | |
189 | /* negative => saturate to 0 */ | |
190 | *vscrp |= VSCR_SAT; | |
191 | return 0; | |
192 | } | |
193 | if (exp >= 32) { | |
194 | /* saturate */ | |
195 | *vscrp |= VSCR_SAT; | |
196 | return 0xffffffff; | |
197 | } | |
198 | mant |= 0x800000; | |
199 | mant = (mant << 8) >> (31 - exp); | |
200 | return mant; | |
201 | } | |
202 | ||
/*
 * Round to floating integer, towards 0.
 * Takes and returns single-precision bit patterns; a NaN input is
 * returned with its quiet bit set.
 */
static unsigned int rfiz(unsigned int x)
{
	const int e = (int)((x >> 23) & 0xff) - 127;

	if (e == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (e < 0)
		return x & 0x80000000;	/* |x| < 1.0 rounds to 0 */
	if (e < 23)
		/* clear the mantissa bits that lie below the binary point */
		return x & ~(0x7fffffu >> e);
	return x;			/* it's an integer already (or Inf) */
}
217 | ||
/*
 * Round to floating integer, towards +/- Inf (i.e. away from zero).
 * Takes and returns single-precision bit patterns; a NaN input is
 * returned with its quiet bit set.
 */
static unsigned int rfii(unsigned int x)
{
	const unsigned int sign = x & 0x80000000;
	const int e = (int)((x >> 23) & 0xff) - 127;
	unsigned int below_point;

	if (e == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	/* already an integer (or Inf), or +/-0 which stays +/-0 */
	if (e >= 23 || (x & 0x7fffffff) == 0)
		return x;
	if (e < 0)
		return sign | 0x3f800000; /* 0 < |x| < 1.0 -> +/- 1.0 */

	below_point = 0x7fffff >> e;
	/*
	 * Bump past the next integer unless already on one, then drop
	 * the fraction.  A carry out of the mantissa just increments
	 * the exponent; it cannot reach the sign bit.
	 */
	return (x + below_point) & ~below_point;
}
238 | ||
/*
 * Round to floating integer, to nearest, with ties going to the even
 * integer (IEEE round-to-nearest, which is what vrfin specifies).
 * Takes and returns single-precision bit patterns; a NaN input is
 * returned with its quiet bit set.
 */
static unsigned int rfin(unsigned int x)
{
	int exp;
	unsigned int frac_mask, unit, half, frac;

	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < -1)
		return x & 0x80000000;	/* |x| < 0.5 -> +/-0 */
	if (exp == -1) {
		/* exactly 0.5 is a tie between 0 and 1: 0 is even */
		if ((x & 0x7fffff) == 0)
			return x & 0x80000000;
		/* 0.5 < |x| < 1.0 rounds to +/- 1.0 */
		return (x & 0x80000000) | 0x3f800000;
	}

	frac_mask = 0x7fffff >> exp;	/* bits below the binary point */
	unit = frac_mask + 1;		/* weight of the integer LSB */
	half = 0x400000 >> exp;		/* 0.5 in the same scale */
	frac = x & frac_mask;

	x &= ~frac_mask;		/* truncate towards zero */
	/*
	 * Round the magnitude up when above half, or on an exact tie
	 * when the truncated integer is odd.  For exp == 0 the integer
	 * LSB is the implicit one, which coincides with the (set) low
	 * bit of the exponent field, so the same test works.  A carry
	 * out of the mantissa just increments the exponent.
	 */
	if (frac > half || (frac == half && (x & unit) != 0))
		x += unit;
	return x;
}
258 | ||
259 | int emulate_altivec(struct pt_regs *regs) | |
260 | { | |
261 | unsigned int instr, i; | |
262 | unsigned int va, vb, vc, vd; | |
263 | vector128 *vrs; | |
264 | ||
265 | if (get_user(instr, (unsigned int __user *) regs->nip)) | |
266 | return -EFAULT; | |
267 | if ((instr >> 26) != 4) | |
268 | return -EINVAL; /* not an altivec instruction */ | |
269 | vd = (instr >> 21) & 0x1f; | |
270 | va = (instr >> 16) & 0x1f; | |
271 | vb = (instr >> 11) & 0x1f; | |
272 | vc = (instr >> 6) & 0x1f; | |
273 | ||
274 | vrs = current->thread.vr; | |
275 | switch (instr & 0x3f) { | |
276 | case 10: | |
277 | switch (vc) { | |
278 | case 0: /* vaddfp */ | |
279 | vaddfp(&vrs[vd], &vrs[va], &vrs[vb]); | |
280 | break; | |
281 | case 1: /* vsubfp */ | |
282 | vsubfp(&vrs[vd], &vrs[va], &vrs[vb]); | |
283 | break; | |
284 | case 4: /* vrefp */ | |
285 | vrefp(&vrs[vd], &vrs[vb]); | |
286 | break; | |
287 | case 5: /* vrsqrtefp */ | |
288 | vrsqrtefp(&vrs[vd], &vrs[vb]); | |
289 | break; | |
290 | case 6: /* vexptefp */ | |
291 | for (i = 0; i < 4; ++i) | |
292 | vrs[vd].u[i] = eexp2(vrs[vb].u[i]); | |
293 | break; | |
294 | case 7: /* vlogefp */ | |
295 | for (i = 0; i < 4; ++i) | |
296 | vrs[vd].u[i] = elog2(vrs[vb].u[i]); | |
297 | break; | |
298 | case 8: /* vrfin */ | |
299 | for (i = 0; i < 4; ++i) | |
300 | vrs[vd].u[i] = rfin(vrs[vb].u[i]); | |
301 | break; | |
302 | case 9: /* vrfiz */ | |
303 | for (i = 0; i < 4; ++i) | |
304 | vrs[vd].u[i] = rfiz(vrs[vb].u[i]); | |
305 | break; | |
306 | case 10: /* vrfip */ | |
307 | for (i = 0; i < 4; ++i) { | |
308 | u32 x = vrs[vb].u[i]; | |
309 | x = (x & 0x80000000)? rfiz(x): rfii(x); | |
310 | vrs[vd].u[i] = x; | |
311 | } | |
312 | break; | |
313 | case 11: /* vrfim */ | |
314 | for (i = 0; i < 4; ++i) { | |
315 | u32 x = vrs[vb].u[i]; | |
316 | x = (x & 0x80000000)? rfii(x): rfiz(x); | |
317 | vrs[vd].u[i] = x; | |
318 | } | |
319 | break; | |
320 | case 14: /* vctuxs */ | |
321 | for (i = 0; i < 4; ++i) | |
322 | vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va, | |
323 | ¤t->thread.vscr.u[3]); | |
324 | break; | |
325 | case 15: /* vctsxs */ | |
326 | for (i = 0; i < 4; ++i) | |
327 | vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va, | |
328 | ¤t->thread.vscr.u[3]); | |
329 | break; | |
330 | default: | |
331 | return -EINVAL; | |
332 | } | |
333 | break; | |
334 | case 46: /* vmaddfp */ | |
335 | vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); | |
336 | break; | |
337 | case 47: /* vnmsubfp */ | |
338 | vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); | |
339 | break; | |
340 | default: | |
341 | return -EINVAL; | |
342 | } | |
343 | ||
344 | return 0; | |
345 | } |