Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | |
2 | /* | |
3 | =============================================================================== | |
4 | ||
5 | This C source fragment is part of the SoftFloat IEC/IEEE Floating-point | |
6 | Arithmetic Package, Release 2. | |
7 | ||
8 | Written by John R. Hauser. This work was made possible in part by the | |
9 | International Computer Science Institute, located at Suite 600, 1947 Center | |
10 | Street, Berkeley, California 94704. Funding was partially provided by the | |
11 | National Science Foundation under grant MIP-9311980. The original version | |
12 | of this code was written as part of a project to build a fixed-point vector | |
13 | processor in collaboration with the University of California at Berkeley, | |
14 | overseen by Profs. Nelson Morgan and John Wawrzynek. More information | |
50a23e6e JM |
15 | is available through the web page |
16 | http://www.jhauser.us/arithmetic/SoftFloat-2b/SoftFloat-source.txt | |
1da177e4 LT |
17 | |
18 | THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort | |
19 | has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT | |
20 | TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO | |
21 | PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY | |
22 | AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. | |
23 | ||
24 | Derivative works are acceptable, even for commercial purposes, so long as | |
25 | (1) they include prominent notice that the work is derivative, and (2) they | |
26 | include prominent notice akin to these three paragraphs for those parts of | |
27 | this code that are retained. | |
28 | ||
29 | =============================================================================== | |
30 | */ | |
31 | ||
32 | /* | |
33 | ------------------------------------------------------------------------------- | |
34 | Shifts `a' right by the number of bits given in `count'. If any nonzero | |
35 | bits are shifted off, they are ``jammed'' into the least significant bit of | |
36 | the result by setting the least significant bit to 1. The value of `count' | |
37 | can be arbitrarily large; in particular, if `count' is greater than 32, the | |
38 | result will be either 0 or 1, depending on whether `a' is zero or nonzero. | |
39 | The result is stored in the location pointed to by `zPtr'. | |
40 | ------------------------------------------------------------------------------- | |
41 | */ | |
42 | INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) | |
43 | { | |
44 | bits32 z; | |
45 | if ( count == 0 ) { | |
46 | z = a; | |
47 | } | |
48 | else if ( count < 32 ) { | |
49 | z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); | |
50 | } | |
51 | else { | |
52 | z = ( a != 0 ); | |
53 | } | |
54 | *zPtr = z; | |
55 | } | |
56 | ||
57 | /* | |
58 | ------------------------------------------------------------------------------- | |
59 | Shifts `a' right by the number of bits given in `count'. If any nonzero | |
60 | bits are shifted off, they are ``jammed'' into the least significant bit of | |
61 | the result by setting the least significant bit to 1. The value of `count' | |
62 | can be arbitrarily large; in particular, if `count' is greater than 64, the | |
63 | result will be either 0 or 1, depending on whether `a' is zero or nonzero. | |
64 | The result is stored in the location pointed to by `zPtr'. | |
65 | ------------------------------------------------------------------------------- | |
66 | */ | |
67 | INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) | |
68 | { | |
69 | bits64 z; | |
70 | ||
71 | __asm__("@shift64RightJamming -- start"); | |
72 | if ( count == 0 ) { | |
73 | z = a; | |
74 | } | |
75 | else if ( count < 64 ) { | |
76 | z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); | |
77 | } | |
78 | else { | |
79 | z = ( a != 0 ); | |
80 | } | |
81 | __asm__("@shift64RightJamming -- end"); | |
82 | *zPtr = z; | |
83 | } | |
84 | ||
85 | /* | |
86 | ------------------------------------------------------------------------------- | |
87 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 | |
88 | _plus_ the number of bits given in `count'. The shifted result is at most | |
89 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The | |
90 | bits shifted off form a second 64-bit result as follows: The _last_ bit | |
91 | shifted off is the most-significant bit of the extra result, and the other | |
92 | 63 bits of the extra result are all zero if and only if _all_but_the_last_ | |
93 | bits shifted off were all zero. This extra result is stored in the location | |
94 | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. | |
95 | (This routine makes more sense if `a0' and `a1' are considered to form a | |
96 | fixed-point value with binary point between `a0' and `a1'. This fixed-point | |
97 | value is shifted right by the number of bits given in `count', and the | |
98 | integer part of the result is returned at the location pointed to by | |
99 | `z0Ptr'. The fractional part of the result may be slightly corrupted as | |
100 | described above, and is returned at the location pointed to by `z1Ptr'.) | |
101 | ------------------------------------------------------------------------------- | |
102 | */ | |
103 | INLINE void | |
104 | shift64ExtraRightJamming( | |
105 | bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) | |
106 | { | |
107 | bits64 z0, z1; | |
108 | int8 negCount = ( - count ) & 63; | |
109 | ||
110 | if ( count == 0 ) { | |
111 | z1 = a1; | |
112 | z0 = a0; | |
113 | } | |
114 | else if ( count < 64 ) { | |
115 | z1 = ( a0<<negCount ) | ( a1 != 0 ); | |
116 | z0 = a0>>count; | |
117 | } | |
118 | else { | |
119 | if ( count == 64 ) { | |
120 | z1 = a0 | ( a1 != 0 ); | |
121 | } | |
122 | else { | |
123 | z1 = ( ( a0 | a1 ) != 0 ); | |
124 | } | |
125 | z0 = 0; | |
126 | } | |
127 | *z1Ptr = z1; | |
128 | *z0Ptr = z0; | |
129 | ||
130 | } | |
131 | ||
132 | /* | |
133 | ------------------------------------------------------------------------------- | |
134 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the | |
135 | number of bits given in `count'. Any bits shifted off are lost. The value | |
136 | of `count' can be arbitrarily large; in particular, if `count' is greater | |
137 | than 128, the result will be 0. The result is broken into two 64-bit pieces | |
138 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. | |
139 | ------------------------------------------------------------------------------- | |
140 | */ | |
141 | INLINE void | |
142 | shift128Right( | |
143 | bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) | |
144 | { | |
145 | bits64 z0, z1; | |
146 | int8 negCount = ( - count ) & 63; | |
147 | ||
148 | if ( count == 0 ) { | |
149 | z1 = a1; | |
150 | z0 = a0; | |
151 | } | |
152 | else if ( count < 64 ) { | |
153 | z1 = ( a0<<negCount ) | ( a1>>count ); | |
154 | z0 = a0>>count; | |
155 | } | |
156 | else { | |
157 | z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; | |
158 | z0 = 0; | |
159 | } | |
160 | *z1Ptr = z1; | |
161 | *z0Ptr = z0; | |
162 | ||
163 | } | |
164 | ||
165 | /* | |
166 | ------------------------------------------------------------------------------- | |
167 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the | |
168 | number of bits given in `count'. If any nonzero bits are shifted off, they | |
169 | are ``jammed'' into the least significant bit of the result by setting the | |
170 | least significant bit to 1. The value of `count' can be arbitrarily large; | |
171 | in particular, if `count' is greater than 128, the result will be either 0 | |
172 | or 1, depending on whether the concatenation of `a0' and `a1' is zero or | |
173 | nonzero. The result is broken into two 64-bit pieces which are stored at | |
174 | the locations pointed to by `z0Ptr' and `z1Ptr'. | |
175 | ------------------------------------------------------------------------------- | |
176 | */ | |
177 | INLINE void | |
178 | shift128RightJamming( | |
179 | bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) | |
180 | { | |
181 | bits64 z0, z1; | |
182 | int8 negCount = ( - count ) & 63; | |
183 | ||
184 | if ( count == 0 ) { | |
185 | z1 = a1; | |
186 | z0 = a0; | |
187 | } | |
188 | else if ( count < 64 ) { | |
189 | z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); | |
190 | z0 = a0>>count; | |
191 | } | |
192 | else { | |
193 | if ( count == 64 ) { | |
194 | z1 = a0 | ( a1 != 0 ); | |
195 | } | |
196 | else if ( count < 128 ) { | |
197 | z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); | |
198 | } | |
199 | else { | |
200 | z1 = ( ( a0 | a1 ) != 0 ); | |
201 | } | |
202 | z0 = 0; | |
203 | } | |
204 | *z1Ptr = z1; | |
205 | *z0Ptr = z0; | |
206 | ||
207 | } | |
208 | ||
209 | /* | |
210 | ------------------------------------------------------------------------------- | |
211 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right | |
212 | by 64 _plus_ the number of bits given in `count'. The shifted result is | |
213 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are | |
214 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted | |
215 | off form a third 64-bit result as follows: The _last_ bit shifted off is | |
216 | the most-significant bit of the extra result, and the other 63 bits of the | |
217 | extra result are all zero if and only if _all_but_the_last_ bits shifted off | |
218 | were all zero. This extra result is stored in the location pointed to by | |
219 | `z2Ptr'. The value of `count' can be arbitrarily large. | |
220 | (This routine makes more sense if `a0', `a1', and `a2' are considered | |
221 | to form a fixed-point value with binary point between `a1' and `a2'. This | |
222 | fixed-point value is shifted right by the number of bits given in `count', | |
223 | and the integer part of the result is returned at the locations pointed to | |
224 | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly | |
225 | corrupted as described above, and is returned at the location pointed to by | |
226 | `z2Ptr'.) | |
227 | ------------------------------------------------------------------------------- | |
228 | */ | |
229 | INLINE void | |
230 | shift128ExtraRightJamming( | |
231 | bits64 a0, | |
232 | bits64 a1, | |
233 | bits64 a2, | |
234 | int16 count, | |
235 | bits64 *z0Ptr, | |
236 | bits64 *z1Ptr, | |
237 | bits64 *z2Ptr | |
238 | ) | |
239 | { | |
240 | bits64 z0, z1, z2; | |
241 | int8 negCount = ( - count ) & 63; | |
242 | ||
243 | if ( count == 0 ) { | |
244 | z2 = a2; | |
245 | z1 = a1; | |
246 | z0 = a0; | |
247 | } | |
248 | else { | |
249 | if ( count < 64 ) { | |
250 | z2 = a1<<negCount; | |
251 | z1 = ( a0<<negCount ) | ( a1>>count ); | |
252 | z0 = a0>>count; | |
253 | } | |
254 | else { | |
255 | if ( count == 64 ) { | |
256 | z2 = a1; | |
257 | z1 = a0; | |
258 | } | |
259 | else { | |
260 | a2 |= a1; | |
261 | if ( count < 128 ) { | |
262 | z2 = a0<<negCount; | |
263 | z1 = a0>>( count & 63 ); | |
264 | } | |
265 | else { | |
266 | z2 = ( count == 128 ) ? a0 : ( a0 != 0 ); | |
267 | z1 = 0; | |
268 | } | |
269 | } | |
270 | z0 = 0; | |
271 | } | |
272 | z2 |= ( a2 != 0 ); | |
273 | } | |
274 | *z2Ptr = z2; | |
275 | *z1Ptr = z1; | |
276 | *z0Ptr = z0; | |
277 | ||
278 | } | |
279 | ||
280 | /* | |
281 | ------------------------------------------------------------------------------- | |
282 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the | |
283 | number of bits given in `count'. Any bits shifted off are lost. The value | |
284 | of `count' must be less than 64. The result is broken into two 64-bit | |
285 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. | |
286 | ------------------------------------------------------------------------------- | |
287 | */ | |
288 | INLINE void | |
289 | shortShift128Left( | |
290 | bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) | |
291 | { | |
292 | ||
293 | *z1Ptr = a1<<count; | |
294 | *z0Ptr = | |
295 | ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) ); | |
296 | ||
297 | } | |
298 | ||
299 | /* | |
300 | ------------------------------------------------------------------------------- | |
301 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left | |
302 | by the number of bits given in `count'. Any bits shifted off are lost. | |
303 | The value of `count' must be less than 64. The result is broken into three | |
304 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', | |
305 | `z1Ptr', and `z2Ptr'. | |
306 | ------------------------------------------------------------------------------- | |
307 | */ | |
308 | INLINE void | |
309 | shortShift192Left( | |
310 | bits64 a0, | |
311 | bits64 a1, | |
312 | bits64 a2, | |
313 | int16 count, | |
314 | bits64 *z0Ptr, | |
315 | bits64 *z1Ptr, | |
316 | bits64 *z2Ptr | |
317 | ) | |
318 | { | |
319 | bits64 z0, z1, z2; | |
320 | int8 negCount; | |
321 | ||
322 | z2 = a2<<count; | |
323 | z1 = a1<<count; | |
324 | z0 = a0<<count; | |
325 | if ( 0 < count ) { | |
326 | negCount = ( ( - count ) & 63 ); | |
327 | z1 |= a2>>negCount; | |
328 | z0 |= a1>>negCount; | |
329 | } | |
330 | *z2Ptr = z2; | |
331 | *z1Ptr = z1; | |
332 | *z0Ptr = z0; | |
333 | ||
334 | } | |
335 | ||
336 | /* | |
337 | ------------------------------------------------------------------------------- | |
338 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit | |
339 | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so | |
340 | any carry out is lost. The result is broken into two 64-bit pieces which | |
341 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. | |
342 | ------------------------------------------------------------------------------- | |
343 | */ | |
344 | INLINE void | |
345 | add128( | |
346 | bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) | |
347 | { | |
348 | bits64 z1; | |
349 | ||
350 | z1 = a1 + b1; | |
351 | *z1Ptr = z1; | |
352 | *z0Ptr = a0 + b0 + ( z1 < a1 ); | |
353 | ||
354 | } | |
355 | ||
356 | /* | |
357 | ------------------------------------------------------------------------------- | |
358 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the | |
359 | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is | |
360 | modulo 2^192, so any carry out is lost. The result is broken into three | |
361 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', | |
362 | `z1Ptr', and `z2Ptr'. | |
363 | ------------------------------------------------------------------------------- | |
364 | */ | |
365 | INLINE void | |
366 | add192( | |
367 | bits64 a0, | |
368 | bits64 a1, | |
369 | bits64 a2, | |
370 | bits64 b0, | |
371 | bits64 b1, | |
372 | bits64 b2, | |
373 | bits64 *z0Ptr, | |
374 | bits64 *z1Ptr, | |
375 | bits64 *z2Ptr | |
376 | ) | |
377 | { | |
378 | bits64 z0, z1, z2; | |
379 | int8 carry0, carry1; | |
380 | ||
381 | z2 = a2 + b2; | |
382 | carry1 = ( z2 < a2 ); | |
383 | z1 = a1 + b1; | |
384 | carry0 = ( z1 < a1 ); | |
385 | z0 = a0 + b0; | |
386 | z1 += carry1; | |
387 | z0 += ( z1 < carry1 ); | |
388 | z0 += carry0; | |
389 | *z2Ptr = z2; | |
390 | *z1Ptr = z1; | |
391 | *z0Ptr = z0; | |
392 | ||
393 | } | |
394 | ||
395 | /* | |
396 | ------------------------------------------------------------------------------- | |
397 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the | |
398 | 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo | |
399 | 2^128, so any borrow out (carry out) is lost. The result is broken into two | |
400 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and | |
401 | `z1Ptr'. | |
402 | ------------------------------------------------------------------------------- | |
403 | */ | |
404 | INLINE void | |
405 | sub128( | |
406 | bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) | |
407 | { | |
408 | ||
409 | *z1Ptr = a1 - b1; | |
410 | *z0Ptr = a0 - b0 - ( a1 < b1 ); | |
411 | ||
412 | } | |
413 | ||
414 | /* | |
415 | ------------------------------------------------------------------------------- | |
416 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' | |
417 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'. | |
418 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The | |
419 | result is broken into three 64-bit pieces which are stored at the locations | |
420 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. | |
421 | ------------------------------------------------------------------------------- | |
422 | */ | |
423 | INLINE void | |
424 | sub192( | |
425 | bits64 a0, | |
426 | bits64 a1, | |
427 | bits64 a2, | |
428 | bits64 b0, | |
429 | bits64 b1, | |
430 | bits64 b2, | |
431 | bits64 *z0Ptr, | |
432 | bits64 *z1Ptr, | |
433 | bits64 *z2Ptr | |
434 | ) | |
435 | { | |
436 | bits64 z0, z1, z2; | |
437 | int8 borrow0, borrow1; | |
438 | ||
439 | z2 = a2 - b2; | |
440 | borrow1 = ( a2 < b2 ); | |
441 | z1 = a1 - b1; | |
442 | borrow0 = ( a1 < b1 ); | |
443 | z0 = a0 - b0; | |
444 | z0 -= ( z1 < borrow1 ); | |
445 | z1 -= borrow1; | |
446 | z0 -= borrow0; | |
447 | *z2Ptr = z2; | |
448 | *z1Ptr = z1; | |
449 | *z0Ptr = z0; | |
450 | ||
451 | } | |
452 | ||
453 | /* | |
454 | ------------------------------------------------------------------------------- | |
455 | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken | |
456 | into two 64-bit pieces which are stored at the locations pointed to by | |
457 | `z0Ptr' and `z1Ptr'. | |
458 | ------------------------------------------------------------------------------- | |
459 | */ | |
460 | INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr ) | |
461 | { | |
462 | bits32 aHigh, aLow, bHigh, bLow; | |
463 | bits64 z0, zMiddleA, zMiddleB, z1; | |
464 | ||
465 | aLow = a; | |
466 | aHigh = a>>32; | |
467 | bLow = b; | |
468 | bHigh = b>>32; | |
469 | z1 = ( (bits64) aLow ) * bLow; | |
470 | zMiddleA = ( (bits64) aLow ) * bHigh; | |
471 | zMiddleB = ( (bits64) aHigh ) * bLow; | |
472 | z0 = ( (bits64) aHigh ) * bHigh; | |
473 | zMiddleA += zMiddleB; | |
474 | z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); | |
475 | zMiddleA <<= 32; | |
476 | z1 += zMiddleA; | |
477 | z0 += ( z1 < zMiddleA ); | |
478 | *z1Ptr = z1; | |
479 | *z0Ptr = z0; | |
480 | ||
481 | } | |
482 | ||
483 | /* | |
484 | ------------------------------------------------------------------------------- | |
485 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to | |
486 | obtain a 192-bit product. The product is broken into three 64-bit pieces | |
487 | which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and | |
488 | `z2Ptr'. | |
489 | ------------------------------------------------------------------------------- | |
490 | */ | |
491 | INLINE void | |
492 | mul128By64To192( | |
493 | bits64 a0, | |
494 | bits64 a1, | |
495 | bits64 b, | |
496 | bits64 *z0Ptr, | |
497 | bits64 *z1Ptr, | |
498 | bits64 *z2Ptr | |
499 | ) | |
500 | { | |
501 | bits64 z0, z1, z2, more1; | |
502 | ||
503 | mul64To128( a1, b, &z1, &z2 ); | |
504 | mul64To128( a0, b, &z0, &more1 ); | |
505 | add128( z0, more1, 0, z1, &z0, &z1 ); | |
506 | *z2Ptr = z2; | |
507 | *z1Ptr = z1; | |
508 | *z0Ptr = z0; | |
509 | ||
510 | } | |
511 | ||
512 | /* | |
513 | ------------------------------------------------------------------------------- | |
514 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the | |
515 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit | |
516 | product. The product is broken into four 64-bit pieces which are stored at | |
517 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. | |
518 | ------------------------------------------------------------------------------- | |
519 | */ | |
520 | INLINE void | |
521 | mul128To256( | |
522 | bits64 a0, | |
523 | bits64 a1, | |
524 | bits64 b0, | |
525 | bits64 b1, | |
526 | bits64 *z0Ptr, | |
527 | bits64 *z1Ptr, | |
528 | bits64 *z2Ptr, | |
529 | bits64 *z3Ptr | |
530 | ) | |
531 | { | |
532 | bits64 z0, z1, z2, z3; | |
533 | bits64 more1, more2; | |
534 | ||
535 | mul64To128( a1, b1, &z2, &z3 ); | |
536 | mul64To128( a1, b0, &z1, &more2 ); | |
537 | add128( z1, more2, 0, z2, &z1, &z2 ); | |
538 | mul64To128( a0, b0, &z0, &more1 ); | |
539 | add128( z0, more1, 0, z1, &z0, &z1 ); | |
540 | mul64To128( a0, b1, &more1, &more2 ); | |
541 | add128( more1, more2, 0, z2, &more1, &z2 ); | |
542 | add128( z0, z1, 0, more1, &z0, &z1 ); | |
543 | *z3Ptr = z3; | |
544 | *z2Ptr = z2; | |
545 | *z1Ptr = z1; | |
546 | *z0Ptr = z0; | |
547 | ||
548 | } | |
549 | ||
550 | /* | |
551 | ------------------------------------------------------------------------------- | |
552 | Returns an approximation to the 64-bit integer quotient obtained by dividing | |
553 | `b' into the 128-bit value formed by concatenating `a0' and `a1'. The | |
554 | divisor `b' must be at least 2^63. If q is the exact quotient truncated | |
555 | toward zero, the approximation returned lies between q and q + 2 inclusive. | |
556 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit | |
557 | unsigned integer is returned. | |
558 | ------------------------------------------------------------------------------- | |
559 | */ | |
560 | static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) | |
561 | { | |
562 | bits64 b0, b1; | |
563 | bits64 rem0, rem1, term0, term1; | |
564 | bits64 z; | |
565 | if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); | |
c1241c4c NP |
566 | b0 = b>>32; /* hence b0 is 32 bits wide now */ |
567 | if ( b0<<32 <= a0 ) { | |
568 | z = LIT64( 0xFFFFFFFF00000000 ); | |
569 | } else { | |
570 | z = a0; | |
571 | do_div( z, b0 ); | |
572 | z <<= 32; | |
573 | } | |
1da177e4 LT |
574 | mul64To128( b, z, &term0, &term1 ); |
575 | sub128( a0, a1, term0, term1, &rem0, &rem1 ); | |
576 | while ( ( (sbits64) rem0 ) < 0 ) { | |
577 | z -= LIT64( 0x100000000 ); | |
578 | b1 = b<<32; | |
579 | add128( rem0, rem1, b0, b1, &rem0, &rem1 ); | |
580 | } | |
581 | rem0 = ( rem0<<32 ) | ( rem1>>32 ); | |
c1241c4c NP |
582 | if ( b0<<32 <= rem0 ) { |
583 | z |= 0xFFFFFFFF; | |
584 | } else { | |
585 | do_div( rem0, b0 ); | |
586 | z |= rem0; | |
587 | } | |
1da177e4 LT |
588 | return z; |
589 | ||
590 | } | |
591 | ||
592 | /* | |
593 | ------------------------------------------------------------------------------- | |
594 | Returns an approximation to the square root of the 32-bit significand given | |
595 | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of | |
596 | `aExp' (the least significant bit) is 1, the integer returned approximates | |
597 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' | |
598 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either | |
599 | case, the approximation returned lies strictly within +/-2 of the exact | |
600 | value. | |
601 | ------------------------------------------------------------------------------- | |
602 | */ | |
603 | static bits32 estimateSqrt32( int16 aExp, bits32 a ) | |
604 | { | |
605 | static const bits16 sqrtOddAdjustments[] = { | |
606 | 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, | |
607 | 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 | |
608 | }; | |
609 | static const bits16 sqrtEvenAdjustments[] = { | |
610 | 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, | |
611 | 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 | |
612 | }; | |
613 | int8 index; | |
614 | bits32 z; | |
c1241c4c | 615 | bits64 A; |
1da177e4 LT |
616 | |
617 | index = ( a>>27 ) & 15; | |
618 | if ( aExp & 1 ) { | |
619 | z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ]; | |
620 | z = ( ( a / z )<<14 ) + ( z<<15 ); | |
621 | a >>= 1; | |
622 | } | |
623 | else { | |
624 | z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ]; | |
625 | z = a / z + z; | |
626 | z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); | |
627 | if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); | |
628 | } | |
c1241c4c NP |
629 | A = ( (bits64) a )<<31; |
630 | do_div( A, z ); | |
631 | return ( (bits32) A ) + ( z>>1 ); | |
1da177e4 LT |
632 | |
633 | } | |
634 | ||
635 | /* | |
636 | ------------------------------------------------------------------------------- | |
637 | Returns the number of leading 0 bits before the most-significant 1 bit | |
638 | of `a'. If `a' is zero, 32 is returned. | |
639 | ------------------------------------------------------------------------------- | |
640 | */ | |
641 | static int8 countLeadingZeros32( bits32 a ) | |
642 | { | |
643 | static const int8 countLeadingZerosHigh[] = { | |
644 | 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, | |
645 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
646 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
647 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
648 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
649 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
650 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
651 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
652 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
653 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
654 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
655 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
656 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
657 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
658 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
659 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 | |
660 | }; | |
661 | int8 shiftCount; | |
662 | ||
663 | shiftCount = 0; | |
664 | if ( a < 0x10000 ) { | |
665 | shiftCount += 16; | |
666 | a <<= 16; | |
667 | } | |
668 | if ( a < 0x1000000 ) { | |
669 | shiftCount += 8; | |
670 | a <<= 8; | |
671 | } | |
672 | shiftCount += countLeadingZerosHigh[ a>>24 ]; | |
673 | return shiftCount; | |
674 | ||
675 | } | |
676 | ||
677 | /* | |
678 | ------------------------------------------------------------------------------- | |
679 | Returns the number of leading 0 bits before the most-significant 1 bit | |
680 | of `a'. If `a' is zero, 64 is returned. | |
681 | ------------------------------------------------------------------------------- | |
682 | */ | |
683 | static int8 countLeadingZeros64( bits64 a ) | |
684 | { | |
685 | int8 shiftCount; | |
686 | ||
687 | shiftCount = 0; | |
688 | if ( a < ( (bits64) 1 )<<32 ) { | |
689 | shiftCount += 32; | |
690 | } | |
691 | else { | |
692 | a >>= 32; | |
693 | } | |
694 | shiftCount += countLeadingZeros32( a ); | |
695 | return shiftCount; | |
696 | ||
697 | } | |
698 | ||
699 | /* | |
700 | ------------------------------------------------------------------------------- | |
701 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' | |
702 | is equal to the 128-bit value formed by concatenating `b0' and `b1'. | |
703 | Otherwise, returns 0. | |
704 | ------------------------------------------------------------------------------- | |
705 | */ | |
706 | INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) | |
707 | { | |
708 | ||
709 | return ( a0 == b0 ) && ( a1 == b1 ); | |
710 | ||
711 | } | |
712 | ||
713 | /* | |
714 | ------------------------------------------------------------------------------- | |
715 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less | |
716 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'. | |
717 | Otherwise, returns 0. | |
718 | ------------------------------------------------------------------------------- | |
719 | */ | |
720 | INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) | |
721 | { | |
722 | ||
723 | return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); | |
724 | ||
725 | } | |
726 | ||
727 | /* | |
728 | ------------------------------------------------------------------------------- | |
729 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less | |
730 | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, | |
731 | returns 0. | |
732 | ------------------------------------------------------------------------------- | |
733 | */ | |
734 | INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) | |
735 | { | |
736 | ||
737 | return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); | |
738 | ||
739 | } | |
740 | ||
741 | /* | |
742 | ------------------------------------------------------------------------------- | |
743 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is | |
744 | not equal to the 128-bit value formed by concatenating `b0' and `b1'. | |
745 | Otherwise, returns 0. | |
746 | ------------------------------------------------------------------------------- | |
747 | */ | |
748 | INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) | |
749 | { | |
750 | ||
751 | return ( a0 != b0 ) || ( a1 != b1 ); | |
752 | ||
753 | } | |
754 |