arch/i386/math-emu/poly_sin.c

   1 /*---------------------------------------------------------------------------+
   2  |  poly_sin.c                                                               |
   3  |                                                                           |
   4  |  Computation of an approximation of the sin function and the cosine       |
   5  |  function by a polynomial.                                                |
   6  |                                                                           |
   7  | Copyright (C) 1992,1993,1994,1997,1999                                    |
   8  |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
   9  |                  E-mail   billm@melbpc.org.au                             |
  10  |                                                                           |
  11  |                                                                           |
  12  +---------------------------------------------------------------------------*/
  13
  14
  15 #include "exception.h"
  16 #include "reg_constant.h"
  17 #include "fpu_emu.h"
  18 #include "fpu_system.h"
  19 #include "control_w.h"
  20 #include "poly.h"
  21
  22
  23 #define N_COEFF_P       4
  24 #define N_COEFF_N       4
  25
  26 static const unsigned long long pos_terms_l[N_COEFF_P] =
  27 {
  28   0xaaaaaaaaaaaaaaabLL,
  29   0x00d00d00d00cf906LL,
  30   0x000006b99159a8bbLL,
  31   0x000000000d7392e6LL
  32 };
  33
  34 static const unsigned long long neg_terms_l[N_COEFF_N] =
  35 {
  36   0x2222222222222167LL,
  37   0x0002e3bc74aab624LL,
  38   0x0000000b09229062LL,
  39   0x00000000000c7973LL
  40 };
  41
  42
  43
  44 #define N_COEFF_PH      4
  45 #define N_COEFF_NH      4
  46 static const unsigned long long pos_terms_h[N_COEFF_PH] =
  47 {
  48   0x0000000000000000LL,
  49   0x05b05b05b05b0406LL,
  50   0x000049f93edd91a9LL,
  51   0x00000000c9c9ed62LL
  52 };
  53
  54 static const unsigned long long neg_terms_h[N_COEFF_NH] =
  55 {
  56   0xaaaaaaaaaaaaaa98LL,
  57   0x001a01a01a019064LL,
  58   0x0000008f76c68a77LL,
  59   0x0000000000d58f5eLL
  60 };
  61
  62
  63 /*--- poly_sine() -----------------------------------------------------------+
  64  |                                                                           |
  65  +---------------------------------------------------------------------------*/
  66 void    poly_sine(FPU_REG *st0_ptr)
  67 {
  68   int                 exponent, echange;
  69   Xsig                accumulator, argSqrd, argTo4;
  70   unsigned long       fix_up, adj;
  71   unsigned long long  fixed_arg;
  72   FPU_REG             result;
  73
  74   exponent = exponent(st0_ptr);
  75
  76   accumulator.lsw = accumulator.midw = accumulator.msw = 0;
  77
  78   /* Split into two ranges, for arguments below and above 1.0 */
  79   /* The boundary between upper and lower is approx 0.88309101259 */
  80   if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) )
  81     {
  82       /* The argument is <= 0.88309101259 */
  83
  84       argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0;
  85       mul64_Xsig(&argSqrd, &significand(st0_ptr));
  86       shr_Xsig(&argSqrd, 2*(-1-exponent));
  87       argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
  88       argTo4.lsw = argSqrd.lsw;
  89       mul_Xsig_Xsig(&argTo4, &argTo4);
  90
  91       polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
  92                       N_COEFF_N-1);
  93       mul_Xsig_Xsig(&accumulator, &argSqrd);
  94       negate_Xsig(&accumulator);
  95
  96       polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
  97                       N_COEFF_P-1);
  98
  99       shr_Xsig(&accumulator, 2);    /* Divide by four */
 100       accumulator.msw |= 0x80000000;  /* Add 1.0 */
 101
 102       mul64_Xsig(&accumulator, &significand(st0_ptr));
 103       mul64_Xsig(&accumulator, &significand(st0_ptr));
 104       mul64_Xsig(&accumulator, &significand(st0_ptr));
 105
 106       /* Divide by four, FPU_REG compatible, etc */
 107       exponent = 3*exponent;
 108
 109       /* The minimum exponent difference is 3 */
 110       shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
 111
 112       negate_Xsig(&accumulator);
 113       XSIG_LL(accumulator) += significand(st0_ptr);
 114
 115       echange = round_Xsig(&accumulator);
 116
 117       setexponentpos(&result, exponent(st0_ptr) + echange);
 118     }
 119   else
 120     {
 121       /* The argument is > 0.88309101259 */
 122       /* We use sin(st(0)) = cos(pi/2-st(0)) */
 123
 124       fixed_arg = significand(st0_ptr);
 125
 126       if ( exponent == 0 )
 127         {
 128           /* The argument is >= 1.0 */
 129
 130           /* Put the binary point at the left. */
 131           fixed_arg <<= 1;
 132         }
 133       /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
 134       fixed_arg = 0x921fb54442d18469LL - fixed_arg;
 135       /* There is a special case which arises due to rounding, to fix here. */
 136       if ( fixed_arg == 0xffffffffffffffffLL )
 137         fixed_arg = 0;
 138
 139       XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
 140       mul64_Xsig(&argSqrd, &fixed_arg);
 141
 142       XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw;
 143       mul_Xsig_Xsig(&argTo4, &argTo4);
 144
 145       polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
 146                       N_COEFF_NH-1);
 147       mul_Xsig_Xsig(&accumulator, &argSqrd);
 148       negate_Xsig(&accumulator);
 149
 150       polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
 151                       N_COEFF_PH-1);
 152       negate_Xsig(&accumulator);
 153
 154       mul64_Xsig(&accumulator, &fixed_arg);
 155       mul64_Xsig(&accumulator, &fixed_arg);
 156
 157       shr_Xsig(&accumulator, 3);
 158       negate_Xsig(&accumulator);
 159
 160       add_Xsig_Xsig(&accumulator, &argSqrd);
 161
 162       shr_Xsig(&accumulator, 1);
 163
 164       accumulator.lsw |= 1;  /* A zero accumulator here would cause problems */
 165       negate_Xsig(&accumulator);
 166
 167       /* The basic computation is complete. Now fix the answer to
 168          compensate for the error due to the approximation used for
 169          pi/2
 170          */
 171
 172       /* This has an exponent of -65 */
 173       fix_up = 0x898cc517;
 174       /* The fix-up needs to be improved for larger args */
 175       if ( argSqrd.msw & 0xffc00000 )
 176         {
 177           /* Get about 32 bit precision in these: */
 178           fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
 179         }
 180       fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
 181
 182       adj = accumulator.lsw;    /* temp save */
 183       accumulator.lsw -= fix_up;
 184       if ( accumulator.lsw > adj )
 185         XSIG_LL(accumulator) --;
 186
 187       echange = round_Xsig(&accumulator);
 188
 189       setexponentpos(&result, echange - 1);
 190     }
 191
 192   significand(&result) = XSIG_LL(accumulator);
 193   setsign(&result, getsign(st0_ptr));
 194   FPU_copy_to_reg0(&result, TAG_Valid);
 195
 196 #ifdef PARANOID
 197   if ( (exponent(&result) >= 0)
 198       && (significand(&result) > 0x8000000000000000LL) )
 199     {
 200       EXCEPTION(EX_INTERNAL|0x150);
 201     }
 202 #endif /* PARANOID */
 203
 204 }
 205
 206
 207
 208 /*--- poly_cos() ------------------------------------------------------------+
 209  |                                                                           |
 210  +---------------------------------------------------------------------------*/
 211 void    poly_cos(FPU_REG *st0_ptr)
 212 {
 213   FPU_REG             result;
 214   long int            exponent, exp2, echange;
 215   Xsig                accumulator, argSqrd, fix_up, argTo4;
 216   unsigned long long  fixed_arg;
 217
 218 #ifdef PARANOID
 219   if ( (exponent(st0_ptr) > 0)
 220       || ((exponent(st0_ptr) == 0)
 221           && (significand(st0_ptr) > 0xc90fdaa22168c234LL)) )
 222     {
 223       EXCEPTION(EX_Invalid);
 224       FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
 225       return;
 226     }
 227 #endif /* PARANOID */
 228
 229   exponent = exponent(st0_ptr);
 230
 231   accumulator.lsw = accumulator.midw = accumulator.msw = 0;
 232
 233   if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) )
 234     {
 235       /* arg is < 0.687705 */
 236
 237       argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl;
 238       argSqrd.lsw = 0;
 239       mul64_Xsig(&argSqrd, &significand(st0_ptr));
 240
 241       if ( exponent < -1 )
 242         {
 243           /* shift the argument right by the required places */
 244           shr_Xsig(&argSqrd, 2*(-1-exponent));
 245         }
 246
 247       argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
 248       argTo4.lsw = argSqrd.lsw;
 249       mul_Xsig_Xsig(&argTo4, &argTo4);
 250
 251       polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
 252                       N_COEFF_NH-1);
 253       mul_Xsig_Xsig(&accumulator, &argSqrd);
 254       negate_Xsig(&accumulator);
 255
 256       polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
 257                       N_COEFF_PH-1);
 258       negate_Xsig(&accumulator);
 259
 260       mul64_Xsig(&accumulator, &significand(st0_ptr));
 261       mul64_Xsig(&accumulator, &significand(st0_ptr));
 262       shr_Xsig(&accumulator, -2*(1+exponent));
 263
 264       shr_Xsig(&accumulator, 3);
 265       negate_Xsig(&accumulator);
 266
 267       add_Xsig_Xsig(&accumulator, &argSqrd);
 268
 269       shr_Xsig(&accumulator, 1);
 270
 271       /* It doesn't matter if accumulator is all zero here, the
 272          following code will work ok */
 273       negate_Xsig(&accumulator);
 274
 275       if ( accumulator.lsw & 0x80000000 )
 276         XSIG_LL(accumulator) ++;
 277       if ( accumulator.msw == 0 )
 278         {
 279           /* The result is 1.0 */
 280           FPU_copy_to_reg0(&CONST_1, TAG_Valid);
 281           return;
 282         }
 283       else
 284         {
 285           significand(&result) = XSIG_LL(accumulator);
 286
 287           /* will be a valid positive nr with expon = -1 */
 288           setexponentpos(&result, -1);
 289         }
 290     }
 291   else
 292     {
 293       fixed_arg = significand(st0_ptr);
 294
 295       if ( exponent == 0 )
 296         {
 297           /* The argument is >= 1.0 */
 298
 299           /* Put the binary point at the left. */
 300           fixed_arg <<= 1;
 301         }
 302       /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
 303       fixed_arg = 0x921fb54442d18469LL - fixed_arg;
 304       /* There is a special case which arises due to rounding, to fix here. */
 305       if ( fixed_arg == 0xffffffffffffffffLL )
 306         fixed_arg = 0;
 307
 308       exponent = -1;
 309       exp2 = -1;
 310
 311       /* A shift is needed here only for a narrow range of arguments,
 312          i.e. for fixed_arg approx 2^-32, but we pick up more... */
 313       if ( !(LL_MSW(fixed_arg) & 0xffff0000) )
 314         {
 315           fixed_arg <<= 16;
 316           exponent -= 16;
 317           exp2 -= 16;
 318         }
 319
 320       XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
 321       mul64_Xsig(&argSqrd, &fixed_arg);
 322
 323       if ( exponent < -1 )
 324         {
 325           /* shift the argument right by the required places */
 326           shr_Xsig(&argSqrd, 2*(-1-exponent));
 327         }
 328
 329       argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
 330       argTo4.lsw = argSqrd.lsw;
 331       mul_Xsig_Xsig(&argTo4, &argTo4);
 332
 333       polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
 334                       N_COEFF_N-1);
 335       mul_Xsig_Xsig(&accumulator, &argSqrd);
 336       negate_Xsig(&accumulator);
 337
 338       polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
 339                       N_COEFF_P-1);
 340
 341       shr_Xsig(&accumulator, 2);    /* Divide by four */
 342       accumulator.msw |= 0x80000000;  /* Add 1.0 */
 343
 344       mul64_Xsig(&accumulator, &fixed_arg);
 345       mul64_Xsig(&accumulator, &fixed_arg);
 346       mul64_Xsig(&accumulator, &fixed_arg);
 347
 348       /* Divide by four, FPU_REG compatible, etc */
 349       exponent = 3*exponent;
 350
 351       /* The minimum exponent difference is 3 */
 352       shr_Xsig(&accumulator, exp2 - exponent);
 353
 354       negate_Xsig(&accumulator);
 355       XSIG_LL(accumulator) += fixed_arg;
 356
 357       /* The basic computation is complete. Now fix the answer to
 358          compensate for the error due to the approximation used for
 359          pi/2
 360          */
 361
 362       /* This has an exponent of -65 */
 363       XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
 364       fix_up.lsw = 0;
 365
 366       /* The fix-up needs to be improved for larger args */
 367       if ( argSqrd.msw & 0xffc00000 )
 368         {
 369           /* Get about 32 bit precision in these: */
 370           fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
 371           fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
 372         }
 373
 374       exp2 += norm_Xsig(&accumulator);
 375       shr_Xsig(&accumulator, 1); /* Prevent overflow */
 376       exp2++;
 377       shr_Xsig(&fix_up, 65 + exp2);
 378
 379       add_Xsig_Xsig(&accumulator, &fix_up);
 380
 381       echange = round_Xsig(&accumulator);
 382
 383       setexponentpos(&result, exp2 + echange);
 384       significand(&result) = XSIG_LL(accumulator);
 385     }
 386
 387   FPU_copy_to_reg0(&result, TAG_Valid);
 388
 389 #ifdef PARANOID
 390   if ( (exponent(&result) >= 0)
 391       && (significand(&result) > 0x8000000000000000LL) )
 392     {
 393       EXCEPTION(EX_INTERNAL|0x151);
 394     }
 395 #endif /* PARANOID */
 396
 397 }