MIPS: math-emu: <MADDF|MSUBF>.D: Fix accuracy (64-bit case)

author Douglas Leung <douglas.leung@imgtec.com>

Thu, 27 Jul 2017 16:08:59 +0000 (18:08 +0200)

committer Ralf Baechle <ralf@linux-mips.org>

Tue, 29 Aug 2017 13:21:56 +0000 (15:21 +0200)
author Douglas Leung <douglas.leung@imgtec.com>
Thu, 27 Jul 2017 16:08:59 +0000 (18:08 +0200)
committer Ralf Baechle <ralf@linux-mips.org>
Tue, 29 Aug 2017 13:21:56 +0000 (15:21 +0200)
diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c

index e799fc826b0c7e99b1f2ee233438ce52fc040322..e0d9be5fbf4cd541406b93c5d2add6814a82a9eb 100644
--- a/arch/mips/math-emu/dp_maddf.c
+++ b/arch/mips/math-emu/dp_maddf.c
@@ -15,18 +15,44 @@
  #include "ieee754dp.h"
  
  
+/* 128-bit logical right shift of *hptr:*lptr by count, done in place; if any non-zero bit is shifted out, bit 0 of the result is set (sticky bit). */
+void srl128(u64 *hptr, u64 *lptr, int count)
+{
+       u64 low;        /* copy of the incoming low word, kept for the sticky test */
+
+       if (count >= 128) {     /* every bit is shifted out */
+               *lptr = *hptr != 0 || *lptr != 0;       /* result is 1 if the input was non-zero, else 0 */
+               *hptr = 0;
+       } else if (count >= 64) {       /* the high word ends up in the low word */
+               if (count == 64) {      /* handled apart: the general case below would shift *hptr left by 64 */
+                       *lptr = *hptr | (*lptr != 0);
+               } else {
+                       low = *lptr;
+                       *lptr = *hptr >> (count - 64);
+                       *lptr |= (*hptr << (128 - count)) != 0 || low != 0;     /* sticky: bits lost from either word */
+               }
+               *hptr = 0;
+       } else {        /* count < 64. NOTE(review): count == 0 would shift by 64 below; calls seen in this patch pass 1, 71 or a positive exponent difference - confirm */
+               low = *lptr;
+               *lptr = low >> count | *hptr << (64 - count);   /* low word receives the bits leaving the high word */
+               *lptr |= (low << (64 - count)) != 0;    /* sticky: bits shifted out of the low word */
+               *hptr = *hptr >> count;
+       }
+}
+
  static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
                                  union ieee754dp y, enum maddf_flags flags)
  {
         int re;
         int rs;
-       u64 rm;
         unsigned lxm;
         unsigned hxm;
         unsigned lym;
         unsigned hym;
         u64 lrm;
         u64 hrm;
+       u64 lzm;
+       u64 hzm;
         u64 t;
         u64 at;
         int s;
@@ -172,7 +198,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
         ym <<= 64 - (DP_FBITS + 1);
  
         /*
-        * Multiply 64 bits xm, ym to give high 64 bits rm with stickness.
+        * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
          */
  
         /* 32 * 32 => 64 */
@@ -202,81 +228,110 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
  
         hrm = hrm + (t >> 32);
  
-       rm = hrm | (lrm != 0);
-
-       /*
-        * Sticky shift down to normal rounding precision.
-        */
-       if ((s64) rm < 0) {
-               rm = (rm >> (64 - (DP_FBITS + 1 + 3))) |
-                    ((rm << (DP_FBITS + 1 + 3)) != 0);
+       /* Put explicit bit at bit 126 if necessary */
+       if ((int64_t)hrm < 0) {
+               lrm = (hrm << 63) | (lrm >> 1);
+               hrm = hrm >> 1;
                 re++;
-       } else {
-               rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
-                    ((rm << (DP_FBITS + 1 + 3 + 1)) != 0);
         }
-       assert(rm & (DP_HIDDEN_BIT << 3));
  
-       if (zc == IEEE754_CLASS_ZERO)
-               return ieee754dp_format(rs, re, rm);
+       assert(hrm & (1 << 62));
  
-       /* And now the addition */
-       assert(zm & DP_HIDDEN_BIT);
+       if (zc == IEEE754_CLASS_ZERO) {
+               /*
+                * Move explicit bit from bit 126 to bit 55 since the
+                * ieee754dp_format code expects the mantissa to be
+                * 56 bits wide (53 + 3 rounding bits).
+                */
+               srl128(&hrm, &lrm, (126 - 55));
+               return ieee754dp_format(rs, re, lrm);
+       }
  
-       /*
-        * Provide guard,round and stick bit space.
-        */
-       zm <<= 3;
+       /* Move explicit bit from bit 52 to bit 126 */
+       lzm = 0;
+       hzm = zm << 10;
+       assert(hzm & (1 << 62));
  
+       /* Make the exponents the same */
         if (ze > re) {
                 /*
                  * Have to shift y fraction right to align.
                  */
                 s = ze - re;
-               rm = XDPSRS(rm, s);
+               srl128(&hrm, &lrm, s);
                 re += s;
         } else if (re > ze) {
                 /*
                  * Have to shift x fraction right to align.
                  */
                 s = re - ze;
-               zm = XDPSRS(zm, s);
+               srl128(&hzm, &lzm, s);
                 ze += s;
         }
         assert(ze == re);
         assert(ze <= DP_EMAX);
  
+       /* Do the addition */
         if (zs == rs) {
                 /*
-                * Generate 28 bit result of adding two 27 bit numbers
-                * leaving result in xm, xs and xe.
+                * Generate 128 bit result by adding two 127 bit numbers
+                * leaving result in hzm:lzm, zs and ze.
                  */
-               zm = zm + rm;
-
-               if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */
-                       zm = XDPSRS1(zm);
+               hzm = hzm + hrm + (lzm > (lzm + lrm));
+               lzm = lzm + lrm;
+               if ((int64_t)hzm < 0) {        /* carry out */
+                       srl128(&hzm, &lzm, 1);
                         ze++;
                 }
         } else {
-               if (zm >= rm) {
-                       zm = zm - rm;
+               if (hzm > hrm || (hzm == hrm && lzm >= lrm)) {
+                       hzm = hzm - hrm - (lzm < lrm);
+                       lzm = lzm - lrm;
                 } else {
-                       zm = rm - zm;
+                       hzm = hrm - hzm - (lrm < lzm);
+                       lzm = lrm - lzm;
                         zs = rs;
                 }
-               if (zm == 0)
+               if (lzm == 0 && hzm == 0)
                         return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
  
                 /*
-                * Normalize to rounding precision.
+                * Put explicit bit at bit 126 if necessary.
                  */
-               while ((zm >> (DP_FBITS + 3)) == 0) {
-                       zm <<= 1;
-                       ze--;
+               if (hzm == 0) {
+                       /* left shift by 63 or 64 bits */
+                       if ((int64_t)lzm < 0) {
+                               /* MSB of lzm is the explicit bit */
+                               hzm = lzm >> 1;
+                               lzm = lzm << 63;
+                               ze -= 63;
+                       } else {
+                               hzm = lzm;
+                               lzm = 0;
+                               ze -= 64;
+                       }
+               }
+
+               t = 0;
+               while ((hzm >> (62 - t)) == 0)
+                       t++;
+
+               assert(t <= 62);
+               if (t) {
+                       hzm = hzm << t | lzm >> (64 - t);
+                       lzm = lzm << t;
+                       ze -= t;
                 }
         }
  
-       return ieee754dp_format(zs, ze, zm);
+       /*
+        * Move explicit bit from bit 126 to bit 55 since the
+        * ieee754dp_format code expects the mantissa to be
+        * 56 bits wide (53 + 3 rounding bits).
+        */
+       srl128(&hzm, &lzm, (126 - 55));
+
+       return ieee754dp_format(zs, ze, lzm);
  }
  
  union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
author	Douglas Leung <douglas.leung@imgtec.com>
	Thu, 27 Jul 2017 16:08:59 +0000 (18:08 +0200)
committer	Ralf Baechle <ralf@linux-mips.org>
	Tue, 29 Aug 2017 13:21:56 +0000 (15:21 +0200)