[ARM] 3789/4: Fix VFP emulation to ignore VECITR for scalar instruction
author Gen FUKATSU <fukatsu.gen@jp.panasonic.com>
Thu, 21 Sep 2006 13:08:24 +0000 (14:08 +0100)
committer Russell King <rmk+kernel@arm.linux.org.uk>
Mon, 25 Sep 2006 09:34:06 +0000 (10:34 +0100)
VECITR in Floating-Point Exception register indicates the number of
remaining short vector iterations after a potential exception was
detected.

When an exception is caused by a scalar instruction, VECITR is NOT updated.
Therefore the VFP emulation must ignore the VECITR field
and treat "veclen" as zero when it recognizes a scalar instruction.

Signed-off-by: Gen Fukatsu <fukatsu.gen@jp.panasonic.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
arch/arm/vfp/vfp.h
arch/arm/vfp/vfpdouble.c
arch/arm/vfp/vfpsingle.c

index 96fdf30f6a3bb2098a39c5907ea89cd983a95b90..19ace2e37789fa87b911935b1b1835c475a24ea0 100644 (file)
@@ -355,3 +355,14 @@ u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand);
  * we check for an error.
  */
 #define VFP_EXCEPTION_ERROR    ((u32)-1 & ~VFP_NAN_FLAG)
+
+/*
+ * A flag to tell vfp instruction type
+ */
+#define OP_SCALAR      (1 << 0)
+#define OP_SD          (1 << 1)
+
+struct op {
+       u32 (* const fn)(int dd, int dn, int dm, u32 fpscr);
+       u32 flags;
+};
index add48e36c2dc2e36ead54cb031e2f6bd84cf3460..e19a4f7620dedecbfa8969704c148935915e751e 100644 (file)
@@ -659,22 +659,22 @@ static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr)
 }
 
 
-static u32 (* const fop_extfns[32])(int dd, int unused, int dm, u32 fpscr) = {
-       [FEXT_TO_IDX(FEXT_FCPY)]        = vfp_double_fcpy,
-       [FEXT_TO_IDX(FEXT_FABS)]        = vfp_double_fabs,
-       [FEXT_TO_IDX(FEXT_FNEG)]        = vfp_double_fneg,
-       [FEXT_TO_IDX(FEXT_FSQRT)]       = vfp_double_fsqrt,
-       [FEXT_TO_IDX(FEXT_FCMP)]        = vfp_double_fcmp,
-       [FEXT_TO_IDX(FEXT_FCMPE)]       = vfp_double_fcmpe,
-       [FEXT_TO_IDX(FEXT_FCMPZ)]       = vfp_double_fcmpz,
-       [FEXT_TO_IDX(FEXT_FCMPEZ)]      = vfp_double_fcmpez,
-       [FEXT_TO_IDX(FEXT_FCVT)]        = vfp_double_fcvts,
-       [FEXT_TO_IDX(FEXT_FUITO)]       = vfp_double_fuito,
-       [FEXT_TO_IDX(FEXT_FSITO)]       = vfp_double_fsito,
-       [FEXT_TO_IDX(FEXT_FTOUI)]       = vfp_double_ftoui,
-       [FEXT_TO_IDX(FEXT_FTOUIZ)]      = vfp_double_ftouiz,
-       [FEXT_TO_IDX(FEXT_FTOSI)]       = vfp_double_ftosi,
-       [FEXT_TO_IDX(FEXT_FTOSIZ)]      = vfp_double_ftosiz,
+static struct op fops_ext[32] = {
+       [FEXT_TO_IDX(FEXT_FCPY)]        = {vfp_double_fcpy, 0},
+       [FEXT_TO_IDX(FEXT_FABS)]        = {vfp_double_fabs, 0},
+       [FEXT_TO_IDX(FEXT_FNEG)]        = {vfp_double_fneg, 0},
+       [FEXT_TO_IDX(FEXT_FSQRT)]       = {vfp_double_fsqrt, 0},
+       [FEXT_TO_IDX(FEXT_FCMP)]        = {vfp_double_fcmp, OP_SCALAR},
+       [FEXT_TO_IDX(FEXT_FCMPE)]       = {vfp_double_fcmpe, OP_SCALAR},
+       [FEXT_TO_IDX(FEXT_FCMPZ)]       = {vfp_double_fcmpz, OP_SCALAR},
+       [FEXT_TO_IDX(FEXT_FCMPEZ)]      = {vfp_double_fcmpez, OP_SCALAR},
+       [FEXT_TO_IDX(FEXT_FCVT)]        = {vfp_double_fcvts, (OP_SD|OP_SCALAR)},
+       [FEXT_TO_IDX(FEXT_FUITO)]       = {vfp_double_fuito, OP_SCALAR},
+       [FEXT_TO_IDX(FEXT_FSITO)]       = {vfp_double_fsito, OP_SCALAR},
+       [FEXT_TO_IDX(FEXT_FTOUI)]       = {vfp_double_ftoui, (OP_SD|OP_SCALAR)},
+       [FEXT_TO_IDX(FEXT_FTOUIZ)]      = {vfp_double_ftouiz, (OP_SD|OP_SCALAR)},
+       [FEXT_TO_IDX(FEXT_FTOSI)]       = {vfp_double_ftosi, (OP_SD|OP_SCALAR)},
+       [FEXT_TO_IDX(FEXT_FTOSIZ)]      = {vfp_double_ftosiz, (OP_SD|OP_SCALAR)},
 };
 
 
@@ -1108,16 +1108,16 @@ static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr)
        return FPSCR_IOC;
 }
 
-static u32 (* const fop_fns[16])(int dd, int dn, int dm, u32 fpscr) = {
-       [FOP_TO_IDX(FOP_FMAC)]  = vfp_double_fmac,
-       [FOP_TO_IDX(FOP_FNMAC)] = vfp_double_fnmac,
-       [FOP_TO_IDX(FOP_FMSC)]  = vfp_double_fmsc,
-       [FOP_TO_IDX(FOP_FNMSC)] = vfp_double_fnmsc,
-       [FOP_TO_IDX(FOP_FMUL)]  = vfp_double_fmul,
-       [FOP_TO_IDX(FOP_FNMUL)] = vfp_double_fnmul,
-       [FOP_TO_IDX(FOP_FADD)]  = vfp_double_fadd,
-       [FOP_TO_IDX(FOP_FSUB)]  = vfp_double_fsub,
-       [FOP_TO_IDX(FOP_FDIV)]  = vfp_double_fdiv,
+static struct op fops[16] = {
+       [FOP_TO_IDX(FOP_FMAC)]  = {vfp_double_fmac, 0},
+       [FOP_TO_IDX(FOP_FNMAC)] = {vfp_double_fnmac, 0},
+       [FOP_TO_IDX(FOP_FMSC)]  = {vfp_double_fmsc, 0},
+       [FOP_TO_IDX(FOP_FNMSC)] = {vfp_double_fnmsc, 0},
+       [FOP_TO_IDX(FOP_FMUL)]  = {vfp_double_fmul, 0},
+       [FOP_TO_IDX(FOP_FNMUL)] = {vfp_double_fnmul, 0},
+       [FOP_TO_IDX(FOP_FADD)]  = {vfp_double_fadd, 0},
+       [FOP_TO_IDX(FOP_FSUB)]  = {vfp_double_fsub, 0},
+       [FOP_TO_IDX(FOP_FDIV)]  = {vfp_double_fdiv, 0},
 };
 
 #define FREG_BANK(x)   ((x) & 0x0c)
@@ -1131,39 +1131,39 @@ u32 vfp_double_cpdo(u32 inst, u32 fpscr)
        unsigned int dn = vfp_get_dn(inst);
        unsigned int dm = vfp_get_dm(inst);
        unsigned int vecitr, veclen, vecstride;
-       u32 (*fop)(int, int, s32, u32);
+       struct op *fop;
 
-       veclen = fpscr & FPSCR_LENGTH_MASK;
        vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK)) * 2;
 
+       fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];
        /*
         * fcvtds takes an sN register number as destination, not dN.
         * It also always operates on scalars.
         */
-       if ((inst & FEXT_MASK) == FEXT_FCVT) {
-               veclen = 0;
+       if (fop->flags & OP_SD)
                dest = vfp_get_sd(inst);
-       } else
+       else
                dest = vfp_get_dd(inst);
 
        /*
         * If destination bank is zero, vector length is always '1'.
         * ARM DDI0100F C5.1.3, C5.3.2.
         */
-       if (FREG_BANK(dest) == 0)
+       if ((fop->flags & OP_SCALAR) || (FREG_BANK(dest) == 0))
                veclen = 0;
+       else
+               veclen = fpscr & FPSCR_LENGTH_MASK;
 
        pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
                 (veclen >> FPSCR_LENGTH_BIT) + 1);
 
-       fop = (op == FOP_EXT) ? fop_extfns[FEXT_TO_IDX(inst)] : fop_fns[FOP_TO_IDX(op)];
-       if (!fop)
+       if (!fop->fn)
                goto invalid;
 
        for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
                u32 except;
 
-               if (op == FOP_EXT && (inst & FEXT_MASK) == FEXT_FCVT)
+               if (op == FOP_EXT && (fop->flags & OP_SD))
                        pr_debug("VFP: itr%d (s%u) = op[%u] (d%u)\n",
                                 vecitr >> FPSCR_LENGTH_BIT,
                                 dest, dn, dm);
@@ -1176,7 +1176,7 @@ u32 vfp_double_cpdo(u32 inst, u32 fpscr)
                                 vecitr >> FPSCR_LENGTH_BIT,
                                 dest, dn, FOP_TO_IDX(op), dm);
 
-               except = fop(dest, dn, dm, fpscr);
+               except = fop->fn(dest, dn, dm, fpscr);
                pr_debug("VFP: itr%d: exceptions=%08x\n",
                         vecitr >> FPSCR_LENGTH_BIT, except);
 
index 8f6c179cafbe54e6b76cb09109bf4ba2d077947b..4f717d72eb6bd9bfc0364f2e06d9a60486891244 100644 (file)
@@ -702,22 +702,22 @@ static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr)
        return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO);
 }
 
-static u32 (* const fop_extfns[32])(int sd, int unused, s32 m, u32 fpscr) = {
-       [FEXT_TO_IDX(FEXT_FCPY)]        = vfp_single_fcpy,
-       [FEXT_TO_IDX(FEXT_FABS)]        = vfp_single_fabs,
-       [FEXT_TO_IDX(FEXT_FNEG)]        = vfp_single_fneg,
-       [FEXT_TO_IDX(FEXT_FSQRT)]       = vfp_single_fsqrt,
-       [FEXT_TO_IDX(FEXT_FCMP)]        = vfp_single_fcmp,
-       [FEXT_TO_IDX(FEXT_FCMPE)]       = vfp_single_fcmpe,
-       [FEXT_TO_IDX(FEXT_FCMPZ)]       = vfp_single_fcmpz,
-       [FEXT_TO_IDX(FEXT_FCMPEZ)]      = vfp_single_fcmpez,
-       [FEXT_TO_IDX(FEXT_FCVT)]        = vfp_single_fcvtd,
-       [FEXT_TO_IDX(FEXT_FUITO)]       = vfp_single_fuito,
-       [FEXT_TO_IDX(FEXT_FSITO)]       = vfp_single_fsito,
-       [FEXT_TO_IDX(FEXT_FTOUI)]       = vfp_single_ftoui,
-       [FEXT_TO_IDX(FEXT_FTOUIZ)]      = vfp_single_ftouiz,
-       [FEXT_TO_IDX(FEXT_FTOSI)]       = vfp_single_ftosi,
-       [FEXT_TO_IDX(FEXT_FTOSIZ)]      = vfp_single_ftosiz,
+static struct op fops_ext[32] = {
+       [FEXT_TO_IDX(FEXT_FCPY)]        = {vfp_single_fcpy, 0},
+       [FEXT_TO_IDX(FEXT_FABS)]        = {vfp_single_fabs, 0},
+       [FEXT_TO_IDX(FEXT_FNEG)]        = {vfp_single_fneg, 0},
+       [FEXT_TO_IDX(FEXT_FSQRT)]       = {vfp_single_fsqrt, 0},
+       [FEXT_TO_IDX(FEXT_FCMP)]        = {vfp_single_fcmp, OP_SCALAR},
+       [FEXT_TO_IDX(FEXT_FCMPE)]       = {vfp_single_fcmpe, OP_SCALAR},
+       [FEXT_TO_IDX(FEXT_FCMPZ)]       = {vfp_single_fcmpz, OP_SCALAR},
+       [FEXT_TO_IDX(FEXT_FCMPEZ)]      = {vfp_single_fcmpez, OP_SCALAR},
+       [FEXT_TO_IDX(FEXT_FCVT)]        = {vfp_single_fcvtd, (OP_SD|OP_SCALAR)},
+       [FEXT_TO_IDX(FEXT_FUITO)]       = {vfp_single_fuito, OP_SCALAR},
+       [FEXT_TO_IDX(FEXT_FSITO)]       = {vfp_single_fsito, OP_SCALAR},
+       [FEXT_TO_IDX(FEXT_FTOUI)]       = {vfp_single_ftoui, OP_SCALAR},
+       [FEXT_TO_IDX(FEXT_FTOUIZ)]      = {vfp_single_ftouiz, OP_SCALAR},
+       [FEXT_TO_IDX(FEXT_FTOSI)]       = {vfp_single_ftosi, OP_SCALAR},
+       [FEXT_TO_IDX(FEXT_FTOSIZ)]      = {vfp_single_ftosiz, OP_SCALAR},
 };
 
 
@@ -1151,16 +1151,16 @@ static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr)
        return FPSCR_IOC;
 }
 
-static u32 (* const fop_fns[16])(int sd, int sn, s32 m, u32 fpscr) = {
-       [FOP_TO_IDX(FOP_FMAC)]  = vfp_single_fmac,
-       [FOP_TO_IDX(FOP_FNMAC)] = vfp_single_fnmac,
-       [FOP_TO_IDX(FOP_FMSC)]  = vfp_single_fmsc,
-       [FOP_TO_IDX(FOP_FNMSC)] = vfp_single_fnmsc,
-       [FOP_TO_IDX(FOP_FMUL)]  = vfp_single_fmul,
-       [FOP_TO_IDX(FOP_FNMUL)] = vfp_single_fnmul,
-       [FOP_TO_IDX(FOP_FADD)]  = vfp_single_fadd,
-       [FOP_TO_IDX(FOP_FSUB)]  = vfp_single_fsub,
-       [FOP_TO_IDX(FOP_FDIV)]  = vfp_single_fdiv,
+static struct op fops[16] = {
+       [FOP_TO_IDX(FOP_FMAC)]  = {vfp_single_fmac, 0},
+       [FOP_TO_IDX(FOP_FNMAC)] = {vfp_single_fnmac, 0},
+       [FOP_TO_IDX(FOP_FMSC)]  = {vfp_single_fmsc, 0},
+       [FOP_TO_IDX(FOP_FNMSC)] = {vfp_single_fnmsc, 0},
+       [FOP_TO_IDX(FOP_FMUL)]  = {vfp_single_fmul, 0},
+       [FOP_TO_IDX(FOP_FNMUL)] = {vfp_single_fnmul, 0},
+       [FOP_TO_IDX(FOP_FADD)]  = {vfp_single_fadd, 0},
+       [FOP_TO_IDX(FOP_FSUB)]  = {vfp_single_fsub, 0},
+       [FOP_TO_IDX(FOP_FDIV)]  = {vfp_single_fdiv, 0},
 };
 
 #define FREG_BANK(x)   ((x) & 0x18)
@@ -1174,19 +1174,18 @@ u32 vfp_single_cpdo(u32 inst, u32 fpscr)
        unsigned int sn = vfp_get_sn(inst);
        unsigned int sm = vfp_get_sm(inst);
        unsigned int vecitr, veclen, vecstride;
-       u32 (*fop)(int, int, s32, u32);
+       struct op *fop;
 
-       veclen = fpscr & FPSCR_LENGTH_MASK;
        vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);
 
+       fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];
        /*
         * fcvtsd takes a dN register number as destination, not sN.
         * Technically, if bit 0 of dd is set, this is an invalid
         * instruction.  However, we ignore this for efficiency.
         * It also only operates on scalars.
         */
-       if ((inst & FEXT_MASK) == FEXT_FCVT) {
-               veclen = 0;
+       if (fop->flags & OP_SD) {
                dest = vfp_get_dd(inst);
        } else
                dest = vfp_get_sd(inst);
@@ -1195,21 +1194,22 @@ u32 vfp_single_cpdo(u32 inst, u32 fpscr)
         * If destination bank is zero, vector length is always '1'.
         * ARM DDI0100F C5.1.3, C5.3.2.
         */
-       if (FREG_BANK(dest) == 0)
+       if ((fop->flags & OP_SCALAR) || (FREG_BANK(dest) == 0))
                veclen = 0;
+       else
+               veclen = fpscr & FPSCR_LENGTH_MASK;
 
        pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
                 (veclen >> FPSCR_LENGTH_BIT) + 1);
 
-       fop = (op == FOP_EXT) ? fop_extfns[FEXT_TO_IDX(inst)] : fop_fns[FOP_TO_IDX(op)];
-       if (!fop)
+       if (!fop->fn)
                goto invalid;
 
        for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
                s32 m = vfp_get_float(sm);
                u32 except;
 
-               if (op == FOP_EXT && (inst & FEXT_MASK) == FEXT_FCVT)
+               if (op == FOP_EXT && (fop->flags & OP_SD))
                        pr_debug("VFP: itr%d (d%u) = op[%u] (s%u=%08x)\n",
                                 vecitr >> FPSCR_LENGTH_BIT, dest, sn, sm, m);
                else if (op == FOP_EXT)
@@ -1220,7 +1220,7 @@ u32 vfp_single_cpdo(u32 inst, u32 fpscr)
                                 vecitr >> FPSCR_LENGTH_BIT, dest, sn,
                                 FOP_TO_IDX(op), sm, m);
 
-               except = fop(dest, sn, m, fpscr);
+               except = fop->fn(dest, sn, m, fpscr);
                pr_debug("VFP: itr%d: exceptions=%08x\n",
                         vecitr >> FPSCR_LENGTH_BIT, except);