// Floating Reciprocal Estimate Single
//
// Reads the operand from slot 0 of frB, records VXSNAN when that operand is
// a signalling NaN, and stores ppc_estimate_reciprocal() of it into slot 0
// of frD.  updateFPSCR() and updateFPRF() are invoked before the result is
// written back; the record form (instr.rc set) also updates the float
// condition register.
static void
fres(ThreadState *state, Instruction instr)
{
   const double operand = state->fpr[instr.frB].paired0;

   // OR-ed in, so a VXSNAN that is already set is never cleared here.
   state->fpscr.vxsnan |= is_signalling_nan(operand);

   const double estimate = ppc_estimate_reciprocal(operand);

   updateFPSCR(state);
   updateFPRF(state, estimate);
   state->fpr[instr.frD].paired0 = estimate;

   if (!instr.rc) {
      return;
   }

   updateFloatConditionRegister(state);
}
// Floating Reciprocal Square Root Estimate
//
// Computes 1 / sqrt(b) for slot 0 of frB and stores it in slot 0 of frD.
//
// Exception flags (both are OR-ed in, so an already-set flag stays set):
//   - VXSNAN when the operand is a signalling NaN.
//   - VXSQRT when the operand is a negative, non-zero number.  NaNs and
//     negative zero are deliberately excluded: `b < 0.0` is false for both.
//
// updateFPSCR() and updateFPRF() are invoked before the result is written
// back; the record form (instr.rc set) also updates the float condition
// register.
static void
frsqrte(ThreadState *state, Instruction instr)
{
   const double b = state->fpr[instr.frB].paired0;
   const double d = 1.0 / std::sqrt(b);

   // An invalid square root is a property of the operand's sign, not of it
   // being a signalling NaN, so the two conditions are tested separately.
   const bool vxsnan = is_signalling_nan(b);
   const bool vxsqrt = b < 0.0;

   state->fpscr.vxsnan |= vxsnan;
   state->fpscr.vxsqrt |= vxsqrt;

   updateFPSCR(state);
   updateFPRF(state, d);
   state->fpr[instr.frD].paired0 = d;

   if (instr.rc) {
      updateFloatConditionRegister(state);
   }
}
// Floating Convert to Integer Word
//
// Converts slot 0 of frB to a signed 32-bit integer using the rounding mode
// in fpscr.rn and stores it in iw0 of frD.  iw1 of frD receives 0xFFF80000,
// with the lowest bit set when the source operand is negative zero.
//
// Conversion rules:
//   - A NaN operand yields INT_MIN and sets VXCVI (VXSNAN as well when the
//     NaN is signalling).  It is handled up front because casting a NaN to
//     an integer is undefined behaviour.
//   - Otherwise the operand is first rounded to an integral value, and only
//     the rounded value is range-checked.  Results above INT_MAX or below
//     INT_MIN saturate and set VXCVI; an operand such as 2147483647.4 that
//     rounds into range is converted normally.
//   - Nearest mode rounds ties to the even integer (std::round would round
//     ties away from zero).  The rounding is done by hand so that it does
//     not depend on the host's current floating-point rounding mode.
//
// The record form (instr.rc set) also updates the float condition register.
static void
fctiw(ThreadState *state, Instruction instr)
{
   const double b = state->fpr[instr.frB].paired0;
   int32_t bi = 0;

   if (std::isnan(b)) {
      bi = INT_MIN;
      state->fpscr.vxcvi = 1;
   } else {
      double rounded = b;

      switch (state->fpscr.rn) {
      case FloatingPointRoundMode::Nearest:
      {
         // Round half to even.  b - floor(b) is exact for every double, so
         // the comparison against 0.5 is reliable.  For infinities the
         // difference is NaN, both tests fail and the infinity is kept.
         double lower = std::floor(b);
         const double fraction = b - lower;
         const bool lowerIsOdd = std::fmod(lower, 2.0) != 0.0;

         if (fraction > 0.5 || (fraction == 0.5 && lowerIsOdd)) {
            lower += 1.0;
         }

         rounded = lower;
         break;
      }
      case FloatingPointRoundMode::Positive:
         rounded = std::ceil(b);
         break;
      case FloatingPointRoundMode::Negative:
         rounded = std::floor(b);
         break;
      case FloatingPointRoundMode::Zero:
         rounded = std::trunc(b);
         break;
      }

      if (rounded > static_cast<double>(INT_MAX)) {
         bi = INT_MAX;
         state->fpscr.vxcvi = 1;
      } else if (rounded < static_cast<double>(INT_MIN)) {
         bi = INT_MIN;
         state->fpscr.vxcvi = 1;
      } else {
         bi = static_cast<int32_t>(rounded);
      }
   }

   const bool vxsnan = is_signalling_nan(b);
   state->fpscr.vxsnan |= vxsnan;
   state->fpscr.vxcvi |= vxsnan;

   updateFPSCR(state);
   state->fpr[instr.frD].iw0 = bi;
   state->fpr[instr.frD].iw1 = 0xFFF80000 | (is_negative_zero(b) ? 1 : 0);

   if (instr.rc) {
      updateFloatConditionRegister(state);
   }
}
// Reciprocal Square Root
//
// Paired-single reciprocal square root estimate: both slots of frB are run
// through ppc_estimate_reciprocal_root() and, unless suppressed, written to
// the matching slots of frD.
//
// Per slot, the following conditions are detected and OR-ed into FPSCR:
//   - VXSNAN: the operand is a signalling NaN.
//   - VXSQRT: the operand is not a signalling NaN, has its sign bit set and
//             is not zero.
//   - ZX:     the operand is zero.
// If either slot hits VXSNAN/VXSQRT while fpscr.ve is set, or ZX while
// fpscr.ze is set, neither slot of frD is written.  FPRF is updated from
// the slot 0 result whenever slot 0 itself is not suppressed.
static void
ps_rsqrte(cpu::Core *state, Instruction instr)
{
   const double b0 = state->fpr[instr.frB].paired0;
   const double b1 = state->fpr[instr.frB].paired1;

   const bool zx0 = is_zero(b0);
   const bool zx1 = is_zero(b1);
   const bool vxsnan0 = is_signalling_nan(b0);
   const bool vxsnan1 = is_signalling_nan(b1);
   const bool vxsqrt0 = !vxsnan0 && std::signbit(b0) && !is_zero(b0);
   const bool vxsqrt1 = !vxsnan1 && std::signbit(b1) && !is_zero(b1);

   // Snapshot taken before any flag is touched; handed to updateFPSCR()
   // at the end.
   const uint32_t savedFPSCR = state->fpscr.value;
   state->fpscr.vxsnan |= vxsnan0 || vxsnan1;
   state->fpscr.vxsqrt |= vxsqrt0 || vxsqrt1;
   state->fpscr.zx |= zx0 || zx1;

   // True when an enabled exception prevents a slot from producing a result.
   const auto suppressed = [state](bool invalidOperation, bool zeroDivide) {
      return (invalidOperation && state->fpscr.ve)
          || (zeroDivide && state->fpscr.ze);
   };

   double d0 = 0.0;
   double d1 = 0.0;
   bool write = true;

   if (suppressed(vxsnan0 || vxsqrt0, zx0)) {
      write = false;
   } else {
      d0 = ppc_estimate_reciprocal_root(b0);
      updateFPRF(state, d0);
   }

   if (suppressed(vxsnan1 || vxsqrt1, zx1)) {
      write = false;
   } else {
      d1 = ppc_estimate_reciprocal_root(b1);
   }

   if (write) {
      // ps_rsqrte behaves oddly when the magnitude of the result is out of
      // range: ps0 retains its double-precision exponent, whereas ps1 seems
      // to receive an arbitrary value from the floating-point circuitry.
      // Exactly how ps1's exponent is affected is unknown, but the logic
      // below works for the double-precision inputs 0x7FE...FFF (maximum
      // normal) and 0x000...001 (minimum denormal).

      auto bits0 = get_float_bits(d0);
      bits0.mantissa &= UINT64_C(0xFFFFFE0000000);
      state->fpr[instr.frD].paired0 = bits0.v;

      auto bits1 = get_float_bits(d1);
      if (bits1.exponent != 0 && bits1.exponent < 1151) {
         // Keep only the low 8 bits of the unbiased exponent, reinterpreted
         // as a signed value, then re-apply the bias.
         int8_t exponent8 = (bits1.exponent - 1023) & 0xFF;
         bits1.exponent = 1023 + exponent8;
      } else if (bits1.exponent >= 1151 && bits1.exponent < 2047) {
         bits1.exponent = 1022;
      }
      // An exponent of 0 is left as zero (a reciprocal square root can
      // never be a denormal); an exponent of 2047 is left untouched too.
      bits1.mantissa &= UINT64_C(0xFFFFFE0000000);
      state->fpr[instr.frD].paired1 = bits1.v;
   }

   updateFPSCR(state, savedFPSCR);

   if (instr.rc) {
      updateFloatConditionRegister(state);
   }
}
// Computes one single-precision fused multiply-add result, a * c + b.
//
// Operands a and b come from frA and frB (slot 0 when slotAB == 0, slot 1
// otherwise) and c comes from frC (slot 0 when slotC == 0, slot 1
// otherwise).  The addend b is negated when FMASubtract is set in `flags`;
// a computed (non-NaN) result is negated when FMANegate is set.
//
// NOTE(review): slotAB, slotC and flags are not declared inside this block;
// presumably they are template parameters declared just above it - confirm.
//
// VXSNAN, VXISI and VXIMZ are OR-ed into FPSCR as detected.  Returns false
// without writing *result when one of them was detected and fpscr.ve is
// set; otherwise stores the result in *result and returns true.
static bool
fmaSingle(cpu::Core *state, Instruction instr, float *result)
{
   const bool abFromSlot0 = (slotAB == 0);
   const bool cFromSlot0 = (slotC == 0);

   double a = abFromSlot0 ? state->fpr[instr.frA].paired0
                          : state->fpr[instr.frA].paired1;
   const double b = abFromSlot0 ? state->fpr[instr.frB].paired0
                                : state->fpr[instr.frB].paired1;
   double c = cFromSlot0 ? state->fpr[instr.frC].paired0
                         : state->fpr[instr.frC].paired1;

   const double addend = (flags & FMASubtract) ? -b : b;

   // Invalid-operation classification:
   //   vxsnan - any operand is a signalling NaN
   //   vximz  - infinity multiplied by zero
   //   vxisi  - an infinite product added to an infinity of opposite sign
   const bool vxsnan = is_signalling_nan(a) || is_signalling_nan(b) || is_signalling_nan(c);
   const bool vximz = (is_infinity(a) && is_zero(c)) || (is_zero(a) && is_infinity(c));
   const bool vxisi = (!vximz && !is_nan(a) && !is_nan(c)
                       && (is_infinity(a) || is_infinity(c)) && is_infinity(b)
                       && (std::signbit(a) ^ std::signbit(c)) != std::signbit(addend));

   state->fpscr.vxsnan |= vxsnan;
   state->fpscr.vxisi |= vxisi;
   state->fpscr.vximz |= vximz;

   const bool invalidOperation = vxsnan || vxisi || vximz;
   if (invalidOperation && state->fpscr.ve) {
      return false;
   }

   float value;
   if (is_nan(a)) {
      // NaN operands propagate in the order a, b, c, quieted.
      value = make_quiet(truncate_double(a));
   } else if (is_nan(b)) {
      value = make_quiet(truncate_double(b));
   } else if (is_nan(c)) {
      value = make_quiet(truncate_double(c));
   } else if (vxisi || vximz) {
      value = make_nan<float>();
   } else {
      if (cFromSlot0) {
         roundForMultiply(&a, &c);  // Not necessary for slot 1.
      }

      const double wide = std::fma(a, c, addend);
      const bool nearest =
         state->fpscr.rn == espresso::FloatingPointRoundMode::Nearest;
      value = nearest ? roundFMAResultToSingle(wide, a, addend, c)
                      : static_cast<float>(wide);

      if (possibleUnderflow<float>(value)) {
         // Repeat the operation with the host rounding toward zero.  The
         // volatile temporaries keep the compiler from folding or dropping
         // the computation; its value is discarded.
         // NOTE(review): presumably this exists only for its effect on the
         // host floating-point status flags - confirm.
         const int savedRounding = fegetround();
         fesetround(FE_TOWARDZERO);

         volatile double opaqueAddend = addend;
         volatile float discarded;
         discarded = static_cast<float>(std::fma(a, c, opaqueAddend));

         fesetround(savedRounding);
      }

      if (flags & FMANegate) {
         value = -value;
      }
   }

   *result = value;
   return true;
}
// Computes one single-precision result of a paired-single add, subtract,
// multiply or divide, selected by `op` (PSAdd, PSSub, PSMul, PSDiv).
//
// Operand a comes from frA (slot 0 when slotA == 0, slot 1 otherwise).
// Operand b comes from frC for PSMul and from frB for every other
// operation (slot 0 when slotB == 0, slot 1 otherwise).
//
// NOTE(review): op, slotA and slotB are not declared inside this block;
// presumably they are template parameters declared just above it - confirm.
//
// VXSNAN, VXISI, VXIMZ, VXIDI, VXZDZ and ZX are OR-ed into FPSCR as
// detected.  Returns false without writing *result when an invalid
// operation was detected with fpscr.ve set, or a zero divide with fpscr.ze
// set; otherwise stores the result in *result and returns true.
static bool
psArithSingle(cpu::Core *state, Instruction instr, float *result)
{
   const auto bRegister = (op == PSMul) ? instr.frC : instr.frB;

   double a = (slotA == 0) ? state->fpr[instr.frA].paired0
                           : state->fpr[instr.frA].paired1;
   double b = (slotB == 0) ? state->fpr[bRegister].paired0
                           : state->fpr[bRegister].paired1;

   const bool vxsnan = is_signalling_nan(a) || is_signalling_nan(b);

   // Every flag starts clear; each operation raises only the ones that can
   // apply to it.
   bool vxisi = false;
   bool vximz = false;
   bool vxidi = false;
   bool vxzdz = false;
   bool zx = false;

   switch (op) {
   case PSAdd:
      // Infinities of opposite sign cancel.
      vxisi = is_infinity(a) && is_infinity(b) && std::signbit(a) != std::signbit(b);
      break;
   case PSSub:
      // Infinities of the same sign cancel.
      vxisi = is_infinity(a) && is_infinity(b) && std::signbit(a) == std::signbit(b);
      break;
   case PSMul:
      vximz = (is_infinity(a) && is_zero(b)) || (is_zero(a) && is_infinity(b));
      break;
   case PSDiv:
      vxidi = is_infinity(a) && is_infinity(b);
      vxzdz = is_zero(a) && is_zero(b);
      zx = !(vxzdz || vxsnan) && is_zero(b);
      break;
   }

   state->fpscr.vxsnan |= vxsnan;
   state->fpscr.vxisi |= vxisi;
   state->fpscr.vximz |= vximz;
   state->fpscr.vxidi |= vxidi;
   state->fpscr.vxzdz |= vxzdz;
   state->fpscr.zx |= zx;

   const bool invalidOperation = vxsnan || vxisi || vximz || vxidi || vxzdz;
   if ((invalidOperation && state->fpscr.ve) || (zx && state->fpscr.ze)) {
      return false;
   }

   float value;
   if (is_nan(a)) {
      // NaN operands propagate in the order a, b, quieted.
      value = make_quiet(truncate_double(a));
   } else if (is_nan(b)) {
      value = make_quiet(truncate_double(b));
   } else if (vxisi || vximz || vxidi || vxzdz) {
      value = make_nan<float>();
   } else {
      switch (op) {
      case PSAdd:
         value = static_cast<float>(a + b);
         break;
      case PSSub:
         value = static_cast<float>(a - b);
         break;
      case PSMul:
         if (slotB == 0) {
            roundForMultiply(&a, &b);  // Not necessary for slot 1.
         }
         value = static_cast<float>(a * b);
         break;
      case PSDiv:
         value = static_cast<float>(a / b);
         break;
      }

      if (possibleUnderflow<float>(value)) {
         // Repeat the operation with the host rounding toward zero.  The
         // volatile temporaries keep the compiler from folding or dropping
         // the computation; its value is discarded.
         // NOTE(review): presumably this exists only for its effect on the
         // host floating-point status flags - confirm.
         const int savedRounding = fegetround();
         fesetround(FE_TOWARDZERO);

         volatile double opaqueB = b;
         volatile float discarded;
         switch (op) {
         case PSAdd:
            discarded = static_cast<float>(a + opaqueB);
            break;
         case PSSub:
            discarded = static_cast<float>(a - opaqueB);
            break;
         case PSMul:
            discarded = static_cast<float>(a * opaqueB);
            break;
         case PSDiv:
            discarded = static_cast<float>(a / opaqueB);
            break;
         }

         fesetround(savedRounding);
      }
   }

   *result = value;
   return true;
}
Example #7
0
/*
 * Feed the next chunk of bytecode to a decode in progress and continue it.
 *
 * ds      - decode state; holds the pending read (readtarget / readlen), the
 *           resume position (yieldpos) and the stack of prototypes being
 *           decoded.
 * pData   - start of the newly available input bytes.
 * iLength - number of bytes available at pData.
 *
 * The function first retries the read that was pending, then dispatches on
 * ds->yieldpos. Return values visible in this function:
 *   DECODE_YIELD     - more input is needed, or the top-level prototype has
 *                      been fully decoded (yieldpos is then
 *                      DECODE_YIELDPOS_DONE).
 *   DECODE_FAIL      - bad header, unknown constant type, or data after the
 *                      end of the bytecode.
 *   DECODE_UNSAFE    - nesting reached LUAI_MAXCCALLS, a prototype has no
 *                      instructions, a number constant is a signalling NaN,
 *                      or a boolean constant is greater than 1.
 *   DECODE_ERROR_MEM - an allocation returned NULL.
 *   DECODE_ERROR     - yieldpos holds a value not handled by the switch.
 *
 * READ, READ_INT, SKIP_STRING_1 and SKIP_STRING_2 are macros defined outside
 * this function. NOTE(review): presumably each may record a resume point in
 * ds->yieldpos and return DECODE_YIELD when the chunk runs dry, adding the
 * case labels used to resume inside the body below - confirm against their
 * definitions.
 *
 * NOTE(review): `i` is used as a counter throughout but is not declared in
 * this function; presumably it is provided elsewhere (e.g. a macro mapping
 * it onto decode-state storage so it survives a yield) - confirm.
 */
int decode_bytecode_pump(decode_state_t* ds, const unsigned char* pData, size_t iLength)
{
    decoded_prototype_t* proto;

    /* Continue the read operation which caused the yield. */
    ds->chunk = pData;
    ds->chunklen = iLength;
    if(!read(ds, ds->readtarget, ds->readlen))
        return DECODE_YIELD;

    /* Re-establish the prototype being decoded, since locals do not survive
       a return from this function.
       NOTE(review): if ds->level can be 0 here (e.g. while yieldpos is still
       DECODE_YIELDPOS_HEADER) this reads before the start of the stack; the
       value is overwritten before use on that path, but confirm the access
       itself is safe. */
    proto = ds->stack[ds->level - 1];

    switch(ds->yieldpos)
    {
    case DECODE_YIELDPOS_HEADER:
        if(!decode_header(ds))
            return DECODE_FAIL;

        /* Main prototype decoding function */
ENTER_CHILD_PROTO:
        /* Refuse to nest deeper than LUAI_MAXCCALLS levels. */
        if(ds->level >= LUAI_MAXCCALLS)
            return DECODE_UNSAFE;
        proto = alloc_proto(ds);
        if(proto == NULL)
            return DECODE_ERROR_MEM;
        /* Push the new prototype; it becomes the one being decoded. */
        ds->stack[ds->level++] = proto;

        /* Skip two integers, then read three single-byte fields. */
        READ(NULL, ds->sizeint * 2);
        READ(ds->buffer, 3);

        proto->numparams = ds->buffer[0];
        proto->is_vararg = ds->buffer[1] != 0;
        proto->numregs = ds->buffer[2];

        /* Code */
        proto->instructionsize = ds->sizeins;
        READ_INT(&proto->numinstructions, ds->sizeint);
        if(proto->numinstructions == 0)
            return DECODE_UNSAFE;
        /* The buffer is sizeof(int) bytes larger than the instruction data.
           NOTE(review): the purpose of the extra bytes is not visible here,
           and the size computation is not checked for overflow - confirm. */
        proto->code = (unsigned char*)ds->alloc(ds->allocud, NULL, 0,
                                                ds->sizeins * proto->numinstructions + sizeof(int));
        if(proto->code == NULL)
            return DECODE_ERROR_MEM;
        READ(proto->code, ds->sizeins * proto->numinstructions);
        if(ds->swapendian)
        {
            /* Byte-swap each instruction in place. */
            for(i = 0; i < proto->numinstructions; ++i)
                byteswap(proto->code + i * ds->sizeins, ds->sizeins);
        }

        /* Constants (excluding prototypes) */
        READ_INT(&proto->numconstants, ds->sizeint);
        proto->constant_types = (unsigned char*)ds->alloc(ds->allocud,
                                NULL, 0, proto->numconstants);
        /* A NULL result is only treated as failure for a non-zero count. */
        if(proto->numconstants != 0 && proto->constant_types == NULL)
            return DECODE_ERROR_MEM;
        for(i = 0; i < proto->numconstants; ++i)
        {
            unsigned char t;
            /* Only the type tag of each constant is kept; the value itself
               is validated or skipped below. */
            READ(proto->constant_types + i, 1);
            t = proto->constant_types[i];
            /* NB: Cannot use switch statement here, as possibly yielding reads
               cannot be in a nested switch. */
            if(t == LUA_TSTRING)
            {
                SKIP_STRING_1();
                SKIP_STRING_2();
            }
            else if(t == LUA_TNUMBER)
            {
                READ(ds->buffer, ds->sizenum);
                if(is_signalling_nan(ds, ds->buffer))
                    return DECODE_UNSAFE;
            }
            else if(t == LUA_TBOOLEAN)
            {
                /* Booleans must be encoded as exactly 0 or 1. */
                READ(ds->buffer, 1);
                if(ds->buffer[0] > 1)
                    return DECODE_UNSAFE;
            }
            else if(t != LUA_TNIL)
            {
                /* Any type other than string, number, boolean or nil is
                   rejected. */
                return DECODE_FAIL;
            }
        }

        /* Prototypes */
        READ_INT(&proto->numprototypes, ds->sizeint);
        proto->prototypes = (decoded_prototype_t**)ds->alloc(ds->allocud,
                            NULL, 0, sizeof(decoded_prototype_t*) * proto->numprototypes);
        if(proto->numprototypes != 0 && proto->prototypes == NULL)
            return DECODE_ERROR_MEM;
        /* Clear every slot before decoding any child. */
        for(i = 0; i < proto->numprototypes; ++i)
            proto->prototypes[i] = NULL;
        for(i = 0; i < proto->numprototypes; ++i)
        {
            /* Recursively decode the child prototype.
              The loop counter needs to be saved somewhere, as it will be
              overwritten during the recursion. For this, the numupvalues field
              is used, as its value is not important at this stage of the
              decoding process. The result of the recursion is then pulled out
              of the stack and stored in the appropriate place. */
            proto->numupvalues = i;
            goto ENTER_CHILD_PROTO;
RESUME_PARENT_PROTO:
            /* Reached by goto once a child has finished: ds->level has
               already been decremented, so the finished child sits at
               ds->stack[ds->level] and proto is the parent again. */
            i = proto->numupvalues;
            proto->prototypes[i] = ds->stack[ds->level];
        }

        /* Upvalues */
        READ_INT(&proto->numupvalues, ds->sizeint);
        proto->upvalue_instack = (bool*)ds->alloc(ds->allocud, NULL, 0,
                                 sizeof(bool) * proto->numupvalues);
        proto->upvalue_index = (unsigned char*)ds->alloc(ds->allocud, NULL, 0,
                               proto->numupvalues);
        if((proto->upvalue_instack == NULL || proto->upvalue_index == NULL)
                && proto->numupvalues != 0)
            return DECODE_ERROR_MEM;
        for(i = 0; i < proto->numupvalues; ++i)
        {
            /* Two bytes per upvalue: an in-stack flag and an index. */
            READ(ds->buffer, 2);
            proto->upvalue_instack[i] = ds->buffer[0] != 0;
            proto->upvalue_index[i] = ds->buffer[1];
        }

        /* Debug information */
        /* None of the debug information is stored; it is read past only. */
        SKIP_STRING_1();
        SKIP_STRING_2();
        /* A count followed by that many integers. */
        READ_INT(&i, ds->sizeint);
        READ(NULL, ds->sizeint * i);
        /* A count followed by that many (string, two integers) records. */
        READ_INT(&i, ds->sizeint);
        for(; i > 0; --i)
        {
            SKIP_STRING_1();
            SKIP_STRING_2();
            READ(NULL, ds->sizeint * 2);
        }
        /* A count followed by that many strings. */
        READ_INT(&i, ds->sizeint);
        for(; i > 0; --i)
        {
            SKIP_STRING_1();
            SKIP_STRING_2();
        }

        /* Pop this prototype. Level 0 means the top-level prototype is
           complete; otherwise resume the parent's child loop. */
        if(--ds->level == 0)
        {
            if(ds->chunklen != 0) /* Data in epilogue? */
                return DECODE_FAIL;
            ds->yieldpos = DECODE_YIELDPOS_DONE;
            return DECODE_YIELD;
        }
        proto = ds->stack[ds->level - 1];
        goto RESUME_PARENT_PROTO;
    /* End of main prototype decoding function. */

    case DECODE_YIELDPOS_DONE:
        /* An empty chunk after completion is harmless. */
        if(ds->chunklen == 0)
            return DECODE_YIELD;
        /* If this is being resumed, it means that there is spurious data
           beyond the end of the bytecode. In this case, the decoding should
           fail and not return a prototype, so the level field is set to 1 to
           ensure that the resulting prototype gets freed when the stack is
           freed. */
        ds->level = 1;
        return DECODE_FAIL;

    default:
        /* This should never happen, unless the yield/resume code is broken. */
        return DECODE_ERROR;
    }
}