SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
Example #2
0
bool
AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return false;
  } else {
    return true;
  }
}
Example #3
0
// The backend supports 32 and 64 bit floating point immediates
bool
AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return true;
  } else {
    return false;
  }
}
Example #4
0
SDValue
AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}
SDValue
AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  MVT INTTY;
  MVT FLTTY;
  if (!OVT.isVector()) {
    INTTY = MVT::i32;
    FLTTY = MVT::f32;
  } else if (OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);

  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT)); 

  // jq = jq | 0x1
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));

  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);

  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);

  // int ib, (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);

  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);

  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);

  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);

  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);

  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);

  // float fr = mad(fqneg, fb, fa);
  SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);

  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);

  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);

  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);

  // int cv = fr >= fb;
  SDValue cv;
  if (INTTY == MVT::i32) {
    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
  } else {
    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
  }
  // jq = (cv ? jq : 0);
  jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq, 
      DAG.getConstant(0, OVT));
  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}
Example #6
0
bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
       J != JE; ++J) {
    if (CallInst *CI = dyn_cast<CallInst>(J)) {
      // Inline ASM is okay, unless it clobbers the ctr register.
      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
        if (asmClobbersCTR(IA))
          return true;
        continue;
      }

      if (Function *F = CI->getCalledFunction()) {
        // Most intrinsics don't become function calls, but some might.
        // sin, cos, exp and log are always calls.
        unsigned Opcode = 0;
        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
          switch (F->getIntrinsicID()) {
          default: continue;
          // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
          // we're definitely using CTR.
          case Intrinsic::ppc_is_decremented_ctr_nonzero:
          case Intrinsic::ppc_mtctr:
            return true;

// VisualStudio defines setjmp as _setjmp
#if defined(_MSC_VER) && defined(setjmp) && \
                       !defined(setjmp_undefined_for_msvc)
#  pragma push_macro("setjmp")
#  undef setjmp
#  define setjmp_undefined_for_msvc
#endif

          case Intrinsic::setjmp:

#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
 // let's return it to _setjmp state
#  pragma pop_macro("setjmp")
#  undef setjmp_undefined_for_msvc
#endif

          case Intrinsic::longjmp:

          // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
          // because, although it does clobber the counter register, the
          // control can't then return to inside the loop unless there is also
          // an eh_sjlj_setjmp.
          case Intrinsic::eh_sjlj_setjmp:

          case Intrinsic::memcpy:
          case Intrinsic::memmove:
          case Intrinsic::memset:
          case Intrinsic::powi:
          case Intrinsic::log:
          case Intrinsic::log2:
          case Intrinsic::log10:
          case Intrinsic::exp:
          case Intrinsic::exp2:
          case Intrinsic::pow:
          case Intrinsic::sin:
          case Intrinsic::cos:
            return true;
          case Intrinsic::copysign:
            if (CI->getArgOperand(0)->getType()->getScalarType()->
                isPPC_FP128Ty())
              return true;
            else
              continue; // ISD::FCOPYSIGN is never a library call.
          case Intrinsic::sqrt:               Opcode = ISD::FSQRT;      break;
          case Intrinsic::floor:              Opcode = ISD::FFLOOR;     break;
          case Intrinsic::ceil:               Opcode = ISD::FCEIL;      break;
          case Intrinsic::trunc:              Opcode = ISD::FTRUNC;     break;
          case Intrinsic::rint:               Opcode = ISD::FRINT;      break;
          case Intrinsic::nearbyint:          Opcode = ISD::FNEARBYINT; break;
          case Intrinsic::round:              Opcode = ISD::FROUND;     break;
          case Intrinsic::minnum:             Opcode = ISD::FMINNUM;    break;
          case Intrinsic::maxnum:             Opcode = ISD::FMAXNUM;    break;
          case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO;      break;
          case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO;      break;
          }
        }

        // PowerPC does not use [US]DIVREM or other library calls for
        // operations on regular types which are not otherwise library calls
        // (i.e. soft float or atomics). If adapting for targets that do,
        // additional care is required here.

        LibFunc Func;
        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
            LibInfo->getLibFunc(F->getName(), Func) &&
            LibInfo->hasOptimizedCodeGen(Func)) {
          // Non-read-only functions are never treated as intrinsics.
          if (!CI->onlyReadsMemory())
            return true;

          // Conversion happens only for FP calls.
          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
            return true;

          switch (Func) {
          default: return true;
          case LibFunc_copysign:
          case LibFunc_copysignf:
            continue; // ISD::FCOPYSIGN is never a library call.
          case LibFunc_copysignl:
            return true;
          case LibFunc_fabs:
          case LibFunc_fabsf:
          case LibFunc_fabsl:
            continue; // ISD::FABS is never a library call.
          case LibFunc_sqrt:
          case LibFunc_sqrtf:
          case LibFunc_sqrtl:
            Opcode = ISD::FSQRT; break;
          case LibFunc_floor:
          case LibFunc_floorf:
          case LibFunc_floorl:
            Opcode = ISD::FFLOOR; break;
          case LibFunc_nearbyint:
          case LibFunc_nearbyintf:
          case LibFunc_nearbyintl:
            Opcode = ISD::FNEARBYINT; break;
          case LibFunc_ceil:
          case LibFunc_ceilf:
          case LibFunc_ceill:
            Opcode = ISD::FCEIL; break;
          case LibFunc_rint:
          case LibFunc_rintf:
          case LibFunc_rintl:
            Opcode = ISD::FRINT; break;
          case LibFunc_round:
          case LibFunc_roundf:
          case LibFunc_roundl:
            Opcode = ISD::FROUND; break;
          case LibFunc_trunc:
          case LibFunc_truncf:
          case LibFunc_truncl:
            Opcode = ISD::FTRUNC; break;
          case LibFunc_fmin:
          case LibFunc_fminf:
          case LibFunc_fminl:
            Opcode = ISD::FMINNUM; break;
          case LibFunc_fmax:
          case LibFunc_fmaxf:
          case LibFunc_fmaxl:
            Opcode = ISD::FMAXNUM; break;
          }
        }

        if (Opcode) {
          EVT EVTy =
              TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true);

          if (EVTy == MVT::Other)
            return true;

          if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
            continue;
          else if (EVTy.isVector() &&
                   TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
            continue;

          return true;
        }
      }

      return true;
    } else if (isa<BinaryOperator>(J) &&
               J->getType()->getScalarType()->isPPC_FP128Ty()) {
      // Most operations on ppc_f128 values become calls.
      return true;
    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
      CastInst *CI = cast<CastInst>(J);
      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
          isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
          isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
        return true;
    } else if (isLargeIntegerTy(!TM->isPPC64(),
                                J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::UDiv ||
                J->getOpcode() == Instruction::SDiv ||
                J->getOpcode() == Instruction::URem ||
                J->getOpcode() == Instruction::SRem)) {
      return true;
    } else if (!TM->isPPC64() &&
               isLargeIntegerTy(false, J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::Shl ||
                J->getOpcode() == Instruction::AShr ||
                J->getOpcode() == Instruction::LShr)) {
      // Only on PPC32, for 128-bit integers (specifically not 64-bit
      // integers), these might be runtime calls.
      return true;
    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
      // On PowerPC, indirect jumps use the counter register.
      return true;
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
      if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
        return true;
    }

    // FREM is always a call.
    if (J->getOpcode() == Instruction::FRem)
      return true;

    if (STI->useSoftFloat()) {
      switch(J->getOpcode()) {
      case Instruction::FAdd:
      case Instruction::FSub:
      case Instruction::FMul:
      case Instruction::FDiv:
      case Instruction::FPTrunc:
      case Instruction::FPExt:
      case Instruction::FPToUI:
      case Instruction::FPToSI:
      case Instruction::UIToFP:
      case Instruction::SIToFP:
      case Instruction::FCmp:
        return true;
      }
    }

    for (Value *Operand : J->operands())
      if (memAddrUsesCTR(*TM, Operand))
        return true;
  }

  return false;
}