SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32 bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
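// Illustrative note (not part of the original lowering): the SHL/SRA pair
// above is the classic shift trick for sign_extend_inreg. For a value with
// baseBits significant bits held in a Bits-wide register, shifting left by
// Bits - baseBits places the sign bit in the MSB, and an arithmetic shift
// right by the same amount smears it back down. A hypothetical scalar
// equivalent for baseBits = 8 in a 32-bit register:
//
//   int32_t signExtendInReg8(uint32_t X) {
//     return (int32_t)(X << 24) >> 24; // 0x000000FFu -> 0xFFFFFFFF (-1)
//   }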
bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
  MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
  return ScalarVT != MVT::f32 && ScalarVT != MVT::f64;
}
// The backend supports 32- and 64-bit floating point immediates.
bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
  return ScalarVT == MVT::f32 || ScalarVT == MVT::f64;
}
SDValue AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16 ||
             OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}
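// Note on the dispatch above: i8 and i16 can take the LowerSDIV24 path
// because their values fit well within the 24 significant bits an f32
// mantissa represents exactly, so the float-based expansion below yields an
// exact quotient; i32 and i64 exceed that precision and need the dedicated
// LowerSDIV32/LowerSDIV64 expansions. (The "24" in the name presumably
// refers to that 24-bit mantissa limit.)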
SDValue AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  MVT INTTY;
  MVT FLTTY;
  if (!OVT.isVector()) {
    INTTY = MVT::i32;
    FLTTY = MVT::f32;
  } else if (OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
  // jq = jq | 0x1
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
  // int ib = (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
  // float fr = mad(fqneg, fb, fa);
  SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
  // int cv = fr >= fb;
  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
  // jq = (cv ? jq : 0);
  jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq, DAG.getConstant(0, OVT));
  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}
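// Worked example for the correction term above (illustrative; it assumes
// native_divide/DIV_INF may be off by an ulp): for char operands a = 6 and
// b = 2, suppose fq comes back as 2.9999998 rather than 3.0. Then trunc
// gives iq = 2 and fr = mad(-2.0, 2.0, 6.0) = 2.0, so fabs(fr) >= fabs(fb)
// and the select keeps jq. With bitsize = 8, jq = ((a ^ b) >> 6) | 1 = 1
// because a and b share a sign, so the result is iq + jq = 3, the exact
// quotient. For opposite signs the arithmetic shift smears the sign bit,
// giving jq = -1, which nudges the truncated quotient the other way.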
bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
       J != JE; ++J) {
    if (CallInst *CI = dyn_cast<CallInst>(J)) {
      // Inline ASM is okay, unless it clobbers the ctr register.
      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
        if (asmClobbersCTR(IA))
          return true;
        continue;
      }

      if (Function *F = CI->getCalledFunction()) {
        // Most intrinsics don't become function calls, but some might.
        // sin, cos, exp and log are always calls.
        unsigned Opcode = 0;
        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
          switch (F->getIntrinsicID()) {
          default: continue;
          // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
          // we're definitely using CTR.
          case Intrinsic::ppc_is_decremented_ctr_nonzero:
          case Intrinsic::ppc_mtctr:
            return true;

// VisualStudio defines setjmp as _setjmp
#if defined(_MSC_VER) && defined(setjmp) && \
                       !defined(setjmp_undefined_for_msvc)
#  pragma push_macro("setjmp")
#  undef setjmp
#  define setjmp_undefined_for_msvc
#endif

          case Intrinsic::setjmp:

#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
  // let's return it to _setjmp state
#  pragma pop_macro("setjmp")
#  undef setjmp_undefined_for_msvc
#endif

          case Intrinsic::longjmp:

          // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
          // because, although it does clobber the counter register, the
          // control can't then return to inside the loop unless there is also
          // an eh_sjlj_setjmp.
          case Intrinsic::eh_sjlj_setjmp:

          case Intrinsic::memcpy:
          case Intrinsic::memmove:
          case Intrinsic::memset:
          case Intrinsic::powi:
          case Intrinsic::log:
          case Intrinsic::log2:
          case Intrinsic::log10:
          case Intrinsic::exp:
          case Intrinsic::exp2:
          case Intrinsic::pow:
          case Intrinsic::sin:
          case Intrinsic::cos:
            return true;
          case Intrinsic::copysign:
            if (CI->getArgOperand(0)->getType()->getScalarType()->
                isPPC_FP128Ty())
              return true;
            else
              continue; // ISD::FCOPYSIGN is never a library call.
          case Intrinsic::sqrt:               Opcode = ISD::FSQRT;      break;
          case Intrinsic::floor:              Opcode = ISD::FFLOOR;     break;
          case Intrinsic::ceil:               Opcode = ISD::FCEIL;      break;
          case Intrinsic::trunc:              Opcode = ISD::FTRUNC;     break;
          case Intrinsic::rint:               Opcode = ISD::FRINT;      break;
          case Intrinsic::nearbyint:          Opcode = ISD::FNEARBYINT; break;
          case Intrinsic::round:              Opcode = ISD::FROUND;     break;
          case Intrinsic::minnum:             Opcode = ISD::FMINNUM;    break;
          case Intrinsic::maxnum:             Opcode = ISD::FMAXNUM;    break;
          case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO;      break;
          case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO;      break;
          }
        }

        // PowerPC does not use [US]DIVREM or other library calls for
        // operations on regular types which are not otherwise library calls
        // (i.e. soft float or atomics). If adapting for targets that do,
        // additional care is required here.

        LibFunc Func;
        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
            LibInfo->getLibFunc(F->getName(), Func) &&
            LibInfo->hasOptimizedCodeGen(Func)) {
          // Non-read-only functions are never treated as intrinsics.
          if (!CI->onlyReadsMemory())
            return true;

          // Conversion happens only for FP calls.
          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
            return true;

          switch (Func) {
          default: return true;
          case LibFunc_copysign:
          case LibFunc_copysignf:
            continue; // ISD::FCOPYSIGN is never a library call.
          case LibFunc_copysignl:
            return true;
          case LibFunc_fabs:
          case LibFunc_fabsf:
          case LibFunc_fabsl:
            continue; // ISD::FABS is never a library call.
          case LibFunc_sqrt:
          case LibFunc_sqrtf:
          case LibFunc_sqrtl:
            Opcode = ISD::FSQRT; break;
          case LibFunc_floor:
          case LibFunc_floorf:
          case LibFunc_floorl:
            Opcode = ISD::FFLOOR; break;
          case LibFunc_nearbyint:
          case LibFunc_nearbyintf:
          case LibFunc_nearbyintl:
            Opcode = ISD::FNEARBYINT; break;
          case LibFunc_ceil:
          case LibFunc_ceilf:
          case LibFunc_ceill:
            Opcode = ISD::FCEIL; break;
          case LibFunc_rint:
          case LibFunc_rintf:
          case LibFunc_rintl:
            Opcode = ISD::FRINT; break;
          case LibFunc_round:
          case LibFunc_roundf:
          case LibFunc_roundl:
            Opcode = ISD::FROUND; break;
          case LibFunc_trunc:
          case LibFunc_truncf:
          case LibFunc_truncl:
            Opcode = ISD::FTRUNC; break;
          case LibFunc_fmin:
          case LibFunc_fminf:
          case LibFunc_fminl:
            Opcode = ISD::FMINNUM; break;
          case LibFunc_fmax:
          case LibFunc_fmaxf:
          case LibFunc_fmaxl:
            Opcode = ISD::FMAXNUM; break;
          }
        }

        if (Opcode) {
          EVT EVTy =
              TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true);

          if (EVTy == MVT::Other)
            return true;

          if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
            continue;
          else if (EVTy.isVector() &&
                   TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
            continue;

          return true;
        }
      }

      return true;
    } else if (isa<BinaryOperator>(J) &&
               J->getType()->getScalarType()->isPPC_FP128Ty()) {
      // Most operations on ppc_f128 values become calls.
      return true;
    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
      CastInst *CI = cast<CastInst>(J);
      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
          isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
          isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
        return true;
    } else if (isLargeIntegerTy(!TM->isPPC64(),
                                J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::UDiv ||
                J->getOpcode() == Instruction::SDiv ||
                J->getOpcode() == Instruction::URem ||
                J->getOpcode() == Instruction::SRem)) {
      return true;
    } else if (!TM->isPPC64() &&
               isLargeIntegerTy(false, J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::Shl ||
                J->getOpcode() == Instruction::AShr ||
                J->getOpcode() == Instruction::LShr)) {
      // Only on PPC32, for 128-bit integers (specifically not 64-bit
      // integers), these might be runtime calls.
      return true;
    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
      // On PowerPC, indirect jumps use the counter register.
      return true;
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
      if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
        return true;
    }

    // FREM is always a call.
    if (J->getOpcode() == Instruction::FRem)
      return true;

    if (STI->useSoftFloat()) {
      switch (J->getOpcode()) {
      case Instruction::FAdd:
      case Instruction::FSub:
      case Instruction::FMul:
      case Instruction::FDiv:
      case Instruction::FPTrunc:
      case Instruction::FPExt:
      case Instruction::FPToUI:
      case Instruction::FPToSI:
      case Instruction::UIToFP:
      case Instruction::SIToFP:
      case Instruction::FCmp:
        return true;
      }
    }

    for (Value *Operand : J->operands())
      if (memAddrUsesCTR(*TM, Operand))
        return true;
  }

  return false;
}
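// Illustrative example (not from the original source): a loop body such as
//
//   for (unsigned i = 0; i != n; ++i)
//     x[i] = sinf(y[i]);
//
// makes mightUseCTR return true, since sinf stays a real library call and
// the counter register is not preserved across calls, so the loop cannot be
// converted to a mtctr/bdnz counter loop. A body containing only legal FP
// arithmetic would fall through every check and remain a candidate.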