/// Create a call to \p Callee with the arguments \p Ops and place it at the
/// current insertion point of \p Builder.
///
/// \param Callee    The value being called.
/// \param Ops       Arguments passed to the call.
/// \param Builder   Supplies the insertion block, the insertion point and the
///                  debug location applied to the new instruction.
/// \param Name      Name given to the created call (empty by default).
/// \param FMFSource When non-null, its fast-math flags are copied onto the
///                  newly created call.
/// \returns The newly created and inserted call instruction.
static CallInst *createCallHelper(Value *Callee, ArrayRef<Value *> Ops,
                                  IRBuilderBase *Builder,
                                  const Twine &Name = "",
                                  Instruction *FMFSource = nullptr) {
  CallInst *Call = CallInst::Create(Callee, Ops, Name);

  // Fast-math flags are propagated only when a source instruction is given.
  if (FMFSource != nullptr)
    Call->copyFastMathFlags(FMFSource);

  // The call is created detached; insert it manually at the builder's
  // insertion point and then let the builder set its debug location.
  auto *InsertBlock = Builder->GetInsertBlock();
  auto InsertPos = Builder->GetInsertPoint();
  InsertBlock->getInstList().insert(InsertPos, Call);
  Builder->SetInstDebugLocation(Call);

  return Call;
}
// The fractional part of a float is enough to accurately represent up to // a 24-bit signed integer. Value* AMDGPUCodeGenPrepare::expandDivRem24(IRBuilder<> &Builder, BinaryOperator &I, Value *Num, Value *Den, bool IsDiv, bool IsSigned) const { assert(Num->getType()->isIntegerTy(32)); const DataLayout &DL = Mod->getDataLayout(); unsigned LHSSignBits = ComputeNumSignBits(Num, DL, 0, AC, &I); if (LHSSignBits < 9) return nullptr; unsigned RHSSignBits = ComputeNumSignBits(Den, DL, 0, AC, &I); if (RHSSignBits < 9) return nullptr; unsigned SignBits = std::min(LHSSignBits, RHSSignBits); unsigned DivBits = 32 - SignBits; if (IsSigned) ++DivBits; Type *Ty = Num->getType(); Type *I32Ty = Builder.getInt32Ty(); Type *F32Ty = Builder.getFloatTy(); ConstantInt *One = Builder.getInt32(1); Value *JQ = One; if (IsSigned) { // char|short jq = ia ^ ib; JQ = Builder.CreateXor(Num, Den); // jq = jq >> (bitsize - 2) JQ = Builder.CreateAShr(JQ, Builder.getInt32(30)); // jq = jq | 0x1 JQ = Builder.CreateOr(JQ, One); } // int ia = (int)LHS; Value *IA = Num; // int ib, (int)RHS; Value *IB = Den; // float fa = (float)ia; Value *FA = IsSigned ? Builder.CreateSIToFP(IA, F32Ty) : Builder.CreateUIToFP(IA, F32Ty); // float fb = (float)ib; Value *FB = IsSigned ? Builder.CreateSIToFP(IB,F32Ty) : Builder.CreateUIToFP(IB,F32Ty); Value *RCP = Builder.CreateFDiv(ConstantFP::get(F32Ty, 1.0), FB); Value *FQM = Builder.CreateFMul(FA, RCP); // fq = trunc(fqm); CallInst* FQ = Builder.CreateIntrinsic(Intrinsic::trunc, { FQM }); FQ->copyFastMathFlags(Builder.getFastMathFlags()); // float fqneg = -fq; Value *FQNeg = Builder.CreateFNeg(FQ); // float fr = mad(fqneg, fb, fa); Value *FR = Builder.CreateIntrinsic(Intrinsic::amdgcn_fmad_ftz, { FQNeg, FB, FA }, FQ); // int iq = (int)fq; Value *IQ = IsSigned ? 
Builder.CreateFPToSI(FQ, I32Ty) : Builder.CreateFPToUI(FQ, I32Ty); // fr = fabs(fr); FR = Builder.CreateIntrinsic(Intrinsic::fabs, { FR }, FQ); // fb = fabs(fb); FB = Builder.CreateIntrinsic(Intrinsic::fabs, { FB }, FQ); // int cv = fr >= fb; Value *CV = Builder.CreateFCmpOGE(FR, FB); // jq = (cv ? jq : 0); JQ = Builder.CreateSelect(CV, JQ, Builder.getInt32(0)); // dst = iq + jq; Value *Div = Builder.CreateAdd(IQ, JQ); Value *Res = Div; if (!IsDiv) { // Rem needs compensation, it's easier to recompute it Value *Rem = Builder.CreateMul(Div, Den); Res = Builder.CreateSub(Num, Rem); } // Truncate to number of bits this divide really is. if (IsSigned) { Res = Builder.CreateTrunc(Res, Builder.getIntNTy(DivBits)); Res = Builder.CreateSExt(Res, Ty); } else { ConstantInt *TruncMask = Builder.getInt32((UINT64_C(1) << DivBits) - 1); Res = Builder.CreateAnd(Res, TruncMask); } return Res; }