// Rewrite a bitreverse on a narrow (sub-32-bit) uniform integer type as a
// 32-bit bitreverse: zero-extend the operand to i32, reverse, then shift the
// reversed bits (which land in the high end of the i32) back down and
// truncate to the original type. Always succeeds; returns true to signal the
// IR was changed.
bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(IntrinsicInst &I) const {
  assert(I.getIntrinsicID() == Intrinsic::bitreverse &&
         "I must be bitreverse intrinsic");
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  IRBuilder<> B(&I);
  B.SetCurrentDebugLocation(I.getDebugLoc());

  Type *WideTy = getI32Ty(B, I.getType());
  Function *WideBitrev =
      Intrinsic::getDeclaration(Mod, Intrinsic::bitreverse, {WideTy});

  Value *Widened = B.CreateZExt(I.getOperand(0), WideTy);
  Value *Reversed = B.CreateCall(WideBitrev, {Widened});
  // The meaningful reversed bits sit in the top bits of the i32 result;
  // shift them into the low bits before narrowing.
  unsigned ShiftAmt = 32 - getBaseElementBitWidth(I.getType());
  Value *Shifted = B.CreateLShr(Reversed, ShiftAmt);
  Value *Narrowed = B.CreateTrunc(Shifted, I.getType());

  I.replaceAllUsesWith(Narrowed);
  I.eraseFromParent();
  return true;
}
// Visitor hook for llvm.bitreverse calls. Promote to i32 only when the
// subtarget has 16-bit instructions, the result type is narrower than i32,
// and divergence analysis proves the value uniform; otherwise leave the
// instruction alone. Returns true iff the IR was changed.
bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
  if (!ST->has16BitInsts() || !needsPromotionToI32(I.getType()) ||
      !DA->isUniform(&I))
    return false;

  return promoteUniformBitreverseToI32(I);
}
// Try to promote a statically-sized, non-array alloca: first to a vector, and
// failing that to a module-level global in the LDS (local) address space,
// sized WorkGroupSize copies wide so each work item indexes its own slice.
// All collected pointer-typed uses are then rewritten to the new address
// space in place.
// NOTE(review): relies on collectUsesWithPtrTypes() only admitting uses this
// loop knows how to rewrite — confirm against that helper's implementation.
void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
  // Array allocations are probably not worth handling, since an allocation of
  // the array type is the canonical form.
  if (!I.isStaticAlloca() || I.isArrayAllocation())
    return;

  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I))
    return;

  DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");

  const Function &ContainingFunction = *I.getParent()->getParent();

  // FIXME: We should also try to get this value from the reqd_work_group_size
  // function attribute if it is available.
  unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction);

  // Each work item needs its own copy, so the LDS cost is the per-item size
  // multiplied by the maximum work-group size.
  int AllocaSize =
      WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);

  if (AllocaSize > LocalMemAvailable) {
    DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
    return;
  }

  std::vector<Value*> WorkList;

  // Gather every use that must be retargeted to the new address space; bail
  // out entirely if any use is of a form we cannot rewrite.
  if (!collectUsesWithPtrTypes(&I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");
  LocalMemAvailable -= AllocaSize;

  Function *F = I.getParent()->getParent();

  // One array element per work item, addressed below by a flattened
  // work-item ID.
  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage, UndefValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(), nullptr,
      GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
  GV->setUnnamedAddr(true);
  GV->setAlignment(I.getAlignment());

  // Flattened work-item index:
  //   TID = (TCntY * TCntZ) * TIdX + TCntZ * TIdY + TIdZ
  // The "true, true" arguments mark the multiplies nuw/nsw.
  Value *TCntY, *TCntZ;
  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  Value *Indices[] = {
    Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
    TID
  };

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);

  // Retype the alloca itself before RAUW so the replacement is type-correct,
  // then delete it.
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  // Second pass: patch every collected use to the LOCAL address space.
  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);

      // The operand's value should be corrected on its own.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);
      continue;
    }

    IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
    if (!Intr) {
      // FIXME: What is this for? It doesn't make sense to promote arbitrary
      // function calls. If the call is to a defined function that can also be
      // promoted, we should be able to do this once that function is also
      // rewritten.

      // Redirect the call to a clone-by-name (".local" suffix) whose
      // signature matches the rewritten argument types.
      std::vector<Type*> ArgTypes;
      for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
           ArgIdx != ArgEnd; ++ArgIdx) {
        ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
      }

      Function *F = Call->getCalledFunction();
      FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
                                                F->isVarArg());
      Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
                                             NewType, F->getAttributes());
      Function *NewF = cast<Function>(C);
      Call->setCalledFunction(NewF);
      continue;
    }

    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      // Re-emit the mem intrinsics so their pointer-typed overloads match
      // the new address space, then drop the originals.
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
                           MemCpy->getLength(), MemCpy->getAlignment(),
                           MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
      Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getRawSource(),
                            MemMove->getLength(), MemMove->getAlignment(),
                            MemMove->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::invariant_group_barrier:
      Intr->eraseFromParent();
      // FIXME: I think the invariant marker should still theoretically apply,
      // but the intrinsics need to be changed to accept pointers with any
      // address space.
      continue;
    case Intrinsic::objectsize: {
      // Re-declare objectsize for the LOCAL-address-space pointer overload
      // and replace the original call.
      Value *Src = Intr->getOperand(0);
      Type *SrcTy = Src->getType()->getPointerElementType();
      Function *ObjectSize = Intrinsic::getDeclaration(Mod,
        Intrinsic::objectsize,
        { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
      );

      CallInst *NewCall
        = Builder.CreateCall(ObjectSize, { Src, Intr->getOperand(1) });
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->dump();
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
}
// FIXME: Should try to pick the most likely to be profitable allocas first.
//
// Try to promote a statically-sized, non-array alloca: first to a vector,
// otherwise to an LDS (local address space) global sized WorkGroupSize copies
// wide, with each work item indexing its own slice by a flattened work-item
// ID. Only kernel calling conventions are eligible, and only while the
// running LDS budget (CurrentLocalMemUsage vs LocalMemLimit) permits.
// Returns true iff the alloca was promoted (to vector or to LDS).
bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
  // Array allocations are probably not worth handling, since an allocation of
  // the array type is the canonical form.
  if (!I.isStaticAlloca() || I.isArrayAllocation())
    return false;

  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I, AS))
    return true; // Promoted to vector.

  const Function &ContainingFunction = *I.getParent()->getParent();
  CallingConv::ID CC = ContainingFunction.getCallingConv();

  // Don't promote the alloca to LDS for shader calling conventions as the work
  // item ID intrinsics are not supported for these calling conventions.
  // Furthermore not all LDS is available for some of the stages.
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    break;
  default:
    DEBUG(dbgs() << " promote alloca to LDS not supported with calling convention.\n");
    return false;
  }

  // Not likely to have sufficient local memory for promotion.
  if (!SufficientLDS)
    return false;

  const AMDGPUSubtarget &ST =
      TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction);
  // Use the maximum flat work-group size: every work item needs a slice.
  unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;

  const DataLayout &DL = Mod->getDataLayout();

  unsigned Align = I.getAlignment();
  if (Align == 0)
    Align = DL.getABITypeAlignment(I.getAllocatedType());

  // FIXME: This computed padding is likely wrong since it depends on inverse
  // usage order.
  //
  // FIXME: It is also possible that if we're allowed to use all of the memory
  // we could end up using more than the maximum due to alignment padding.

  // Charge this alloca (padded to its alignment) against the LDS budget.
  uint32_t NewSize = alignTo(CurrentLocalMemUsage, Align);
  uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
  NewSize += AllocSize;

  if (NewSize > LocalMemLimit) {
    DEBUG(dbgs() << " " << AllocSize
          << " bytes of local memory not available to promote\n");
    return false;
  }

  CurrentLocalMemUsage = NewSize;

  std::vector<Value*> WorkList;

  // Gather every use that must be retargeted to the new address space; bail
  // out if any use is of a form the rewrite loop below cannot handle.
  if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return false;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");

  Function *F = I.getParent()->getParent();

  // One array element per work item.
  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage, UndefValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(), nullptr,
      GlobalVariable::NotThreadLocal, AS.LOCAL_ADDRESS);
  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  GV->setAlignment(I.getAlignment());

  // Flattened work-item index:
  //   TID = (TCntY * TCntZ) * TIdX + TCntZ * TIdY + TIdZ
  // The "true, true" arguments mark the multiplies nuw/nsw.
  Value *TCntY, *TCntZ;
  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  Value *Indices[] = {
    Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
    TID
  };

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);

  // Retype the alloca itself before RAUW so the replacement is type-correct,
  // then delete it.
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  // Second pass: patch every collected use to the LOCAL address space.
  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      // Compares keep their (i1) type; only null-pointer constant operands
      // need to be re-created in the new address space.
      if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
        Value *Src0 = CI->getOperand(0);
        Type *EltTy = Src0->getType()->getPointerElementType();
        PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);

        if (isa<ConstantPointerNull>(CI->getOperand(0)))
          CI->setOperand(0, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(CI->getOperand(1)))
          CI->setOperand(1, ConstantPointerNull::get(NewTy));

        continue;
      }

      // The operand's value should be corrected on its own and we don't want to
      // touch the users.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);

      // Adjust the types of any constant operands.
      if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
        if (isa<ConstantPointerNull>(SI->getOperand(1)))
          SI->setOperand(1, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(SI->getOperand(2)))
          SI->setOperand(2, ConstantPointerNull::get(NewTy));
      } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
        for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
          if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
            Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
        }
      }

      continue;
    }

    // collectUsesWithPtrTypes only admits intrinsic calls, so this cast is
    // safe here (unlike the older dyn_cast-based variant of this pass).
    IntrinsicInst *Intr = cast<IntrinsicInst>(Call);
    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      // Re-emit the mem intrinsics so their pointer-typed overloads match
      // the new address space, then drop the originals.
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getDestAlignment(),
                           MemCpy->getRawSource(), MemCpy->getSourceAlignment(),
                           MemCpy->getLength(), MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
      Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getDestAlignment(),
                            MemMove->getRawSource(), MemMove->getSourceAlignment(),
                            MemMove->getLength(), MemMove->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(),
                           MemSet->getDestAlignment(), MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::invariant_group_barrier:
      Intr->eraseFromParent();
      // FIXME: I think the invariant marker should still theoretically apply,
      // but the intrinsics need to be changed to accept pointers with any
      // address space.
      continue;
    case Intrinsic::objectsize: {
      // Re-declare objectsize for the LOCAL-address-space pointer overload
      // and replace the original call.
      Value *Src = Intr->getOperand(0);
      Type *SrcTy = Src->getType()->getPointerElementType();
      Function *ObjectSize = Intrinsic::getDeclaration(Mod,
        Intrinsic::objectsize,
        { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) }
      );

      CallInst *NewCall = Builder.CreateCall(
          ObjectSize, {Src, Intr->getOperand(1), Intr->getOperand(2)});
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->print(errs());
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
  return true;
}
void DecomposeInsts::decomposeIntrinsics(BasicBlock* bb) { IRBuilder<> builder(module->getContext()); for (BasicBlock::iterator instI = bb->begin(), instE = bb->end(); instI != instE; /* empty */) { Instruction* inst = instI; // Note this increment of instI will skip decompositions of the code // inserted to decompose. E.g., if length -> dot, and dot is also to // be decomposed, then the decomposition of dot will be skipped // unless instI is reset. ++instI; IntrinsicInst* intrinsic = dyn_cast<IntrinsicInst>(inst); if (! intrinsic) continue; // Useful preamble for most case llvm::Value* arg0 = 0; llvm::Value* arg1 = 0; llvm::Value* arg2 = 0; if (inst->getNumOperands() > 0) arg0 = inst->getOperand(0); if (inst->getNumOperands() > 1) arg1 = inst->getOperand(1); if (inst->getNumOperands() > 2) arg2 = inst->getOperand(2); llvm::Value* newInst = 0; Type* instTypes[] = { inst->getType(), inst->getType(), inst->getType(), inst->getType() }; Type* argTypes[] = { arg0->getType(), arg0->getType(), arg0->getType(), arg0->getType() }; builder.SetInsertPoint(instI); switch (intrinsic->getIntrinsicID()) { case Intrinsic::gla_fRadians: { // always decompose // arg0 -> arg0 * pi / 180 const double pi_over_180 = 0.01745329251994329576923690768489; newInst = MultiplyByConstant(builder, arg0, pi_over_180); break; } case Intrinsic::gla_fDegrees: { // always decompose // arg0 -> arg0 * 180 / pi const double pi_into_180 = 57.295779513082320876798154814105; newInst = MultiplyByConstant(builder, arg0, pi_into_180); break; } case Intrinsic::gla_fMin: if (backEnd->decomposeIntrinsic(EDiMin)) { // // min(a,b) = select (a < b), a, b // llvm::Value* smeared = Smear(builder, module, arg1, arg0); newInst = builder.CreateFCmpOLT(arg0, smeared); newInst = builder.CreateSelect(newInst, arg0, smeared); } break; case Intrinsic::gla_fMax: if (backEnd->decomposeIntrinsic(EDiMax)) { // // max(a,b) = select (a > b), a, b // llvm::Value* smeared = Smear(builder, module, arg1, arg0); newInst = 
builder.CreateFCmpOGT(arg0, smeared); newInst = builder.CreateSelect(newInst, arg0, smeared); } break; case Intrinsic::gla_fClamp: if (backEnd->decomposeIntrinsic(EDiClamp)) { // // Clamp(x, minVal, maxVal) is defined to be min(max(x, minVal), maxVal). // // The 2nd and 3rd arguments match each other, but not necessarily // the 1st argument. In the decomposition, this difference matches // min/max's difference in their 1st and 2nd arguments. // argTypes[2] = arg1->getType(); // argTypes[*] start at 0 for the return value, arg* start at 0 for operand 0 Function* max = Intrinsic::getDeclaration(module, Intrinsic::gla_fMax, makeArrayRef(argTypes, 3)); Function* min = Intrinsic::getDeclaration(module, Intrinsic::gla_fMin, makeArrayRef(argTypes, 3)); newInst = builder.CreateCall2(max, arg0, arg1); newInst = builder.CreateCall2(min, newInst, arg2); // Make next iteration revisit this decomposition, in case min // or max are decomposed. instI = inst; ++instI; } break; case Intrinsic::gla_fAsin: if (backEnd->decomposeIntrinsic(EDiAsin)) { UnsupportedFunctionality("decomposition of gla_fAsin"); //changed = true; } break; case Intrinsic::gla_fAcos: if (backEnd->decomposeIntrinsic(EDiAcos)) { // TODO: functionality: Do we need to handle domain errors? (E.g., bad input value) // // acos(x) ~= sqrt(1-x)*(a + x*(b + x*(c + x*d))) // where a = 1.57079632679 // b = -0.213300989 // c = 0.077980478 // d = -0.0216409 // double a = 1.57079632679; double b = -0.213300989; double c = 0.077980478; double d = -0.0216409; // polynomial part, going right to left... 
llvm::Value* poly; poly = MultiplyByConstant(builder, arg0, d); poly = AddWithConstant(builder, poly, c); poly = builder.CreateFMul(arg0, poly); poly = AddWithConstant(builder, poly, b); poly = builder.CreateFMul(arg0, poly); poly = AddWithConstant(builder, poly, a); // sqrt part Function* sqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fSqrt, makeArrayRef(argTypes, 2)); newInst = builder.CreateFNeg(arg0); newInst = AddWithConstant(builder, newInst, 1.0); newInst = builder.CreateCall(sqrt, newInst); newInst = builder.CreateFMul(newInst, poly); } break; case Intrinsic::gla_fAtan: if (backEnd->decomposeIntrinsic(EDiAtan)) { UnsupportedFunctionality("decomposition of gla_fAtan"); //changed = true; } break; case Intrinsic::gla_fAtan2: if (backEnd->decomposeIntrinsic(EDiAtan2)) { UnsupportedFunctionality("decomposition of gla_fAtan2"); //changed = true; } break; case Intrinsic::gla_fCosh: if (backEnd->decomposeIntrinsic(EDiCosh)) { UnsupportedFunctionality("decomposition of gla_fCosh"); //changed = true; } break; case Intrinsic::gla_fSinh: if (backEnd->decomposeIntrinsic(EDiSinh)) { UnsupportedFunctionality("decomposition of gla_fSinh"); //changed = true; } break; case Intrinsic::gla_fTanh: if (backEnd->decomposeIntrinsic(EDiTanh)) { UnsupportedFunctionality("decomposition of gla_fTanh"); //changed = true; } break; case Intrinsic::gla_fAcosh: if (backEnd->decomposeIntrinsic(EDiACosh)) { UnsupportedFunctionality("decomposition of gla_fACosh"); //changed = true; } break; case Intrinsic::gla_fAsinh: if (backEnd->decomposeIntrinsic(EDiASinh)) { UnsupportedFunctionality("decomposition of gla_fASinh"); //changed = true; } break; case Intrinsic::gla_fAtanh: if (backEnd->decomposeIntrinsic(EDiATanh)) { UnsupportedFunctionality("decomposition of gla_fATanh"); //changed = true; } break; case Intrinsic::gla_fPowi: if (backEnd->decomposeIntrinsic(EDiPowi)) { UnsupportedFunctionality("decomposition of gla_fPowi"); //changed = true; } break; case Intrinsic::gla_fExp10: case 
Intrinsic::gla_fExp: if ((intrinsic->getIntrinsicID() == Intrinsic::gla_fExp10 && backEnd->decomposeIntrinsic(EDiExp10)) || (intrinsic->getIntrinsicID() == Intrinsic::gla_fExp && backEnd->decomposeIntrinsic(EDiExp))) { // 10^X = 2^(X /(log base 10 of 2)) // -> 10^X = 2^(X * 3.3219280948873623478703194294894) // // e^X = 2^(X /(log base e of 2)) // -> e^X = 2^(X * 1.4426950408889634073599246810019) //const double inv_log10_e = 2.3025850929940456840179914546844; // 10 -> e, in case it comes up const double inv_log10_2 = 3.3219280948873623478703194294894; // 10 -> 2 const double inv_loge_2 = 1.4426950408889634073599246810019; // e -> 2 double multiplier; if (intrinsic->getIntrinsicID() == Intrinsic::gla_fExp10) multiplier = inv_log10_2; else multiplier = inv_loge_2; newInst = MultiplyByConstant(builder, arg0, multiplier); Function* exp = Intrinsic::getDeclaration(module, Intrinsic::gla_fExp2, makeArrayRef(argTypes, 2)); newInst = builder.CreateCall(exp, newInst); } break; case Intrinsic::gla_fLog10: case Intrinsic::gla_fLog: if ((intrinsic->getIntrinsicID() == Intrinsic::gla_fLog10 && backEnd->decomposeIntrinsic(EDiLog10)) || (intrinsic->getIntrinsicID() == Intrinsic::gla_fLog && backEnd->decomposeIntrinsic(EDiLog))) { // log base 10 of X = (log base 10 of 2) * (log base 2 of X) // -> log base 10 of X = 0.30102999566398119521373889472449 * (log base 2 of X) // // log base e of X = (log base e of 2) * (log base 2 of X) // -> log base e of X = 0.69314718055994530941723212145818 * (log base 2 of X) //const double log10_e = 0.43429448190325182765112891891661; // 10 -> e, in case it comes up const double log10_2 = 0.30102999566398119521373889472449; // 10 -> 2 const double loge_2 = 0.69314718055994530941723212145818; // e -> 2 double multiplier; if (intrinsic->getIntrinsicID() == Intrinsic::gla_fLog10) multiplier = log10_2; else multiplier = loge_2; Function* log = Intrinsic::getDeclaration(module, Intrinsic::gla_fLog2, makeArrayRef(argTypes, 2)); newInst = 
builder.CreateCall(log, arg0); newInst = MultiplyByConstant(builder, newInst, multiplier); } break; case Intrinsic::gla_fInverseSqrt: if (backEnd->decomposeIntrinsic(EDiInverseSqrt)) { Function* sqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fSqrt, makeArrayRef(argTypes, 2)); newInst = builder.CreateCall(sqrt, arg0); newInst = builder.CreateFDiv(MakeFloatConstant(module->getContext(), 1.0), newInst); } break; case Intrinsic::gla_fFraction: if (backEnd->decomposeIntrinsic(EDiFraction)) { UnsupportedFunctionality("decomposition of gla_fFraction"); //changed = true; } break; case Intrinsic::gla_fSign: if (backEnd->decomposeIntrinsic(EDiSign)) { UnsupportedFunctionality("decomposition of gla_fSign"); //changed = true; } break; case Intrinsic::gla_fModF: if (backEnd->decomposeIntrinsic(EDiModF)) { UnsupportedFunctionality("decomposition of gla_fModF"); //changed = true; } break; case Intrinsic::gla_fMix: if (backEnd->decomposeIntrinsic(EDiMix)) { // // genType mix (x, y, a) = x * (1 - a) + y * a // llvm::Value* t; t = builder.CreateFNeg(arg2); t = AddWithConstant(builder, t, 1.0); t = builder.CreateFMul(arg0, t); newInst = builder.CreateFMul(arg1, arg2); newInst = builder.CreateFAdd(t, newInst); } break; case Intrinsic::gla_fStep: if (backEnd->decomposeIntrinsic(EDiStep)) { // // step(edge, x) is defined to be 0.0 if x < edge, otherwise 1.0. // llvm::FCmpInst::Predicate predicate = llvm::FCmpInst::FCMP_OLT; llvm::Value* condition = builder.CreateFCmp(predicate, arg1, arg0); newInst = builder.CreateSelect(condition, VectorizeConstant(GetComponentCount(arg1), MakeFloatConstant(module->getContext(), 0.0)), VectorizeConstant(GetComponentCount(arg1), MakeFloatConstant(module->getContext(), 1.0))); } break; case Intrinsic::gla_fSmoothStep: if (backEnd->decomposeIntrinsic(EDiSmoothStep)) { // // smoothstep (edge0, edge1, x) is defined to be // // t = clamp((x – edge0) / (edge1 – edge0), 0, 1) // t * t * (3 – 2 * t) // // where edge* can be scalar even if x is vector. 
// llvm::Value* smeared0 = Smear(builder, module, arg0, arg2); llvm::Value* smeared1 = Smear(builder, module, arg1, arg2); llvm::Value* numerator = builder.CreateFSub(arg2, smeared0, "numerator"); llvm::Value* denominator = builder.CreateFSub(smeared1, smeared0, "denominator"); llvm::Value* quotient = builder.CreateFDiv(numerator, denominator, "quotient"); llvm::Value* zero = MakeFloatConstant(module->getContext(), 0.0); llvm::Value* one = MakeFloatConstant(module->getContext(), 1.0); Type* newArgTypes[] = { quotient->getType(), quotient->getType(), zero->getType(), one->getType() }; Function* clamp = Intrinsic::getDeclaration(module, Intrinsic::gla_fClamp, newArgTypes); llvm::Value* t = builder.CreateCall3(clamp, quotient, zero, one); newInst = MultiplyByConstant(builder, t, 2.0); newInst = SubFromConstant(builder, 3.0, newInst); newInst = builder.CreateFMul(t, newInst); newInst = builder.CreateFMul(t, newInst); // Make next iteration revisit this decomposition, in case clamp is // decomposed. 
instI = inst; ++instI; } break; case Intrinsic::gla_fIsNan: if (backEnd->decomposeIntrinsic(EDiIsNan)) { UnsupportedFunctionality("decomposition of gla_fIsNan"); //changed = true; } break; case Intrinsic::gla_fFma: if (backEnd->decomposeIntrinsic(EDiFma)) { UnsupportedFunctionality("decomposition of gla_Fma"); //changed = true; } break; case Intrinsic::gla_fPackUnorm2x16: if (backEnd->decomposeIntrinsic(EDiPackUnorm2x16)) { UnsupportedFunctionality("decomposition of gla_fPackUnorm2x16"); //changed = true; } break; case Intrinsic::gla_fPackUnorm4x8: if (backEnd->decomposeIntrinsic(EDiPackUnorm4x8)) { UnsupportedFunctionality("decomposition of gla_fPackUnorm4x8"); //changed = true; } break; case Intrinsic::gla_fPackSnorm4x8: if (backEnd->decomposeIntrinsic(EDiPackSnorm4x8)) { UnsupportedFunctionality("decomposition of gla_fPackSnorm4x8"); //changed = true; } break; case Intrinsic::gla_fUnpackUnorm2x16: if (backEnd->decomposeIntrinsic(EDiUnpackUnorm2x16)) { UnsupportedFunctionality("decomposition of gla_fUnpackUnorm2x16"); //changed = true; } break; case Intrinsic::gla_fUnpackUnorm4x8: if (backEnd->decomposeIntrinsic(EDiUnpackUnorm4x8)) { UnsupportedFunctionality("decomposition of gla_fUnpackUnorm4x8"); //changed = true; } break; case Intrinsic::gla_fUnpackSnorm4x8: if (backEnd->decomposeIntrinsic(EDiUnpackSnorm4x8)) { UnsupportedFunctionality("decomposition of gla_fUnpackSnorm4x8"); //changed = true; } break; case Intrinsic::gla_fPackDouble2x32: if (backEnd->decomposeIntrinsic(EDiPackDouble2x32)) { UnsupportedFunctionality("decomposition of gla_fPackDouble2x32"); //changed = true; } break; case Intrinsic::gla_fUnpackDouble2x32: if (backEnd->decomposeIntrinsic(EDiUnpackDouble2x32)) { UnsupportedFunctionality("decomposition of gla_fUnpackDouble2x32"); //changed = true; } break; case Intrinsic::gla_fLength: if (backEnd->decomposeIntrinsic(EDiLength)) { if (GetComponentCount(arg0) > 1) { Function* dot = GetDotIntrinsic(module, argTypes); newInst = 
builder.CreateCall2(dot, arg0, arg0); Function* sqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fSqrt, makeArrayRef(instTypes, 2)); newInst = builder.CreateCall(sqrt, newInst); } else { Function* abs = Intrinsic::getDeclaration(module, Intrinsic::gla_fAbs, makeArrayRef(instTypes, 2)); newInst = builder.CreateCall(abs, arg0); } // Make next iteration revisit this decomposition, in case dot is // decomposed. instI = inst; ++instI; } break; case Intrinsic::gla_fDistance: if (backEnd->decomposeIntrinsic(EDiDistance)) { newInst = builder.CreateFSub(arg0, arg1); llvm::Type* types[] = { GetBasicType(newInst), newInst->getType() }; Function* length = Intrinsic::getDeclaration(module, Intrinsic::gla_fLength, types); newInst = builder.CreateCall(length, newInst); // Make next iteration revisit this decomposition, in case length is // decomposed. instI = inst; ++instI; } break; case Intrinsic::gla_fDot2: if (backEnd->decomposeIntrinsic(EDiDot)) { newInst = builder.CreateFMul(arg0, arg1); llvm::Value* element0 = builder.CreateExtractElement(newInst, MakeUnsignedConstant(module->getContext(), 0)); llvm::Value* element1 = builder.CreateExtractElement(newInst, MakeUnsignedConstant(module->getContext(), 1)); newInst = builder.CreateFAdd(element0, element1); } break; case Intrinsic::gla_fDot3: if (backEnd->decomposeIntrinsic(EDiDot)) { newInst = builder.CreateFMul(arg0, arg1); arg0 = newInst; llvm::Value* element0 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0)); llvm::Value* element1 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 1)); newInst = builder.CreateFAdd(element0, element1); llvm::Value* element = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 2)); newInst = builder.CreateFAdd(newInst, element); } break; case Intrinsic::gla_fDot4: if (backEnd->decomposeIntrinsic(EDiDot)) { newInst = builder.CreateFMul(arg0, arg1); arg0 = newInst; llvm::Value* element0 = 
builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0)); llvm::Value* element1 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 1)); newInst = builder.CreateFAdd(element0, element1); for (int el = 2; el < 4; ++el) { llvm::Value* element = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), el)); newInst = builder.CreateFAdd(newInst, element); } } break; case Intrinsic::gla_fCross: if (backEnd->decomposeIntrinsic(EDiCross)) { // (a1, a2, a3) X (b1, b2, b3) -> (a2*b3 - a3*b2, a3*b1 - a1*b3, a1*b2 - a2*b1) llvm::Value* a1 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0)); llvm::Value* a2 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 1)); llvm::Value* a3 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 2)); llvm::Value* b1 = builder.CreateExtractElement(arg1, MakeUnsignedConstant(module->getContext(), 0)); llvm::Value* b2 = builder.CreateExtractElement(arg1, MakeUnsignedConstant(module->getContext(), 1)); llvm::Value* b3 = builder.CreateExtractElement(arg1, MakeUnsignedConstant(module->getContext(), 2)); llvm::Value* empty = llvm::UndefValue::get(arg0->getType()); bool scalarized = false; if (scalarized) { // do it all with scalars // a2*b3 - a3*b2 llvm::Value* p1 = builder.CreateFMul(a2, b3); llvm::Value* p2 = builder.CreateFMul(a3, b2); llvm::Value* element = builder.CreateFSub(p1, p2); newInst = builder.CreateInsertElement(empty, element, MakeUnsignedConstant(module->getContext(), 0)); // a3*b1 - a1*b3 p1 = builder.CreateFMul(a3, b1); p2 = builder.CreateFMul(a1, b3); element = builder.CreateFSub(p1, p2); newInst = builder.CreateInsertElement(newInst, element, MakeUnsignedConstant(module->getContext(), 1)); // a1*b2 - a2*b1 p1 = builder.CreateFMul(a1, b2); p2 = builder.CreateFMul(a2, b1); element = builder.CreateFSub(p1, p2); newInst = builder.CreateInsertElement(newInst, element, 
MakeUnsignedConstant(module->getContext(), 2)); } else { // do it all with vectors // (a2, a3, a1) llvm::Value* aPerm; aPerm = builder.CreateInsertElement(empty, a2, MakeUnsignedConstant(module->getContext(), 0)); aPerm = builder.CreateInsertElement(aPerm, a3, MakeUnsignedConstant(module->getContext(), 1)); aPerm = builder.CreateInsertElement(aPerm, a1, MakeUnsignedConstant(module->getContext(), 2)); // (b3, b1, b2) llvm::Value* bPerm; bPerm = builder.CreateInsertElement(empty, b3, MakeUnsignedConstant(module->getContext(), 0)); bPerm = builder.CreateInsertElement(bPerm, b1, MakeUnsignedConstant(module->getContext(), 1)); bPerm = builder.CreateInsertElement(bPerm, b2, MakeUnsignedConstant(module->getContext(), 2)); // first term computation llvm::Value* firstTerm = builder.CreateFMul(aPerm, bPerm); // (a3, a1, a2) aPerm = builder.CreateInsertElement(empty, a3, MakeUnsignedConstant(module->getContext(), 0)); aPerm = builder.CreateInsertElement(aPerm, a1, MakeUnsignedConstant(module->getContext(), 1)); aPerm = builder.CreateInsertElement(aPerm, a2, MakeUnsignedConstant(module->getContext(), 2)); // (b2, b3, b1) bPerm = builder.CreateInsertElement(empty, b2, MakeUnsignedConstant(module->getContext(), 0)); bPerm = builder.CreateInsertElement(bPerm, b3, MakeUnsignedConstant(module->getContext(), 1)); bPerm = builder.CreateInsertElement(bPerm, b1, MakeUnsignedConstant(module->getContext(), 2)); // second term computation newInst = builder.CreateFMul(aPerm, bPerm); // Finish it off newInst = builder.CreateFSub(firstTerm, newInst); } } break; case Intrinsic::gla_fNormalize: if (backEnd->decomposeIntrinsic(EDiNormalize)) { if (GetComponentCount(arg0) > 1) { Function* dot = GetDotIntrinsic(module, argTypes); newInst = builder.CreateCall2(dot, arg0, arg0); llvm::Type* type[] = { newInst->getType(), newInst->getType() }; Function* inverseSqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fInverseSqrt, type); newInst = builder.CreateCall(inverseSqrt, newInst); // smear it 
llvm::Value* smeared = llvm::UndefValue::get(arg0->getType()); for (int c = 0; c < GetComponentCount(arg0); ++c) smeared = builder.CreateInsertElement(smeared, newInst, MakeIntConstant(module->getContext(), c)); newInst = builder.CreateFMul(arg0, smeared); } else { newInst = MakeFloatConstant(module->getContext(), 1.0); } // Make next iteration revisit this decomposition, in case dot or inverse-sqrt // are decomposed. instI = inst; ++instI; } break; case Intrinsic::gla_fNormalize3D: if (backEnd->decomposeIntrinsic(EDiNormalize3D)) { // Note: This does a 3D normalize on a vec3 or vec4. The width of arg0 does // not determine that width of the dot-product input, the "3" in the "3D" does. llvm::Type* types[] = { GetBasicType(argTypes[0]), argTypes[0], argTypes[1] }; Function* dot = Intrinsic::getDeclaration(module, Intrinsic::gla_fDot3, types); newInst = builder.CreateCall2(dot, arg0, arg0); llvm::Type* type[] = { newInst->getType(), newInst->getType() }; Function* inverseSqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fInverseSqrt, type); newInst = builder.CreateCall(inverseSqrt, newInst); // smear it llvm::Value* smeared = llvm::UndefValue::get(arg0->getType()); for (int c = 0; c < GetComponentCount(arg0); ++c) smeared = builder.CreateInsertElement(smeared, newInst, MakeIntConstant(module->getContext(), c)); // If we're 4-wide, copy over the original w component if (GetComponentCount(arg0) == 4) smeared = builder.CreateInsertElement(smeared, arg0, MakeIntConstant(module->getContext(), 4)); newInst = builder.CreateFMul(arg0, smeared); // Make next iteration revisit this decomposition, in case dot or inverse-sqrt // are decomposed. 
instI = inst; ++instI; } break; case Intrinsic::gla_fLit: if (backEnd->decomposeIntrinsic(EDiLit)) { UnsupportedFunctionality("decomposition of gla_fLit"); //changed = true; } break; case Intrinsic::gla_fFaceForward: if (backEnd->decomposeIntrinsic(EDiFaceForward)) { // // faceForward(N, I, Nref) is defined to be N if dot(Nref, I) < 0, otherwise return –N. // UnsupportedFunctionality("decomposition of gla_fFaceForward"); //changed = true; } break; case Intrinsic::gla_fReflect: if (backEnd->decomposeIntrinsic(EDiReflect)) { // // reflect(I, N) is defined to be I – 2 * dot(N, I) * N, // where N may be assumed to be normalized. // // Note if the number of components is 1, then N == 1 and // this turns into I - 2*I, or -I. // if (GetComponentCount(arg0) > 1) { Function* dot = GetDotIntrinsic(module, argTypes); newInst = builder.CreateCall2(dot, arg0, arg1); newInst = MultiplyByConstant(builder, newInst, 2.0); // smear this back up to a vector again llvm::Value* smeared = llvm::UndefValue::get(arg0->getType()); for (int c = 0; c < GetComponentCount(arg0); ++c) smeared = builder.CreateInsertElement(smeared, newInst, MakeIntConstant(module->getContext(), c)); newInst = builder.CreateFMul(smeared, arg1); newInst = builder.CreateFSub(arg0, newInst); } else { newInst = builder.CreateFNeg(arg0); } // Make next iteration revisit this decomposition, in case dot // is decomposed instI = inst; ++instI; } break; case Intrinsic::gla_fRefract: if (backEnd->decomposeIntrinsic(EDiRefract)) { UnsupportedFunctionality("decomposition of gla_fRefract"); //changed = true; } break; case Intrinsic::gla_fFilterWidth: if (backEnd->decomposeIntrinsic(EDiFilterWidth)) { // filterWidth = abs(dFdx(p)) + abs(dFdy(p)) Function* dFdx = Intrinsic::getDeclaration(module, Intrinsic::gla_fDFdx, makeArrayRef(argTypes, 2)); Function* dFdy = Intrinsic::getDeclaration(module, Intrinsic::gla_fDFdy, makeArrayRef(argTypes, 2)); Function* abs = Intrinsic::getDeclaration(module, Intrinsic::gla_fAbs, 
makeArrayRef(instTypes, 2)); llvm::Value* dx = builder.CreateCall(dFdx, arg0); llvm::Value* dy = builder.CreateCall(dFdy, arg0); dx = builder.CreateCall(abs, dx); dy = builder.CreateCall(abs, dy); newInst = builder.CreateFAdd(dx, dy); } break; case Intrinsic::gla_fFixedTransform: if (backEnd->decomposeIntrinsic(EDiFixedTransform)) { UnsupportedFunctionality("decomposition of gla_fFixedTransform"); //changed = true; } break; case Intrinsic::gla_any: if (backEnd->decomposeIntrinsic(EDiAny)) { if (GetComponentCount(arg0) == 1) UnsupportedFunctionality("any() on a scalar"); newInst = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0)); for (int c = 1; c < GetComponentCount(arg0); ++c) { llvm::Value* comp = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), c)); newInst = builder.CreateOr(newInst, comp); } } break; case Intrinsic::gla_all: if (backEnd->decomposeIntrinsic(EDiAll)) { if (GetComponentCount(arg0) == 1) UnsupportedFunctionality("all() on a scalar"); newInst = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0)); for (int c = 1; c < GetComponentCount(arg0); ++c) { llvm::Value* comp = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), c)); newInst = builder.CreateAnd(newInst, comp); } } break; case Intrinsic::gla_not: if (backEnd->decomposeIntrinsic(EDiNot)) { if (GetComponentCount(arg0) == 1) UnsupportedFunctionality("not() on a scalar"); newInst = builder.CreateNot(arg0); } break; case Intrinsic::gla_fTextureSample: case Intrinsic::gla_fTextureSampleLodRefZ: case Intrinsic::gla_fTextureSampleLodRefZOffset: case Intrinsic::gla_fTextureSampleLodRefZOffsetGrad: if (backEnd->decomposeIntrinsic(EDiTextureProjection)) { // if projection flag is set, divide all coordinates (and refZ) by projection int texFlags = GetConstantInt(intrinsic->getArgOperand(GetTextureOpIndex(ETOFlag))); if (texFlags & ETFProjected) { // insert before intrinsic since we are not 
replacing it builder.SetInsertPoint(inst); // turn off projected flag to reflect decomposition texFlags &= ~ETFProjected; llvm::Value* coords = intrinsic->getArgOperand(GetTextureOpIndex(ETOCoord)); // determine how many channels are live after decomposition int newCoordWidth = 0; switch (GetConstantInt(intrinsic->getArgOperand(gla::ETOSamplerType))) { case gla::ESamplerBuffer: case gla::ESampler1D: newCoordWidth = 1; break; case gla::ESampler2D: case gla::ESampler2DRect: case gla::ESampler2DMS: newCoordWidth = 2; break; case gla::ESampler3D: newCoordWidth = 3; break; case gla::ESamplerCube: gla::UnsupportedFunctionality("projection with cube sampler"); break; default: assert(0 && "Unknown sampler type"); break; } if (texFlags & gla::ETFArrayed) gla::UnsupportedFunctionality("projection with arrayed sampler"); // projection resides in last component llvm::Value* projIdx = MakeUnsignedConstant(module->getContext(), GetComponentCount(coords) - 1); llvm::Value* divisor = builder.CreateExtractElement(coords, projIdx); llvm::Type* newCoordType; if (newCoordWidth > 1) newCoordType = llvm::VectorType::get(GetBasicType(coords), newCoordWidth); else newCoordType = GetBasicType(coords); // create space to hold results llvm::Value* newCoords = llvm::UndefValue::get(newCoordType); llvm::Value* smearedProj = llvm::UndefValue::get(newCoordType); if (newCoordWidth > 1) { for (int i = 0; i < newCoordWidth; ++i) { llvm::Value* idx = MakeUnsignedConstant(module->getContext(), i); // smear projection smearedProj = builder.CreateInsertElement(smearedProj, divisor, idx); // shrink coordinates to remove projection component llvm::Value* oldCoord = builder.CreateExtractElement(coords, idx); newCoords = builder.CreateInsertElement(newCoords, oldCoord, idx); } } else { smearedProj = divisor; newCoords = builder.CreateExtractElement(coords, MakeUnsignedConstant(module->getContext(), 0)); } // divide coordinates newCoords = builder.CreateFDiv(newCoords, smearedProj); // // Remaining code 
declares new intrinsic and modifies call arguments // // build up argTypes for flexible parameters, including result llvm::SmallVector<llvm::Type*, 5> types; // result type types.push_back(intrinsic->getType()); // use new coords to reflect shrink types.push_back(newCoords->getType()); // add offset switch (intrinsic->getIntrinsicID()) { case Intrinsic::gla_fTextureSampleLodRefZOffset: case Intrinsic::gla_fTextureSampleLodRefZOffsetGrad: types.push_back(intrinsic->getArgOperand(ETOOffset)->getType()); default: break; } // add gradients switch (intrinsic->getIntrinsicID()) { case Intrinsic::gla_fTextureSampleLodRefZOffsetGrad: types.push_back(intrinsic->getArgOperand(ETODPdx)->getType()); types.push_back(intrinsic->getArgOperand(ETODPdy)->getType()); default: break; } // declare the new intrinsic // TODO: functionality: texturing correctness: is this getting the correct non-projective form? Function* texture = Intrinsic::getDeclaration(module, intrinsic->getIntrinsicID(), types); // modify arguments to match new intrinsic intrinsic->setCalledFunction(texture); intrinsic->setArgOperand(ETOFlag, MakeUnsignedConstant(module->getContext(), texFlags)); intrinsic->setArgOperand(ETOCoord, newCoords); switch (intrinsic->getIntrinsicID()) { case Intrinsic::gla_fTextureSampleLodRefZ: case Intrinsic::gla_fTextureSampleLodRefZOffset: case Intrinsic::gla_fTextureSampleLodRefZOffsetGrad: intrinsic->setArgOperand(ETORefZ, builder.CreateFDiv(intrinsic->getArgOperand(ETORefZ), divisor)); default: break; } // mark our change, but don't replace the intrinsic changed = true; } } break; default: // The cases above needs to be comprehensive in terms of checking // for what intrinsics to decompose. If not there the assumption is // it never needs to be decomposed. break; } if (newInst) { inst->replaceAllUsesWith(newInst); inst->dropAllReferences(); inst->eraseFromParent(); changed = true; } } }
bool IntrinsicCleanerPass::runOnBasicBlock(BasicBlock &b, Module &M) { bool dirty = false; bool block_split=false; #if LLVM_VERSION_CODE <= LLVM_VERSION(3, 1) unsigned WordSize = TargetData.getPointerSizeInBits() / 8; #else unsigned WordSize = DataLayout.getPointerSizeInBits() / 8; #endif for (BasicBlock::iterator i = b.begin(), ie = b.end(); (i != ie) && (block_split == false);) { IntrinsicInst *ii = dyn_cast<IntrinsicInst>(&*i); // increment now since LowerIntrinsic deletion makes iterator invalid. ++i; if(ii) { switch (ii->getIntrinsicID()) { case Intrinsic::vastart: case Intrinsic::vaend: break; // Lower vacopy so that object resolution etc is handled by // normal instructions. // // FIXME: This is much more target dependent than just the word size, // however this works for x86-32 and x86-64. case Intrinsic::vacopy: { // (dst, src) -> *((i8**) dst) = *((i8**) src) Value *dst = ii->getArgOperand(0); Value *src = ii->getArgOperand(1); if (WordSize == 4) { Type *i8pp = PointerType::getUnqual(PointerType::getUnqual(Type::getInt8Ty(getGlobalContext()))); Value *castedDst = CastInst::CreatePointerCast(dst, i8pp, "vacopy.cast.dst", ii); Value *castedSrc = CastInst::CreatePointerCast(src, i8pp, "vacopy.cast.src", ii); Value *load = new LoadInst(castedSrc, "vacopy.read", ii); new StoreInst(load, castedDst, false, ii); } else { assert(WordSize == 8 && "Invalid word size!"); Type *i64p = PointerType::getUnqual(Type::getInt64Ty(getGlobalContext())); Value *pDst = CastInst::CreatePointerCast(dst, i64p, "vacopy.cast.dst", ii); Value *pSrc = CastInst::CreatePointerCast(src, i64p, "vacopy.cast.src", ii); Value *val = new LoadInst(pSrc, std::string(), ii); new StoreInst(val, pDst, ii); Value *off = ConstantInt::get(Type::getInt64Ty(getGlobalContext()), 1); pDst = GetElementPtrInst::Create(pDst, off, std::string(), ii); pSrc = GetElementPtrInst::Create(pSrc, off, std::string(), ii); val = new LoadInst(pSrc, std::string(), ii); new StoreInst(val, pDst, ii); pDst = 
GetElementPtrInst::Create(pDst, off, std::string(), ii); pSrc = GetElementPtrInst::Create(pSrc, off, std::string(), ii); val = new LoadInst(pSrc, std::string(), ii); new StoreInst(val, pDst, ii); } ii->removeFromParent(); delete ii; break; } case Intrinsic::sadd_with_overflow: case Intrinsic::ssub_with_overflow: case Intrinsic::smul_with_overflow: case Intrinsic::uadd_with_overflow: case Intrinsic::usub_with_overflow: case Intrinsic::umul_with_overflow: { IRBuilder<> builder(ii->getParent(), ii); Value *op1 = ii->getArgOperand(0); Value *op2 = ii->getArgOperand(1); Value *result = 0; Value *result_ext = 0; Value *overflow = 0; unsigned int bw = op1->getType()->getPrimitiveSizeInBits(); unsigned int bw2 = op1->getType()->getPrimitiveSizeInBits()*2; if ((ii->getIntrinsicID() == Intrinsic::uadd_with_overflow) || (ii->getIntrinsicID() == Intrinsic::usub_with_overflow) || (ii->getIntrinsicID() == Intrinsic::umul_with_overflow)) { Value *op1ext = builder.CreateZExt(op1, IntegerType::get(M.getContext(), bw2)); Value *op2ext = builder.CreateZExt(op2, IntegerType::get(M.getContext(), bw2)); Value *int_max_s = ConstantInt::get(op1->getType(), APInt::getMaxValue(bw)); Value *int_max = builder.CreateZExt(int_max_s, IntegerType::get(M.getContext(), bw2)); if (ii->getIntrinsicID() == Intrinsic::uadd_with_overflow){ result_ext = builder.CreateAdd(op1ext, op2ext); } else if (ii->getIntrinsicID() == Intrinsic::usub_with_overflow){ result_ext = builder.CreateSub(op1ext, op2ext); } else if (ii->getIntrinsicID() == Intrinsic::umul_with_overflow){ result_ext = builder.CreateMul(op1ext, op2ext); } overflow = builder.CreateICmpUGT(result_ext, int_max); } else if ((ii->getIntrinsicID() == Intrinsic::sadd_with_overflow) || (ii->getIntrinsicID() == Intrinsic::ssub_with_overflow) || (ii->getIntrinsicID() == Intrinsic::smul_with_overflow)) { Value *op1ext = builder.CreateSExt(op1, IntegerType::get(M.getContext(), bw2)); Value *op2ext = builder.CreateSExt(op2, IntegerType::get(M.getContext(), 
bw2)); Value *int_max_s = ConstantInt::get(op1->getType(), APInt::getSignedMaxValue(bw)); Value *int_min_s = ConstantInt::get(op1->getType(), APInt::getSignedMinValue(bw)); Value *int_max = builder.CreateSExt(int_max_s, IntegerType::get(M.getContext(), bw2)); Value *int_min = builder.CreateSExt(int_min_s, IntegerType::get(M.getContext(), bw2)); if (ii->getIntrinsicID() == Intrinsic::sadd_with_overflow){ result_ext = builder.CreateAdd(op1ext, op2ext); } else if (ii->getIntrinsicID() == Intrinsic::ssub_with_overflow){ result_ext = builder.CreateSub(op1ext, op2ext); } else if (ii->getIntrinsicID() == Intrinsic::smul_with_overflow){ result_ext = builder.CreateMul(op1ext, op2ext); } overflow = builder.CreateOr(builder.CreateICmpSGT(result_ext, int_max), builder.CreateICmpSLT(result_ext, int_min)); } // This trunc cound be replaced by a more general trunc replacement // that allows to detect also undefined behavior in assignments or // overflow in operation with integers whose dimension is smaller than // int's dimension, e.g. // uint8_t = uint8_t + uint8_t; // if one desires the wrapping should write // uint8_t = (uint8_t + uint8_t) & 0xFF; // before this, must check if it has side effects on other operations result = builder.CreateTrunc(result_ext, op1->getType()); Value *resultStruct = builder.CreateInsertValue(UndefValue::get(ii->getType()), result, 0); resultStruct = builder.CreateInsertValue(resultStruct, overflow, 1); ii->replaceAllUsesWith(resultStruct); ii->removeFromParent(); delete ii; dirty = true; break; } case Intrinsic::dbg_value: case Intrinsic::dbg_declare: // Remove these regardless of lower intrinsics flag. This can // be removed once IntrinsicLowering is fixed to not have bad // caches. ii->eraseFromParent(); dirty = true; break; case Intrinsic::trap: { // Intrisic instruction "llvm.trap" found. Directly lower it to // a call of the abort() function. 
Function *F = cast<Function>( M.getOrInsertFunction( "abort", Type::getVoidTy(getGlobalContext()), NULL)); F->setDoesNotReturn(); F->setDoesNotThrow(); CallInst::Create(F, Twine(), ii); new UnreachableInst(getGlobalContext(), ii); ii->eraseFromParent(); dirty = true; break; } case Intrinsic::objectsize: { // We don't know the size of an object in general so we replace // with 0 or -1 depending on the second argument to the intrinsic. assert(ii->getNumArgOperands() == 2 && "wrong number of arguments"); Value *minArg = ii->getArgOperand(1); assert(minArg && "Failed to get second argument"); ConstantInt *minArgAsInt = dyn_cast<ConstantInt>(minArg); assert(minArgAsInt && "Second arg is not a ConstantInt"); assert(minArgAsInt->getBitWidth() == 1 && "Second argument is not an i1"); Value *replacement = NULL; LLVM_TYPE_Q IntegerType *intType = dyn_cast<IntegerType>(ii->getType()); assert(intType && "intrinsic does not have integer return type"); if (minArgAsInt->isZero()) { // min=false replacement = ConstantInt::get(intType, -1, /*isSigned=*/true); } else { // min=true replacement = ConstantInt::get(intType, 0, /*isSigned=*/false); } ii->replaceAllUsesWith(replacement); ii->eraseFromParent(); dirty = true; break; } default: if (LowerIntrinsics) IL->LowerIntrinsicCall(ii); dirty = true; break; } } } return dirty; }