/// \brief Split sadd.with.overflow into add + sadd.with.overflow to allow
/// analysis and optimization.
///
/// \return A new value representing the non-overflowing add if possible,
/// otherwise return the original value.
Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser,
                                                    const DominatorTree *DT) {
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(IVUser);
  if (!II || II->getIntrinsicID() != Intrinsic::sadd_with_overflow)
    return IVUser;

  // Find a branch guarded by the overflow check.
  BranchInst *Branch = 0;
  Instruction *AddVal = 0;
  for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
       UI != E; ++UI) {
    if (ExtractValueInst *ExtractInst = dyn_cast<ExtractValueInst>(*UI)) {
      if (ExtractInst->getNumIndices() != 1)
        continue;
      if (ExtractInst->getIndices()[0] == 0)
        AddVal = ExtractInst;
      else if (ExtractInst->getIndices()[0] == 1 && ExtractInst->hasOneUse())
        Branch = dyn_cast<BranchInst>(ExtractInst->use_back());
    }
  }
  if (!AddVal || !Branch)
    return IVUser;

  BasicBlock *ContinueBB = Branch->getSuccessor(1);
  if (llvm::next(pred_begin(ContinueBB)) != pred_end(ContinueBB))
    return IVUser;

  // Check if all users of the add are provably NSW.
  bool AllNSW = true;
  for (Value::use_iterator UI = AddVal->use_begin(), E = AddVal->use_end();
       UI != E; ++UI) {
    if (Instruction *UseInst = dyn_cast<Instruction>(*UI)) {
      BasicBlock *UseBB = UseInst->getParent();
      if (PHINode *PHI = dyn_cast<PHINode>(UseInst))
        UseBB = PHI->getIncomingBlock(UI);
      if (!DT->dominates(ContinueBB, UseBB)) {
        AllNSW = false;
        break;
      }
    }
  }
  if (!AllNSW)
    return IVUser;

  // Go for it...
  IRBuilder<> Builder(IVUser);
  Instruction *AddInst = dyn_cast<Instruction>(
    Builder.CreateNSWAdd(II->getOperand(0), II->getOperand(1)));

  // The caller expects the new add to have the same form as the intrinsic. The
  // IV operand position must be the same.
  assert((AddInst->getOpcode() == Instruction::Add &&
          AddInst->getOperand(0) == II->getOperand(0)) &&
         "Bad add instruction created from overflow intrinsic.");

  AddVal->replaceAllUsesWith(AddInst);
  DeadInsts.push_back(AddVal);
  return AddInst;
}
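// A minimal IR sketch of the transformation above (value names are
// hypothetical, for illustration only). Before the split:
//
//   %sadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %iv, i32 1)
//   %val  = extractvalue { i32, i1 } %sadd, 0
//   %ovf  = extractvalue { i32, i1 } %sadd, 1
//   br i1 %ovf, label %trap, label %cont   ; %cont has a single predecessor
//
// If every user of %val is dominated by %cont (i.e. only reachable when no
// overflow occurred), the pass emits "%add = add nsw i32 %iv, 1" and replaces
// all uses of %val with it, exposing an NSW add to induction-variable
// analysis.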
bool LLVMReachingDefsAnalysis::handleIntrinsicCall(LLVMNode *callNode,
                                                   CallInst *CI,
                                                   DefMap *df) {
  bool changed = false;
  IntrinsicInst *I = cast<IntrinsicInst>(CI);
  Value *dest;

  switch (I->getIntrinsicID()) {
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
    case Intrinsic::memset:
      dest = I->getOperand(0);
      break;
    default:
      return handleUndefinedCall(callNode, CI, df);
  }

  LLVMNode *destNode = getOperand(callNode, dest, 1);
  assert(destNode && "No operand for intrinsic call");

  for (const Pointer& ptr : destNode->getPointsTo()) {
    // we could compute all the concrete offsets, but
    // these functions usually set the whole memory,
    // so if we use UNKNOWN_OFFSET, the effect is the same
    changed |= df->add(Pointer(ptr.obj, UNKNOWN_OFFSET), callNode);
  }

  return changed;
}
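// For example (a sketch of the over-approximation described above): given
//
//   call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 %n, ...)
//
// where %p may point into an object O, the definition recorded is
// (O, UNKNOWN_OFFSET) -> callNode, i.e. the whole of O is conservatively
// treated as redefined by the call.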
void MemoryInstrumenter::instrumentVarArgFunction(Function *F) {
  IntrinsicInst *VAStart = findAnyVAStart(F);
  assert(VAStart && "cannot find any llvm.va_start");
  BitCastInst *ArrayDecay = cast<BitCastInst>(VAStart->getOperand(0));
  assert(ArrayDecay->getType() == CharStarType);

  // The source of the bitcast does not have to be an alloca. In unoptimized
  // bitcode, it's likely a GEP. In that case, we need to track further.
  Instruction *Alloca = ArrayDecay;
  while (!isa<AllocaInst>(Alloca)) {
    Alloca = cast<Instruction>(Alloca->getOperand(0));
  }

  // Clone Alloca, ArrayDecay, and VAStart, and replace their operands.
  Instruction *ClonedAlloca = Alloca->clone();
  Instruction *ClonedArrayDecay = ArrayDecay->clone();
  Instruction *ClonedVAStart = VAStart->clone();
  ClonedArrayDecay->setOperand(0, ClonedAlloca);
  ClonedVAStart->setOperand(0, ClonedArrayDecay);

  // Insert the cloned instructions into the entry block.
  BasicBlock::iterator InsertPos = F->begin()->begin();
  BasicBlock::InstListType &InstList = F->begin()->getInstList();
  InstList.insert(InsertPos, ClonedAlloca);
  InstList.insert(InsertPos, ClonedArrayDecay);
  InstList.insert(InsertPos, ClonedVAStart);

  // Hook the llvm.va_start.
  CallInst::Create(VAStartHook, ClonedArrayDecay, "", InsertPos);
}
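// In unoptimized bitcode, the pattern this walks typically looks like the
// following (a sketch; the exact va_list type is target-dependent):
//
//   %ap    = alloca [1 x %struct.__va_list_tag]
//   %decay = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0
//   %cast  = bitcast %struct.__va_list_tag* %decay to i8*
//   call void @llvm.va_start(i8* %cast)
//
// The getOperand(0) chain from the bitcast passes through the GEP before
// reaching the alloca, which is why the loop above keeps walking.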
bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
    IntrinsicInst &I) const {
  assert(I.getIntrinsicID() == Intrinsic::bitreverse &&
         "I must be bitreverse intrinsic");
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Function *I32 =
      Intrinsic::getDeclaration(Mod, Intrinsic::bitreverse, { I32Ty });
  Value *ExtOp = Builder.CreateZExt(I.getOperand(0), I32Ty);
  Value *ExtRes = Builder.CreateCall(I32, { ExtOp });
  Value *LShrOp =
      Builder.CreateLShr(ExtRes, 32 - getBaseElementBitWidth(I.getType()));
  Value *TruncRes =
      Builder.CreateTrunc(LShrOp, I.getType());

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}
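// Worked example (illustrative, not from the source): for an i16 bitreverse,
// the reversed bits of the zero-extended operand land in the high 16 bits of
// the i32 result, so the lshr by 32 - 16 = 16 realigns them before the trunc:
//
//   bitreverse.i16(0x0001):
//     zext           -> 0x00000001
//     bitreverse.i32 -> 0x80000000
//     lshr 16        -> 0x00008000
//     trunc          -> 0x8000   (the correct i16 bitreverse)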
bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
  // Lower all uses of llvm.objectsize.*
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
    bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1);
    const Type *ReturnTy = CI->getType();
    Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
    CI->replaceAllUsesWith(RetVal);
    CI->eraseFromParent();
    return true;
  }

  // From here on out we're working with named functions.
  if (CI->getCalledFunction() == 0) return false;

  // We'll need TargetData from here on out.
  const TargetData *TD = TLI ? TLI->getTargetData() : 0;
  if (!TD) return false;

  // Lower all default uses of _chk calls.  This is very similar
  // to what InstCombineCalls does, but here we are only lowering calls
  // that have the default "don't know" as the objectsize.  Anything else
  // should be left alone.
  CodeGenPrepareFortifiedLibCalls Simplifier;
  return Simplifier.fold(CI, TD);
}
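// The constant chosen above follows the intrinsic's documented fallback: when
// the object size cannot be determined, llvm.objectsize yields -1 if the
// caller asked for an upper bound and 0 if it asked for a lower bound (Min).
// For example:
//
//   %sz = call i64 @llvm.objectsize.i64(i8* %p, i1 false)  ; upper bound
//
// is folded here to i64 -1.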
/// getModRefInfo - Check to see if the specified callsite can clobber the
/// specified memory object.  Since we only look at local properties of this
/// function, we really can't say much about this query.  We do, however, use
/// simple "address taken" analysis on local objects.
AliasAnalysis::ModRefResult
BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
  const Value *Object = P->getUnderlyingObject();

  // If this is a tail call and P points to a stack location, we know that
  // the tail call cannot access or modify the local stack.
  // We cannot exclude byval arguments here; these belong to the caller of
  // the current function not to the current function, and a tail callee
  // may reference them.
  if (isa<AllocaInst>(Object))
    if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
      if (CI->isTailCall())
        return NoModRef;

  // If the pointer is to a locally allocated object that does not escape,
  // then the call cannot mod/ref the pointer unless the call takes the
  // pointer as an argument, and itself doesn't capture it.
  if (!isa<Constant>(Object) && CS.getInstruction() != Object &&
      isNonEscapingLocalObject(Object)) {
    bool PassedAsArg = false;
    unsigned ArgNo = 0;
    for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
         CI != CE; ++CI, ++ArgNo) {
      // Only look at the no-capture pointer arguments.
      if (!isa<PointerType>((*CI)->getType()) ||
          !CS.paramHasAttr(ArgNo+1, Attribute::NoCapture))
        continue;

      // If this is a no-capture pointer argument, see if we can tell that it
      // is impossible to alias the pointer we're checking.  If not, we have to
      // assume that the call could touch the pointer, even though it doesn't
      // escape.
      if (!isNoAlias(cast<Value>(CI), ~0U, P, ~0U)) {
        PassedAsArg = true;
        break;
      }
    }

    if (!PassedAsArg)
      return NoModRef;
  }

  // Finally, handle specific knowledge of intrinsics.
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
  if (II == 0)
    return AliasAnalysis::getModRefInfo(CS, P, Size);

  switch (II->getIntrinsicID()) {
  default: break;
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    unsigned Len = ~0U;
    if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getOperand(3)))
      Len = LenCI->getZExtValue();
    Value *Dest = II->getOperand(1);
    Value *Src = II->getOperand(2);
    if (isNoAlias(Dest, Len, P, Size)) {
      if (isNoAlias(Src, Len, P, Size))
        return NoModRef;
      return Ref;
    }
    break;
  }
  case Intrinsic::memset:
    // Since memset is 'accesses arguments' only, the AliasAnalysis base class
    // will handle it for the variable length case.
    if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getOperand(3))) {
      unsigned Len = LenCI->getZExtValue();
      Value *Dest = II->getOperand(1);
      if (isNoAlias(Dest, Len, P, Size))
        return NoModRef;
    }
    break;
  case Intrinsic::atomic_cmp_swap:
  case Intrinsic::atomic_swap:
  case Intrinsic::atomic_load_add:
  case Intrinsic::atomic_load_sub:
  case Intrinsic::atomic_load_and:
  case Intrinsic::atomic_load_nand:
  case Intrinsic::atomic_load_or:
  case Intrinsic::atomic_load_xor:
  case Intrinsic::atomic_load_max:
  case Intrinsic::atomic_load_min:
  case Intrinsic::atomic_load_umax:
  case Intrinsic::atomic_load_umin:
    if (TD) {
      Value *Op1 = II->getOperand(1);
      unsigned Op1Size = TD->getTypeStoreSize(Op1->getType());
      if (isNoAlias(Op1, Op1Size, P, Size))
        return NoModRef;
    }
    break;
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  case Intrinsic::invariant_start: {
    unsigned PtrSize = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
    if (isNoAlias(II->getOperand(2), PtrSize, P, Size))
      return NoModRef;
    break;
  }
  case Intrinsic::invariant_end: {
    unsigned PtrSize = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
    if (isNoAlias(II->getOperand(3), PtrSize, P, Size))
      return NoModRef;
    break;
  }
  }

  // The AliasAnalysis base class has some smarts, let's use them.
  return AliasAnalysis::getModRefInfo(CS, P, Size);
}
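// Example of the memcpy/memmove reasoning above (a sketch): for
//
//   call void @llvm.memcpy(i8* %dst, i8* %src, i32 16, ...)
//
// a pointer P that provably aliases neither %dst nor %src gets NoModRef; one
// that cannot alias %dst but may alias %src can only be read by the call, so
// the result tightens from the default ModRef to Ref.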
void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
  // Array allocations are probably not worth handling, since an allocation of
  // the array type is the canonical form.
  if (!I.isStaticAlloca() || I.isArrayAllocation())
    return;

  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I))
    return;

  DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");

  const Function &ContainingFunction = *I.getParent()->getParent();

  // FIXME: We should also try to get this value from the reqd_work_group_size
  // function attribute if it is available.
  unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction);

  int AllocaSize =
      WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);

  if (AllocaSize > LocalMemAvailable) {
    DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
    return;
  }

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");
  LocalMemAvailable -= AllocaSize;

  Function *F = I.getParent()->getParent();

  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage,
      UndefValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(),
      nullptr,
      GlobalVariable::NotThreadLocal,
      AMDGPUAS::LOCAL_ADDRESS);
  GV->setUnnamedAddr(true);
  GV->setAlignment(I.getAlignment());

  Value *TCntY, *TCntZ;

  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

  // Linearize the 3-D workitem id:
  //   TID = TIdX * (TCntY * TCntZ) + TIdY * TCntZ + TIdZ
  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  Value *Indices[] = {
    Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
    TID
  };

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);

      // The operand's value should be corrected on its own.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);
      continue;
    }

    IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
    if (!Intr) {
      // FIXME: What is this for? It doesn't make sense to promote arbitrary
      // function calls. If the call is to a defined function that can also be
      // promoted, we should be able to do this once that function is also
      // rewritten.
      std::vector<Type*> ArgTypes;
      for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
           ArgIdx != ArgEnd; ++ArgIdx) {
        ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
      }
      Function *F = Call->getCalledFunction();
      FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
                                                F->isVarArg());
      Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
                                             NewType, F->getAttributes());
      Function *NewF = cast<Function>(C);
      Call->setCalledFunction(NewF);
      continue;
    }

    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
                           MemCpy->getLength(), MemCpy->getAlignment(),
                           MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
      Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getRawSource(),
                            MemMove->getLength(), MemMove->getAlignment(),
                            MemMove->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::invariant_group_barrier:
      Intr->eraseFromParent();
      // FIXME: I think the invariant marker should still theoretically apply,
      // but the intrinsics need to be changed to accept pointers with any
      // address space.
      continue;
    case Intrinsic::objectsize: {
      Value *Src = Intr->getOperand(0);
      Type *SrcTy = Src->getType()->getPointerElementType();
      Function *ObjectSize = Intrinsic::getDeclaration(Mod,
        Intrinsic::objectsize,
        { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
      );

      CallInst *NewCall
        = Builder.CreateCall(ObjectSize, { Src, Intr->getOperand(1) });
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->dump();
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
}
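// Shape of the replacement (a sketch with hypothetical names): the private
// alloca becomes one slot of a per-workgroup LDS array, indexed by the
// linearized workitem id computed above:
//
//   @func.buf = internal unnamed_addr addrspace(3) global [WGSize x T] undef
//   %tid  = tidX*(cntY*cntZ) + tidY*cntZ + tidZ
//   %slot = getelementptr inbounds [WGSize x T], ... @func.buf, i32 0, i32 %tid
//
// so each workitem transparently gets a private slot of the shared buffer.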
// FIXME: Should try to pick the most likely to be profitable allocas first.
bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
  // Array allocations are probably not worth handling, since an allocation of
  // the array type is the canonical form.
  if (!I.isStaticAlloca() || I.isArrayAllocation())
    return false;

  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I, AS))
    return true; // Promoted to vector.

  const Function &ContainingFunction = *I.getParent()->getParent();
  CallingConv::ID CC = ContainingFunction.getCallingConv();

  // Don't promote the alloca to LDS for shader calling conventions as the work
  // item ID intrinsics are not supported for these calling conventions.
  // Furthermore not all LDS is available for some of the stages.
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    break;
  default:
    DEBUG(dbgs()
          << " promote alloca to LDS not supported with calling convention.\n");
    return false;
  }

  // Not likely to have sufficient local memory for promotion.
  if (!SufficientLDS)
    return false;

  const AMDGPUSubtarget &ST =
      TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction);
  unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;

  const DataLayout &DL = Mod->getDataLayout();

  unsigned Align = I.getAlignment();
  if (Align == 0)
    Align = DL.getABITypeAlignment(I.getAllocatedType());

  // FIXME: This computed padding is likely wrong since it depends on inverse
  // usage order.
  //
  // FIXME: It is also possible that if we're allowed to use all of the memory
  // we could end up using more than the maximum due to alignment padding.

  uint32_t NewSize = alignTo(CurrentLocalMemUsage, Align);
  uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
  NewSize += AllocSize;

  if (NewSize > LocalMemLimit) {
    DEBUG(dbgs() << " " << AllocSize
          << " bytes of local memory not available to promote\n");
    return false;
  }

  CurrentLocalMemUsage = NewSize;

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return false;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");

  Function *F = I.getParent()->getParent();

  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage,
      UndefValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(),
      nullptr,
      GlobalVariable::NotThreadLocal,
      AS.LOCAL_ADDRESS);
  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  GV->setAlignment(I.getAlignment());

  Value *TCntY, *TCntZ;

  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  Value *Indices[] = {
    Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
    TID
  };

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
        Value *Src0 = CI->getOperand(0);
        Type *EltTy = Src0->getType()->getPointerElementType();
        PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);

        if (isa<ConstantPointerNull>(CI->getOperand(0)))
          CI->setOperand(0, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(CI->getOperand(1)))
          CI->setOperand(1, ConstantPointerNull::get(NewTy));

        continue;
      }

      // The operand's value should be corrected on its own and we don't want
      // to touch the users.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);

      // Adjust the types of any constant operands.
      if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
        if (isa<ConstantPointerNull>(SI->getOperand(1)))
          SI->setOperand(1, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(SI->getOperand(2)))
          SI->setOperand(2, ConstantPointerNull::get(NewTy));
      } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
        for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
          if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
            Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
        }
      }

      continue;
    }

    IntrinsicInst *Intr = cast<IntrinsicInst>(Call);
    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getDestAlignment(),
                           MemCpy->getRawSource(), MemCpy->getSourceAlignment(),
                           MemCpy->getLength(), MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
      Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getDestAlignment(),
                            MemMove->getRawSource(),
                            MemMove->getSourceAlignment(),
                            MemMove->getLength(), MemMove->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getDestAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::invariant_group_barrier:
      Intr->eraseFromParent();
      // FIXME: I think the invariant marker should still theoretically apply,
      // but the intrinsics need to be changed to accept pointers with any
      // address space.
      continue;
    case Intrinsic::objectsize: {
      Value *Src = Intr->getOperand(0);
      Type *SrcTy = Src->getType()->getPointerElementType();
      Function *ObjectSize = Intrinsic::getDeclaration(Mod,
        Intrinsic::objectsize,
        { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) }
      );

      CallInst *NewCall = Builder.CreateCall(
          ObjectSize, {Src, Intr->getOperand(1), Intr->getOperand(2)});
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->print(errs());
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
  return true;
}
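// Example of the constant fix-ups above (illustrative): once a pointer %p has
// been mutated to addrspace(3), a compare such as
//
//   icmp eq i8* %p, null
//
// must have its null operand rebuilt as an addrspace(3) null, which is what
// the ConstantPointerNull::get(NewTy) calls on icmp/select/phi operands do.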
void LLVMDefUseAnalysis::handleIntrinsicCall(LLVMNode *callNode, CallInst *CI) {
  static std::set<Instruction *> warnings;
  IntrinsicInst *I = cast<IntrinsicInst>(CI);
  Value *dest, *src = nullptr;

  switch (I->getIntrinsicID()) {
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      src = I->getOperand(1);
      // fall-through
    case Intrinsic::memset:
    case Intrinsic::vastart:
      dest = I->getOperand(0);
      break;
    case Intrinsic::vaend:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::trap:
      // nothing to be done here
      return;
    case Intrinsic::bswap:
    case Intrinsic::prefetch:
    case Intrinsic::objectsize:
    case Intrinsic::sadd_with_overflow:
    case Intrinsic::uadd_with_overflow:
    case Intrinsic::ssub_with_overflow:
    case Intrinsic::usub_with_overflow:
    case Intrinsic::smul_with_overflow:
    case Intrinsic::umul_with_overflow:
      // nothing to be done, direct def-use edges
      // will be added later
      assert(I->getCalledFunction()->doesNotAccessMemory());
      return;
    case Intrinsic::stacksave:
    case Intrinsic::stackrestore:
      if (warnings.insert(CI).second)
        llvmutils::printerr("WARN: stack save/restore not implemented", CI);
      return;
    default:
      llvmutils::printerr("WARNING: unhandled intrinsic call", I);
      // if it does not access memory, we can just add
      // direct def-use edges
      if (I->getCalledFunction()->doesNotAccessMemory())
        return;

      assert(0 && "Unhandled intrinsic that accesses memory");
      // for release builds, do the best we can here
      handleUndefinedCall(callNode, CI);
      return;
  }

  // we must have dest set
  assert(dest);

  // these functions touch the memory of the pointers
  addDataDependence(callNode, CI, dest, Offset::UNKNOWN /* FIXME */);

  if (src)
    addDataDependence(callNode, CI, src, Offset::UNKNOWN /* FIXME */);
}
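// E.g. for llvm.memcpy(d, s, n) the code above adds data-dependence edges for
// the memory of both d and s (at an unknown offset), while memset and
// llvm.va_start only involve the destination pointer.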