~MMIAddrLabelMap() {
  assert(DeletedAddrLabelsNeedingEmission.empty() &&
         "Some labels for deleted blocks never got emitted");

  // Deallocate any of the 'list of symbols' case.
  for (DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry>::iterator
       I = AddrLabelSymbols.begin(), E = AddrLabelSymbols.end(); I != E; ++I)
    if (I->second.Symbols.is<std::vector<MCSymbol*>*>())
      delete I->second.Symbols.get<std::vector<MCSymbol*>*>();
}
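// The destructor above frees only the PointerUnion's vector case: a lone
// MCSymbol* is not owned by the map, while the 'list of symbols' vector is. A
// minimal standalone sketch of that discrimination idiom, assuming LLVM's ADT
// headers; Sym and Entry are hypothetical stand-ins (alignas(8) just gives
// PointerUnion low tag bits to work with), and the is<>()/get<>() members match
// the era of this snippet (newer LLVM spells these isa<>/cast<>).
#include "llvm/ADT/PointerUnion.h"
#include <vector>

struct alignas(8) Sym {};

struct Entry {
  // Either a single (non-owning) symbol or an owned, heap-allocated list.
  llvm::PointerUnion<Sym *, std::vector<Sym *> *> Symbols;
};

void destroyEntry(Entry &E) {
  // Only the vector case allocated memory, so only it needs a delete.
  if (E.Symbols.is<std::vector<Sym *> *>())
    delete E.Symbols.get<std::vector<Sym *> *>();
}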
bool FuncletLayout::runOnMachineFunction(MachineFunction &F) {
  DenseMap<const MachineBasicBlock *, int> FuncletMembership =
      getFuncletMembership(F);
  if (FuncletMembership.empty())
    return false;

  F.sort([&](MachineBasicBlock &X, MachineBasicBlock &Y) {
    auto FuncletX = FuncletMembership.find(&X);
    auto FuncletY = FuncletMembership.find(&Y);
    assert(FuncletX != FuncletMembership.end());
    assert(FuncletY != FuncletMembership.end());
    return FuncletX->second < FuncletY->second;
  });

  // Conservatively assume we changed something.
  return true;
}
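// The comparator pattern above, ordering elements by a precomputed membership
// id rather than by any property of the elements themselves, works outside
// LLVM too. A minimal standalone sketch (Block and the sample data are
// hypothetical); it sorts pointers so the address-keyed map stays valid, much
// as MachineFunction::sort reorders intrusive-list nodes without moving them
// in memory.
#include <algorithm>
#include <cassert>
#include <unordered_map>
#include <vector>

struct Block { const char *Name; };

int main() {
  std::vector<Block> Storage{{"entry"}, {"ehpad"}, {"cont"}};
  std::vector<Block *> Order{&Storage[0], &Storage[1], &Storage[2]};

  // Precomputed membership, analogous to getFuncletMembership(F).
  std::unordered_map<const Block *, int> Membership{
      {&Storage[0], 0}, {&Storage[1], 1}, {&Storage[2], 0}};

  // stable_sort keeps the original relative order within a membership id,
  // which is what makes grouping blocks by funclet safe.
  std::stable_sort(Order.begin(), Order.end(),
                   [&](const Block *X, const Block *Y) {
                     auto FX = Membership.find(X);
                     auto FY = Membership.find(Y);
                     assert(FX != Membership.end() && FY != Membership.end());
                     return FX->second < FY->second;
                   });
  // Order is now: entry, cont, ehpad.
}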
bool JumpInstrTables::runOnModule(Module &M) {
  JITI = &getAnalysis<JumpInstrTableInfo>();

  // Get the set of jumptable-annotated functions.
  DenseMap<Function *, Function *> Functions;
  for (Function &F : M) {
    if (F.hasFnAttribute(Attribute::JumpTable)) {
      assert(F.hasUnnamedAddr() &&
             "Attribute 'jumptable' requires 'unnamed_addr'");
      Functions[&F] = nullptr;
    }
  }

  // Create the jump-table functions.
  for (auto &KV : Functions) {
    Function *F = KV.first;
    KV.second = insertEntry(M, F);
  }

  // GlobalAlias is a special case, because the target of an alias statement
  // must be a defined function. So, instead of replacing a given function in
  // the alias, we replace all uses of aliases that target jumptable functions.
  // Note that there's no need to create these functions, since only aliases
  // that target known jumptable functions are replaced, and there's no way to
  // put the jumptable annotation on a global alias.
  DenseMap<GlobalAlias *, Function *> Aliases;
  for (GlobalAlias &GA : M.aliases()) {
    Constant *Aliasee = GA.getAliasee();
    if (Function *F = dyn_cast<Function>(Aliasee)) {
      auto it = Functions.find(F);
      if (it != Functions.end()) {
        Aliases[&GA] = it->second;
      }
    }
  }

  // Replace each address taken function with its jump-instruction table entry.
  for (auto &KV : Functions)
    replaceValueWithFunction(KV.first, KV.second);

  for (auto &KV : Aliases)
    replaceValueWithFunction(KV.first, KV.second);

  return !Functions.empty();
}
static void ComputeNumbering(Function *F, DenseMap<Value*,unsigned> &Numbering){
  unsigned IN = 0;

  // Arguments get the first numbers.
  for (Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
       AI != AE; ++AI)
    if (!AI->hasName())
      Numbering[&*AI] = IN++;

  // Walk the basic blocks in order.
  for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) {
    if (!FI->hasName())
      Numbering[&*FI] = IN++;

    // Walk the instructions in order.
    for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI)
      // void instructions don't get numbers.
      if (!BI->hasName() && !BI->getType()->isVoidTy())
        Numbering[&*BI] = IN++;
  }

  assert(!Numbering.empty() && "asked for numbering but numbering was no-op");
}
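// ComputeNumbering's contract is determinism: arguments first, then blocks and
// non-void instructions in program order, so any two walks over the same
// function agree on every id. A standalone sketch of the same scheme (Value
// and the two flattened lists are hypothetical stand-ins for the IR walk):
#include <string>
#include <unordered_map>
#include <vector>

struct Value { std::string Name; };

std::unordered_map<const Value *, unsigned>
computeNumbering(const std::vector<Value> &Args,
                 const std::vector<Value> &Insts) {
  std::unordered_map<const Value *, unsigned> Numbering;
  unsigned IN = 0;
  for (const Value &A : Args)   // Arguments get the first numbers.
    if (A.Name.empty())
      Numbering[&A] = IN++;
  for (const Value &I : Insts)  // Then everything else, in order.
    if (I.Name.empty())
      Numbering[&I] = IN++;
  return Numbering;
}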
/// getNonLocalPointerDepFromBB - Perform a dependency query based on
/// pointer/pointeesize starting at the end of StartBB. Add any clobber/def
/// results to the results vector and keep track of which blocks are visited in
/// 'Visited'.
///
/// This has special behavior for the first block queries (when SkipFirstBlock
/// is true). In this special case, it ignores the contents of the specified
/// block and starts returning dependence info for its predecessors.
///
/// This function returns false on success, or true to indicate that it could
/// not compute dependence information for some reason. This should be treated
/// as a clobber dependence on the first instruction in the predecessor block.
bool MemoryDependenceAnalysis::
getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
                            bool isLoad, BasicBlock *StartBB,
                            SmallVectorImpl<NonLocalDepEntry> &Result,
                            DenseMap<BasicBlock*, Value*> &Visited,
                            bool SkipFirstBlock) {
  // Look up the cached info for Pointer.
  ValueIsLoadPair CacheKey(Pointer, isLoad);

  std::pair<BBSkipFirstBlockPair, NonLocalDepInfo> *CacheInfo =
    &NonLocalPointerDeps[CacheKey];
  NonLocalDepInfo *Cache = &CacheInfo->second;

  // If we have valid cached information for exactly the block we are
  // investigating, just return it with no recomputation.
  if (CacheInfo->first == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
    // If we have a fully cached result for this query, we can just return the
    // cached results and populate the visited set. However, we have to verify
    // that we don't already have conflicting results for these blocks: check
    // that if a block in the results set is in the visited set, it was visited
    // for the same pointer query.
    if (!Visited.empty()) {
      for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
           I != E; ++I) {
        DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->first);
        if (VI == Visited.end() || VI->second == Pointer)
          continue;

        // We have a pointer mismatch in a block. Just return clobber, saying
        // that something was clobbered in this result. We could also do a
        // non-fully cached query, but there is little point in doing this.
        return true;
      }
    }

    for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
         I != E; ++I) {
      Visited.insert(std::make_pair(I->first, Pointer));
      if (!I->second.isNonLocal())
        Result.push_back(*I);
    }
    ++NumCacheCompleteNonLocalPtr;
    return false;
  }

  // Otherwise, either this is a new block, a block with an invalid cache
  // pointer or one that we're about to invalidate by putting more info into it
  // than its valid cache info. If empty, the result will be valid cache info,
  // otherwise it isn't.
  if (Cache->empty())
    CacheInfo->first = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
  else
    CacheInfo->first = BBSkipFirstBlockPair();

  SmallVector<BasicBlock*, 32> Worklist;
  Worklist.push_back(StartBB);

  // Keep track of the entries that we know are sorted. Previously cached
  // entries will all be sorted. The entries we add we only sort on demand (we
  // don't insert every element into its sorted position). We know that we
  // won't get any reuse from currently inserted values, because we don't
  // revisit blocks after we insert info for them.
  unsigned NumSortedEntries = Cache->size();
  DEBUG(AssertSorted(*Cache));

  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();

    // Skip the first block if we have it.
    if (!SkipFirstBlock) {
      // Analyze the dependency of *Pointer in FromBB. See if we already have
      // been here.
      assert(Visited.count(BB) && "Should check 'visited' before adding to WL");

      // Get the dependency info for Pointer in BB. If we have cached
      // information, we will use it, otherwise we compute it.
      DEBUG(AssertSorted(*Cache, NumSortedEntries));
      MemDepResult Dep = GetNonLocalInfoForBlock(Pointer, PointeeSize, isLoad,
                                                 BB, Cache, NumSortedEntries);

      // If we got a Def or Clobber, add this to the list of results.
      if (!Dep.isNonLocal()) {
        Result.push_back(NonLocalDepEntry(BB, Dep));
        continue;
      }
    }

    // If 'Pointer' is an instruction defined in this block, then we need to do
    // phi translation to change it into a value live in the predecessor block.
    // If phi translation fails, then we can't continue dependence analysis.
    Instruction *PtrInst = dyn_cast<Instruction>(Pointer);
    bool NeedsPHITranslation = PtrInst && PtrInst->getParent() == BB;

    // If no PHI translation is needed, just add all the predecessors of this
    // block to scan them as well.
    if (!NeedsPHITranslation) {
      SkipFirstBlock = false;
      for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
        // Verify that we haven't looked at this block yet.
        std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
          InsertRes = Visited.insert(std::make_pair(*PI, Pointer));
        if (InsertRes.second) {
          // First time we've looked at *PI.
          Worklist.push_back(*PI);
          continue;
        }

        // If we have seen this block before, but it was with a different
        // pointer then we have a phi translation failure and we have to treat
        // this as a clobber.
        if (InsertRes.first->second != Pointer)
          goto PredTranslationFailure;
      }
      continue;
    }

    // If we do need to do phi translation, then there are a bunch of different
    // cases, because we have to find a Value* live in the predecessor block.
    // We know that PtrInst is defined in this block at least.

    // If this is directly a PHI node, just use the incoming values for each
    // pred as the phi translated version.
    if (PHINode *PtrPHI = dyn_cast<PHINode>(PtrInst)) {
      for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
        BasicBlock *Pred = *PI;
        Value *PredPtr = PtrPHI->getIncomingValueForBlock(Pred);

        // Check to see if we have already visited this pred block with another
        // pointer. If so, we can't do this lookup. This failure can occur
        // with PHI translation when a critical edge exists and the PHI node in
        // the successor translates to a pointer value different than the
        // pointer the block was first analyzed with.
        std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
          InsertRes = Visited.insert(std::make_pair(Pred, PredPtr));

        if (!InsertRes.second) {
          // If the predecessor was visited with PredPtr, then we already did
          // the analysis and can ignore it.
          if (InsertRes.first->second == PredPtr)
            continue;

          // Otherwise, the block was previously analyzed with a different
          // pointer. We can't represent the result of this case, so we just
          // treat this as a phi translation failure.
          goto PredTranslationFailure;
        }

        // We may have added values to the cache list before this PHI
        // translation. If so, we haven't done anything to ensure that the
        // cache remains sorted. Sort it now (if needed) so that recursive
        // invocations of getNonLocalPointerDepFromBB that could reuse the
        // cache value will only see properly sorted cache arrays.
        if (Cache && NumSortedEntries != Cache->size())
          std::sort(Cache->begin(), Cache->end());
        Cache = 0;

        // FIXME: it is entirely possible that PHI translating will end up with
        // the same value. Consider PHI translating something like:
        // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
        // to recurse here, pedantically speaking.

        // If we have a problem phi translating, fall through to the code below
        // to handle the failure condition.
        if (getNonLocalPointerDepFromBB(PredPtr, PointeeSize, isLoad, Pred,
                                        Result, Visited))
          goto PredTranslationFailure;
      }

      // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
      CacheInfo = &NonLocalPointerDeps[CacheKey];
      Cache = &CacheInfo->second;
      NumSortedEntries = Cache->size();

      // Since we did phi translation, the "Cache" set won't contain all of the
      // results for the query. This is ok (we can still use it to accelerate
      // specific block queries) but we can't do the fastpath "return all
      // results from the set". Clear out the indicator for this.
      CacheInfo->first = BBSkipFirstBlockPair();
      SkipFirstBlock = false;
      continue;
    }

    // TODO: BITCAST, GEP.

    //   cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
    //   if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
    //     cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);

  PredTranslationFailure:
    if (Cache == 0) {
      // Refresh the CacheInfo/Cache pointer if it got invalidated.
      CacheInfo = &NonLocalPointerDeps[CacheKey];
      Cache = &CacheInfo->second;
      NumSortedEntries = Cache->size();
    } else if (NumSortedEntries != Cache->size()) {
      std::sort(Cache->begin(), Cache->end());
      NumSortedEntries = Cache->size();
    }

    // Since we did phi translation, the "Cache" set won't contain all of the
    // results for the query. This is ok (we can still use it to accelerate
    // specific block queries) but we can't do the fastpath "return all
    // results from the set". Clear out the indicator for this.
    CacheInfo->first = BBSkipFirstBlockPair();

    // If *nothing* works, mark the pointer as being clobbered by the first
    // instruction in this block.
    //
    // If this is the magic first block, return this as a clobber of the whole
    // incoming value. Since we can't phi translate to one of the predecessors,
    // we have to bail out.
    if (SkipFirstBlock)
      return true;

    for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
      assert(I != Cache->rend() && "Didn't find current block??");
      if (I->first != BB)
        continue;

      assert(I->second.isNonLocal() &&
             "Should only be here with transparent block");
      I->second = MemDepResult::getClobber(BB->begin());
      ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey.getOpaqueValue());
      Result.push_back(*I);
      break;
    }
  }

  // Okay, we're done now. If we added new values to the cache, re-sort it.
  switch (Cache->size() - NumSortedEntries) {
  case 0:
    // Done, no new entries.
    break;
  case 2: {
    // Two new entries, insert the last one into place.
    NonLocalDepEntry Val = Cache->back();
    Cache->pop_back();
    NonLocalDepInfo::iterator Entry =
      std::upper_bound(Cache->begin(), Cache->end()-1, Val);
    Cache->insert(Entry, Val);
    // FALL THROUGH.
  }
  case 1:
    // One new entry, just insert the new value at the appropriate position.
    if (Cache->size() != 1) {
      NonLocalDepEntry Val = Cache->back();
      Cache->pop_back();
      NonLocalDepInfo::iterator Entry =
        std::upper_bound(Cache->begin(), Cache->end(), Val);
      Cache->insert(Entry, Val);
    }
    break;
  default:
    // Added many values, do a full scale sort.
    std::sort(Cache->begin(), Cache->end());
  }
  DEBUG(AssertSorted(*Cache));
  return false;
}
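// The closing switch above is a small but reusable idiom: restoring sortedness
// after appending a handful of entries to an already-sorted vector. A minimal
// standalone sketch (ints stand in for NonLocalDepEntry): zero new entries
// need no work, one or two are placed by binary search, and anything more
// falls back to a full sort.
#include <algorithm>
#include <vector>

void restoreSorted(std::vector<int> &Cache, unsigned NumSortedEntries) {
  switch (Cache.size() - NumSortedEntries) {
  case 0: // Done, no new entries.
    break;
  case 2: { // Two new entries: place the last one, then fall through.
    int Val = Cache.back();
    Cache.pop_back();
    // Search only the sorted prefix; the other new entry stays at the back.
    auto Entry = std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
    Cache.insert(Entry, Val);
    [[fallthrough]];
  }
  case 1: // One (remaining) new entry: binary-search its position.
    if (Cache.size() != 1) {
      int Val = Cache.back();
      Cache.pop_back();
      auto Entry = std::upper_bound(Cache.begin(), Cache.end(), Val);
      Cache.insert(Entry, Val);
    }
    break;
  default: // Many new entries: a full sort beats repeated insertion.
    std::sort(Cache.begin(), Cache.end());
  }
}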
~MMIAddrLabelMap() {
  assert(DeletedAddrLabelsNeedingEmission.empty() &&
         "Some labels for deleted blocks never got emitted");
}
bool DevirtModule::run() {
  Function *BitSetTestFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::bitset_test));
  if (!BitSetTestFunc || BitSetTestFunc->use_empty())
    return false;

  Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));
  if (!AssumeFunc || AssumeFunc->use_empty())
    return false;

  // Find all virtual calls via a virtual table pointer %p under an assumption
  // of the form llvm.assume(llvm.bitset.test(%p, %md)). This indicates that %p
  // points to a vtable in the bitset %md. Group calls by (bitset, offset) pair
  // (effectively the identity of the virtual function) and store to CallSlots.
  DenseSet<Value *> SeenPtrs;
  for (auto I = BitSetTestFunc->use_begin(), E = BitSetTestFunc->use_end();
       I != E;) {
    auto CI = dyn_cast<CallInst>(I->getUser());
    ++I;
    if (!CI)
      continue;

    // Find llvm.assume intrinsics for this llvm.bitset.test call.
    SmallVector<CallInst *, 1> Assumes;
    for (const Use &CIU : CI->uses()) {
      auto AssumeCI = dyn_cast<CallInst>(CIU.getUser());
      if (AssumeCI && AssumeCI->getCalledValue() == AssumeFunc)
        Assumes.push_back(AssumeCI);
    }

    // If we found any, search for virtual calls based on %p and add them to
    // CallSlots.
    if (!Assumes.empty()) {
      Metadata *BitSet =
          cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
      Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
      if (SeenPtrs.insert(Ptr).second)
        findLoadCallsAtConstantOffset(BitSet, Ptr, 0, CI->getArgOperand(0));
    }

    // We no longer need the assumes or the bitset test.
    for (auto Assume : Assumes)
      Assume->eraseFromParent();
    // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we
    // may use the vtable argument later.
    if (CI->use_empty())
      CI->eraseFromParent();
  }

  // Rebuild llvm.bitsets metadata into a map for easy lookup.
  std::vector<VTableBits> Bits;
  DenseMap<Metadata *, std::set<BitSetInfo>> BitSets;
  buildBitSets(Bits, BitSets);
  if (BitSets.empty())
    return true;

  // For each (bitset, offset) pair:
  bool DidVirtualConstProp = false;
  for (auto &S : CallSlots) {
    // Search each of the vtables in the bitset for the virtual function
    // implementation at offset S.first.ByteOffset, and add to TargetsForSlot.
    std::vector<VirtualCallTarget> TargetsForSlot;
    if (!tryFindVirtualCallTargets(TargetsForSlot, BitSets[S.first.BitSetID],
                                   S.first.ByteOffset))
      continue;

    if (trySingleImplDevirt(TargetsForSlot, S.second))
      continue;

    DidVirtualConstProp |= tryVirtualConstProp(TargetsForSlot, S.second);
  }

  // Rebuild each global we touched as part of virtual constant propagation to
  // include the before and after bytes.
  if (DidVirtualConstProp)
    for (VTableBits &B : Bits)
      rebuildGlobal(B);

  return true;
}
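// The use-list walk above advances the iterator before any erasure, so
// deleting CI (or its assumes) can never invalidate the iterator still in
// hand. A minimal standalone sketch of that erase-safe scan, with a std::list
// of ints standing in for the intrinsic's use list:
#include <list>

void eraseNegatives(std::list<int> &Uses) {
  for (auto I = Uses.begin(), E = Uses.end(); I != E;) {
    auto Cur = I;
    ++I; // Step past Cur first; erasing Cur leaves I untouched.
    if (*Cur < 0)
      Uses.erase(Cur);
  }
}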
bool DevirtModule::run() {
  Function *TypeTestFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::type_test));
  Function *TypeCheckedLoadFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
  Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));

  if ((!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
       AssumeFunc->use_empty()) &&
      (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
    return false;

  if (TypeTestFunc && AssumeFunc)
    scanTypeTestUsers(TypeTestFunc, AssumeFunc);

  if (TypeCheckedLoadFunc)
    scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);

  // Rebuild type metadata into a map for easy lookup.
  std::vector<VTableBits> Bits;
  DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
  buildTypeIdentifierMap(Bits, TypeIdMap);
  if (TypeIdMap.empty())
    return true;

  // For each (type, offset) pair:
  bool DidVirtualConstProp = false;
  std::map<std::string, Function*> DevirtTargets;
  for (auto &S : CallSlots) {
    // Search each of the members of the type identifier for the virtual
    // function implementation at offset S.first.ByteOffset, and add to
    // TargetsForSlot.
    std::vector<VirtualCallTarget> TargetsForSlot;
    if (!tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID],
                                   S.first.ByteOffset))
      continue;

    if (!trySingleImplDevirt(TargetsForSlot, S.second) &&
        tryVirtualConstProp(TargetsForSlot, S.second))
      DidVirtualConstProp = true;

    // Collect functions devirtualized at least for one call site for stats.
    if (RemarksEnabled)
      for (const auto &T : TargetsForSlot)
        if (T.WasDevirt)
          DevirtTargets[T.Fn->getName()] = T.Fn;
  }

  if (RemarksEnabled) {
    // Generate remarks for each devirtualized function.
    for (const auto &DT : DevirtTargets) {
      Function *F = DT.second;
      DISubprogram *SP = F->getSubprogram();
      DebugLoc DL = SP ? DebugLoc::get(SP->getScopeLine(), 0, SP) : DebugLoc();
      emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, DL,
                             Twine("devirtualized ") + F->getName());
    }
  }

  // If we were able to eliminate all unsafe uses for a type checked load,
  // eliminate the type test by replacing it with true.
  if (TypeCheckedLoadFunc) {
    auto True = ConstantInt::getTrue(M.getContext());
    for (auto &&U : NumUnsafeUsesForTypeTest) {
      if (U.second == 0) {
        U.first->replaceAllUsesWith(True);
        U.first->eraseFromParent();
      }
    }
  }

  // Rebuild each global we touched as part of virtual constant propagation to
  // include the before and after bytes.
  if (DidVirtualConstProp)
    for (VTableBits &B : Bits)
      rebuildGlobal(B);

  return true;
}
bool isEmpty() const { return Roots.empty(); }
bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  // TODO: Could probably handle variadic functions.
  if (F.isVarArg() || F.hasStructRetAttr() ||
      AMDGPU::isEntryFunctionCC(F.getCallingConv()))
    return false;

  MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();

  unsigned ReturnNumRegs = 0;
  SmallSet<int, 4> OutArgIndexes;
  SmallVector<Type *, 4> ReturnTypes;
  Type *RetTy = F.getReturnType();
  if (!RetTy->isVoidTy()) {
    ReturnNumRegs = DL->getTypeStoreSize(RetTy) / 4;

    if (ReturnNumRegs >= MaxNumRetRegs)
      return false;

    ReturnTypes.push_back(RetTy);
  }

  SmallVector<Argument *, 4> OutArgs;
  for (Argument &Arg : F.args()) {
    if (isOutArgumentCandidate(Arg)) {
      LLVM_DEBUG(dbgs() << "Found possible out argument " << Arg
                        << " in function " << F.getName() << '\n');
      OutArgs.push_back(&Arg);
    }
  }

  if (OutArgs.empty())
    return false;

  using ReplacementVec = SmallVector<std::pair<Argument *, Value *>, 4>;

  DenseMap<ReturnInst *, ReplacementVec> Replacements;

  SmallVector<ReturnInst *, 4> Returns;
  for (BasicBlock &BB : F) {
    if (ReturnInst *RI = dyn_cast<ReturnInst>(&BB.back()))
      Returns.push_back(RI);
  }

  if (Returns.empty())
    return false;

  bool Changing;

  do {
    Changing = false;

    // Keep retrying if we are able to successfully eliminate an argument. This
    // helps with cases with multiple arguments which may alias, such as in a
    // sincos implementation. If we have 2 stores to arguments, on the first
    // attempt the MDA query will succeed for the second store but not the
    // first. On the second iteration we've removed that clobbering out
    // argument (by effectively moving it into another function) and will find
    // the second argument is OK to move.
    for (Argument *OutArg : OutArgs) {
      bool ThisReplaceable = true;

      SmallVector<std::pair<ReturnInst *, StoreInst *>, 4> ReplaceableStores;

      Type *ArgTy = OutArg->getType()->getPointerElementType();

      // Skip this argument if converting it will push us over the register
      // count to return limit.

      // TODO: This is an approximation. When legalized this could be more. We
      // can ask TLI for exactly how many.
      unsigned ArgNumRegs = DL->getTypeStoreSize(ArgTy) / 4;
      if (ArgNumRegs + ReturnNumRegs > MaxNumRetRegs)
        continue;

      // An argument is convertible only if all exit blocks are able to replace
      // it.
      for (ReturnInst *RI : Returns) {
        BasicBlock *BB = RI->getParent();

        MemDepResult Q = MDA->getPointerDependencyFrom(MemoryLocation(OutArg),
                                                       true, BB->end(), BB, RI);
        StoreInst *SI = nullptr;
        if (Q.isDef())
          SI = dyn_cast<StoreInst>(Q.getInst());

        if (SI) {
          LLVM_DEBUG(dbgs() << "Found out argument store: " << *SI << '\n');
          ReplaceableStores.emplace_back(RI, SI);
        } else {
          ThisReplaceable = false;
          break;
        }
      }

      if (!ThisReplaceable)
        continue; // Try the next argument candidate.

      for (std::pair<ReturnInst *, StoreInst *> Store : ReplaceableStores) {
        Value *ReplVal = Store.second->getValueOperand();

        auto &ValVec = Replacements[Store.first];
        if (llvm::find_if(ValVec,
              [OutArg](const std::pair<Argument *, Value *> &Entry) {
                return Entry.first == OutArg;
              }) != ValVec.end()) {
          LLVM_DEBUG(dbgs() << "Saw multiple out arg stores " << *OutArg
                            << '\n');
          // It is possible to see stores to the same argument multiple times,
          // but we expect these would have been optimized out already.
          ThisReplaceable = false;
          break;
        }

        ValVec.emplace_back(OutArg, ReplVal);
        Store.second->eraseFromParent();
      }

      if (ThisReplaceable) {
        ReturnTypes.push_back(ArgTy);
        OutArgIndexes.insert(OutArg->getArgNo());
        ++NumOutArgumentsReplaced;
        Changing = true;
      }
    }
  } while (Changing);

  if (Replacements.empty())
    return false;

  LLVMContext &Ctx = F.getParent()->getContext();
  StructType *NewRetTy = StructType::create(Ctx, ReturnTypes, F.getName());

  FunctionType *NewFuncTy = FunctionType::get(NewRetTy,
                                              F.getFunctionType()->params(),
                                              F.isVarArg());

  LLVM_DEBUG(dbgs() << "Computed new return type: " << *NewRetTy << '\n');

  Function *NewFunc = Function::Create(NewFuncTy, Function::PrivateLinkage,
                                       F.getName() + ".body");
  F.getParent()->getFunctionList().insert(F.getIterator(), NewFunc);
  NewFunc->copyAttributesFrom(&F);
  NewFunc->setComdat(F.getComdat());

  // We want to preserve the function and param attributes, but need to strip
  // off any return attributes, e.g. zeroext doesn't make sense with a struct.
  NewFunc->stealArgumentListFrom(F);

  AttrBuilder RetAttrs;
  RetAttrs.addAttribute(Attribute::SExt);
  RetAttrs.addAttribute(Attribute::ZExt);
  RetAttrs.addAttribute(Attribute::NoAlias);
  NewFunc->removeAttributes(AttributeList::ReturnIndex, RetAttrs);
  // TODO: How to preserve metadata?

  // Move the body of the function into the new rewritten function, and replace
  // this function with a stub.
  NewFunc->getBasicBlockList().splice(NewFunc->begin(), F.getBasicBlockList());

  for (std::pair<ReturnInst *, ReplacementVec> &Replacement : Replacements) {
    ReturnInst *RI = Replacement.first;
    IRBuilder<> B(RI);
    B.SetCurrentDebugLocation(RI->getDebugLoc());

    int RetIdx = 0;
    Value *NewRetVal = UndefValue::get(NewRetTy);

    Value *RetVal = RI->getReturnValue();
    if (RetVal)
      NewRetVal = B.CreateInsertValue(NewRetVal, RetVal, RetIdx++);

    for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second) {
      Argument *Arg = ReturnPoint.first;
      Value *Val = ReturnPoint.second;

      Type *EltTy = Arg->getType()->getPointerElementType();
      if (Val->getType() != EltTy) {
        Type *EffectiveEltTy = EltTy;
        if (StructType *CT = dyn_cast<StructType>(EltTy)) {
          assert(CT->getNumElements() == 1);
          EffectiveEltTy = CT->getElementType(0);
        }

        if (DL->getTypeSizeInBits(EffectiveEltTy) !=
            DL->getTypeSizeInBits(Val->getType())) {
          assert(isVec3ToVec4Shuffle(EffectiveEltTy, Val->getType()));
          Val = B.CreateShuffleVector(Val, UndefValue::get(Val->getType()),
                                      { 0, 1, 2 });
        }

        Val = B.CreateBitCast(Val, EffectiveEltTy);

        // Re-create single element composite.
        if (EltTy != EffectiveEltTy)
          Val = B.CreateInsertValue(UndefValue::get(EltTy), Val, 0);
      }

      NewRetVal = B.CreateInsertValue(NewRetVal, Val, RetIdx++);
    }

    if (RetVal)
      RI->setOperand(0, NewRetVal);
    else {
      B.CreateRet(NewRetVal);
      RI->eraseFromParent();
    }
  }

  SmallVector<Value *, 16> StubCallArgs;
  for (Argument &Arg : F.args()) {
    if (OutArgIndexes.count(Arg.getArgNo())) {
      // It's easier to preserve the type of the argument list. We rely on
      // DeadArgumentElimination to take care of these.
      StubCallArgs.push_back(UndefValue::get(Arg.getType()));
    } else {
      StubCallArgs.push_back(&Arg);
    }
  }

  BasicBlock *StubBB = BasicBlock::Create(Ctx, "", &F);
  IRBuilder<> B(StubBB);
  CallInst *StubCall = B.CreateCall(NewFunc, StubCallArgs);

  int RetIdx = RetTy->isVoidTy() ? 0 : 1;
  for (Argument &Arg : F.args()) {
    if (!OutArgIndexes.count(Arg.getArgNo()))
      continue;

    PointerType *ArgType = cast<PointerType>(Arg.getType());

    auto *EltTy = ArgType->getElementType();
    unsigned Align = Arg.getParamAlignment();
    if (Align == 0)
      Align = DL->getABITypeAlignment(EltTy);

    Value *Val = B.CreateExtractValue(StubCall, RetIdx++);
    Type *PtrTy = Val->getType()->getPointerTo(ArgType->getAddressSpace());

    // We can peek through bitcasts, so the type may not match.
    Value *PtrVal = B.CreateBitCast(&Arg, PtrTy);

    B.CreateAlignedStore(Val, PtrVal, Align);
  }

  if (!RetTy->isVoidTy()) {
    B.CreateRet(B.CreateExtractValue(StubCall, 0));
  } else {
    B.CreateRetVoid();
  }

  // The function is now a stub we want to inline.
  F.addFnAttr(Attribute::AlwaysInline);

  ++NumOutArgumentFunctionsReplaced;
  return true;
}
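// At the IR level the pass has now split the function in two: a ".body"
// function returning a struct, plus the original signature kept as an
// AlwaysInline stub that unpacks the struct into the out pointers. A hedged
// C++ analogy of the before/after shape (sincos_out and friends are made-up
// names; the real transformation happens on IR, not source):
#include <cmath>

// Before: results are written through out pointers.
void sincos_out(float X, float *Sin, float *Cos) {
  *Sin = std::sin(X);
  *Cos = std::cos(X);
}

// After: the body returns everything by value in a struct...
struct SinCosRet { float Sin, Cos; };

static SinCosRet sincos_body(float X, float *, float *) {
  return {std::sin(X), std::cos(X)};
}

// ...and the original signature survives as an inlinable stub that stores the
// struct members back through the (now otherwise unused) out pointers.
inline void sincos_stub(float X, float *Sin, float *Cos) {
  SinCosRet R = sincos_body(X, Sin, Cos);
  *Sin = R.Sin;
  *Cos = R.Cos;
}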
bool DevirtModule::run() {
  Function *TypeTestFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::type_test));
  Function *TypeCheckedLoadFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
  Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));

  if ((!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
       AssumeFunc->use_empty()) &&
      (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
    return false;

  if (TypeTestFunc && AssumeFunc)
    scanTypeTestUsers(TypeTestFunc, AssumeFunc);

  if (TypeCheckedLoadFunc)
    scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);

  // Rebuild type metadata into a map for easy lookup.
  std::vector<VTableBits> Bits;
  DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
  buildTypeIdentifierMap(Bits, TypeIdMap);
  if (TypeIdMap.empty())
    return true;

  // For each (type, offset) pair:
  bool DidVirtualConstProp = false;
  for (auto &S : CallSlots) {
    // Search each of the members of the type identifier for the virtual
    // function implementation at offset S.first.ByteOffset, and add to
    // TargetsForSlot.
    std::vector<VirtualCallTarget> TargetsForSlot;
    if (!tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID],
                                   S.first.ByteOffset))
      continue;

    if (trySingleImplDevirt(TargetsForSlot, S.second))
      continue;

    DidVirtualConstProp |= tryVirtualConstProp(TargetsForSlot, S.second);
  }

  // If we were able to eliminate all unsafe uses for a type checked load,
  // eliminate the type test by replacing it with true.
  if (TypeCheckedLoadFunc) {
    auto True = ConstantInt::getTrue(M.getContext());
    for (auto &&U : NumUnsafeUsesForTypeTest) {
      if (U.second == 0) {
        U.first->replaceAllUsesWith(True);
        U.first->eraseFromParent();
      }
    }
  }

  // Rebuild each global we touched as part of virtual constant propagation to
  // include the before and after bytes.
  if (DidVirtualConstProp)
    for (VTableBits &B : Bits)
      rebuildGlobal(B);

  return true;
}
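// The NumUnsafeUsesForTypeTest cleanup above is reference-count bookkeeping:
// the scan starts a counter of unsafe uses per type test, devirtualization
// decrements it, and any test whose count reaches zero folds to true. A tiny
// sketch of that final sweep (Inst is a hypothetical stand-in; the real code
// calls replaceAllUsesWith(True) and eraseFromParent()):
#include <map>

struct Inst { bool Live = true; };

void foldProvenTypeTests(std::map<Inst *, unsigned> &NumUnsafeUses) {
  for (auto &U : NumUnsafeUses)
    if (U.second == 0)
      U.first->Live = false; // Folded away: no unsafe uses remain.
}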