void LTOCodeGenerator::applyScopeRestrictions() {
  if (ScopeRestrictionsDone)
    return;
  Module *mergedModule = Linker.getModule();

  // Start off with a verification pass.
  PassManager passes;
  passes.add(createVerifierPass());

  // Mark which symbols cannot be internalized.
  Mangler Mangler(TargetMach);
  std::vector<const char*> MustPreserveList;
  SmallPtrSet<GlobalValue*, 8> AsmUsed;
  std::vector<StringRef> Libcalls;
  TargetLibraryInfo TLI(Triple(TargetMach->getTargetTriple()));
  accumulateAndSortLibcalls(Libcalls, TLI, TargetMach->getTargetLowering());

  for (Module::iterator f = mergedModule->begin(),
                        e = mergedModule->end();
       f != e; ++f)
    applyRestriction(*f, Libcalls, MustPreserveList, AsmUsed, Mangler);
  for (Module::global_iterator v = mergedModule->global_begin(),
                               e = mergedModule->global_end();
       v != e; ++v)
    applyRestriction(*v, Libcalls, MustPreserveList, AsmUsed, Mangler);
  for (Module::alias_iterator a = mergedModule->alias_begin(),
                              e = mergedModule->alias_end();
       a != e; ++a)
    applyRestriction(*a, Libcalls, MustPreserveList, AsmUsed, Mangler);

  GlobalVariable *LLVMCompilerUsed =
      mergedModule->getGlobalVariable("llvm.compiler.used");
  findUsedValues(LLVMCompilerUsed, AsmUsed);
  if (LLVMCompilerUsed)
    LLVMCompilerUsed->eraseFromParent();

  if (!AsmUsed.empty()) {
    llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(Context);
    std::vector<Constant*> asmUsed2;
    for (SmallPtrSet<GlobalValue*, 16>::const_iterator i = AsmUsed.begin(),
                                                       e = AsmUsed.end();
         i != e; ++i) {
      GlobalValue *GV = *i;
      Constant *c = ConstantExpr::getBitCast(GV, i8PTy);
      asmUsed2.push_back(c);
    }

    llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, asmUsed2.size());
    LLVMCompilerUsed =
        new llvm::GlobalVariable(*mergedModule, ATy, false,
                                 llvm::GlobalValue::AppendingLinkage,
                                 llvm::ConstantArray::get(ATy, asmUsed2),
                                 "llvm.compiler.used");
    LLVMCompilerUsed->setSection("llvm.metadata");
  }

  passes.add(createInternalizePass(MustPreserveList));

  // Apply scope restrictions.
  passes.run(*mergedModule);

  ScopeRestrictionsDone = true;
}
/// isLiveInButUnusedBefore - Return true if the register is live into the MBB
/// and not used before it reaches the MI that defines the register.
static bool isLiveInButUnusedBefore(unsigned Reg, MachineInstr *MI,
                                    MachineBasicBlock *MBB,
                                    const TargetRegisterInfo *TRI,
                                    MachineRegisterInfo *MRI) {
  // First check if the register is live into the block.
  bool isLiveIn = false;
  for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(),
                                                E = MBB->livein_end();
       I != E; ++I)
    if (Reg == *I || TRI->isSuperRegister(Reg, *I)) {
      isLiveIn = true;
      break;
    }
  if (!isLiveIn)
    return false;

  // Is there any use of it before the specified MI?
  SmallPtrSet<MachineInstr*, 4> UsesInMBB;
  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
                                         UE = MRI->use_end();
       UI != UE; ++UI) {
    MachineOperand &UseMO = UI.getOperand();
    if (UseMO.isReg() && UseMO.isUndef())
      continue;
    MachineInstr *UseMI = &*UI;
    if (UseMI->getParent() == MBB)
      UsesInMBB.insert(UseMI);
  }
  if (UsesInMBB.empty())
    return true;

  for (MachineBasicBlock::iterator I = MBB->begin(), E = MI; I != E; ++I)
    if (UsesInMBB.count(&*I))
      return false;
  return true;
}
// Calculate the set of virtual registers that must be passed through each
// basic block in order to satisfy the requirements of successor blocks. This
// is very similar to calcRegsPassed, only backwards.
void MachineVerifier::calcRegsRequired() {
  // First push live-in regs to predecessors' vregsRequired.
  SmallPtrSet<const MachineBasicBlock*, 8> todo;
  for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
       MFI != MFE; ++MFI) {
    const MachineBasicBlock &MBB(*MFI);
    BBInfo &MInfo = MBBInfoMap[&MBB];
    for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(),
         PrE = MBB.pred_end(); PrI != PrE; ++PrI) {
      BBInfo &PInfo = MBBInfoMap[*PrI];
      if (PInfo.addRequired(MInfo.vregsLiveIn))
        todo.insert(*PrI);
    }
  }

  // Iteratively push vregsRequired to predecessors. This will converge to the
  // same final state regardless of DenseSet iteration order.
  while (!todo.empty()) {
    const MachineBasicBlock *MBB = *todo.begin();
    todo.erase(MBB);
    BBInfo &MInfo = MBBInfoMap[MBB];
    for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
         PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
      if (*PrI == MBB)
        continue;
      BBInfo &SInfo = MBBInfoMap[*PrI];
      if (SInfo.addRequired(MInfo.vregsRequired))
        todo.insert(*PrI);
    }
  }
}
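// ---------------------------------------------------------------------------
// The verifier routine above (and calcRegsPassed below) share one idiom: a
// SmallPtrSet serves simultaneously as the worklist and as its own membership
// test, so a block is never queued twice, and the loop reruns until the
// dataflow facts stop changing. A minimal, self-contained sketch of that
// idiom follows; Node, VReg, and addRequired are illustrative stand-ins, not
// LLVM APIs.
#include "llvm/ADT/SmallPtrSet.h"
#include <vector>

namespace {

struct VReg {}; // stand-in for a virtual register

struct Node {
  std::vector<Node *> Preds;
  llvm::SmallPtrSet<VReg *, 8> Required;

  // Merge RHS into Required; return true if anything new was added.
  bool addRequired(const llvm::SmallPtrSet<VReg *, 8> &RHS) {
    bool Changed = false;
    for (VReg *R : RHS)
      Changed |= Required.insert(R).second;
    return Changed;
  }
};

// Run the backward propagation to a fixed point, re-queueing a predecessor
// only when its fact set actually grew. Termination follows because the sets
// only grow and are bounded.
void propagateRequired(llvm::SmallPtrSet<Node *, 8> &Todo) {
  while (!Todo.empty()) {
    Node *N = *Todo.begin();
    Todo.erase(N);
    for (Node *P : N->Preds)
      if (P != N && P->addRequired(N->Required))
        Todo.insert(P);
  }
}

} // end anonymous namespace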
/// \brief Find an insertion point that dominates all uses.
Instruction *ConstantHoisting::
findConstantInsertionPoint(const ConstantInfo &ConstInfo) const {
  assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry.");
  // Collect all IDoms.
  SmallPtrSet<BasicBlock *, 8> BBs;
  for (auto const &RCI : ConstInfo.RebasedConstants)
    BBs.insert(getIDom(RCI));

  assert(!BBs.empty() && "No dominators!?");

  if (BBs.count(Entry))
    return &Entry->front();

  while (BBs.size() >= 2) {
    BasicBlock *BB, *BB1, *BB2;
    BB1 = *BBs.begin();
    BB2 = *std::next(BBs.begin());
    BB = DT->findNearestCommonDominator(BB1, BB2);
    if (BB == Entry)
      return &Entry->front();
    BBs.erase(BB1);
    BBs.erase(BB2);
    BBs.insert(BB);
  }
  assert((BBs.size() == 1) && "Expected only one element.");
  Instruction &FirstInst = (*BBs.begin())->front();
  return findMatInsertPt(&FirstInst);
}
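// ---------------------------------------------------------------------------
// The pairwise loop above repeatedly replaces two blocks in the set with
// their nearest common dominator until a single block remains. The same
// reduction can be written as a left fold; a sketch, assuming LLVM's
// DominatorTree API (findNearestCommonDominator is a real method; the helper
// name is ours).
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Dominators.h"

static llvm::BasicBlock *
nearestCommonDominator(const llvm::SmallPtrSetImpl<llvm::BasicBlock *> &BBs,
                       llvm::DominatorTree &DT) {
  llvm::BasicBlock *Common = nullptr;
  for (llvm::BasicBlock *BB : BBs)
    Common = Common ? DT.findNearestCommonDominator(Common, BB) : BB;
  return Common; // dominates every block in BBs (null only if BBs is empty)
}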
// Calculate the largest possible vregsPassed sets. These are the registers
// that can pass through an MBB live, but may not be live every time. It is
// assumed that all vregsPassed sets are empty before the call.
void MachineVerifier::calcRegsPassed() {
  // First push live-out regs to successors' vregsPassed. Remember the MBBs
  // that have any vregsPassed.
  SmallPtrSet<const MachineBasicBlock*, 8> todo;
  for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
       MFI != MFE; ++MFI) {
    const MachineBasicBlock &MBB(*MFI);
    BBInfo &MInfo = MBBInfoMap[&MBB];
    if (!MInfo.reachable)
      continue;
    for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(),
         SuE = MBB.succ_end(); SuI != SuE; ++SuI) {
      BBInfo &SInfo = MBBInfoMap[*SuI];
      if (SInfo.addPassed(MInfo.regsLiveOut))
        todo.insert(*SuI);
    }
  }

  // Iteratively push vregsPassed to successors. This will converge to the
  // same final state regardless of DenseSet iteration order.
  while (!todo.empty()) {
    const MachineBasicBlock *MBB = *todo.begin();
    todo.erase(MBB);
    BBInfo &MInfo = MBBInfoMap[MBB];
    for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
         SuE = MBB->succ_end(); SuI != SuE; ++SuI) {
      if (*SuI == MBB)
        continue;
      BBInfo &SInfo = MBBInfoMap[*SuI];
      if (SInfo.addPassed(MInfo.vregsPassed))
        todo.insert(*SuI);
    }
  }
}
/// \brief Calculate edge weights for successors that lead to unreachable
/// blocks.
///
/// Predict that a successor which necessarily leads to an
/// unreachable-terminated block is taken extremely rarely.
bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) {
  TerminatorInst *TI = BB->getTerminator();
  if (TI->getNumSuccessors() == 0) {
    if (isa<UnreachableInst>(TI))
      PostDominatedByUnreachable.insert(BB);
    return false;
  }

  SmallPtrSet<BasicBlock *, 4> UnreachableEdges;
  SmallPtrSet<BasicBlock *, 4> ReachableEdges;

  for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
    if (PostDominatedByUnreachable.count(*I))
      UnreachableEdges.insert(*I);
    else
      ReachableEdges.insert(*I);
  }

  // If all successors are in the set of blocks post-dominated by unreachable,
  // this block is too.
  if (UnreachableEdges.size() == TI->getNumSuccessors())
    PostDominatedByUnreachable.insert(BB);

  // Skip probabilities if this block has a single successor or if all were
  // reachable.
  if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty())
    return false;

  uint32_t UnreachableWeight =
      std::max(UR_TAKEN_WEIGHT / UnreachableEdges.size(), MIN_WEIGHT);
  for (SmallPtrSet<BasicBlock *, 4>::iterator I = UnreachableEdges.begin(),
                                              E = UnreachableEdges.end();
       I != E; ++I)
    setEdgeWeight(BB, *I, UnreachableWeight);

  if (ReachableEdges.empty())
    return true;
  uint32_t ReachableWeight =
      std::max(UR_NONTAKEN_WEIGHT / ReachableEdges.size(), NORMAL_WEIGHT);
  for (SmallPtrSet<BasicBlock *, 4>::iterator I = ReachableEdges.begin(),
                                              E = ReachableEdges.end();
       I != E; ++I)
    setEdgeWeight(BB, *I, ReachableWeight);

  return true;
}
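// ---------------------------------------------------------------------------
// A worked example of the weight split above, hedged on the constants: in the
// LLVM revision this snippet appears to come from, UR_TAKEN_WEIGHT is 1 and
// UR_NONTAKEN_WEIGHT is 1024 * 1024 - 1. With two unreachable and two
// reachable successors:
//
//   UnreachableWeight = max(1 / 2, MIN_WEIGHT)              = MIN_WEIGHT
//   ReachableWeight   = max((2^20 - 1) / 2, NORMAL_WEIGHT) ~= 2^19
//
// so each reachable edge is weighted roughly half a million times heavier
// than each unreachable edge, which is the "extremely rarely taken"
// prediction the doc comment promises.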
void LTOCodeGenerator::applyScopeRestrictions() {
  if (_scopeRestrictionsDone)
    return;
  Module *mergedModule = _linker.getModule();

  // Start off with a verification pass.
  PassManager passes;
  passes.add(createVerifierPass());

  // Mark which symbols cannot be internalized.
  MCContext Context(*_target->getMCAsmInfo(), *_target->getRegisterInfo(),
                    NULL);
  Mangler mangler(Context, _target);
  std::vector<const char*> mustPreserveList;
  SmallPtrSet<GlobalValue*, 8> asmUsed;

  for (Module::iterator f = mergedModule->begin(),
                        e = mergedModule->end();
       f != e; ++f)
    applyRestriction(*f, mustPreserveList, asmUsed, mangler);
  for (Module::global_iterator v = mergedModule->global_begin(),
                               e = mergedModule->global_end();
       v != e; ++v)
    applyRestriction(*v, mustPreserveList, asmUsed, mangler);
  for (Module::alias_iterator a = mergedModule->alias_begin(),
                              e = mergedModule->alias_end();
       a != e; ++a)
    applyRestriction(*a, mustPreserveList, asmUsed, mangler);

  GlobalVariable *LLVMCompilerUsed =
      mergedModule->getGlobalVariable("llvm.compiler.used");
  findUsedValues(LLVMCompilerUsed, asmUsed);
  if (LLVMCompilerUsed)
    LLVMCompilerUsed->eraseFromParent();

  if (!asmUsed.empty()) {
    llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(_context);
    std::vector<Constant*> asmUsed2;
    for (SmallPtrSet<GlobalValue*, 16>::const_iterator i = asmUsed.begin(),
                                                       e = asmUsed.end();
         i != e; ++i) {
      GlobalValue *GV = *i;
      Constant *c = ConstantExpr::getBitCast(GV, i8PTy);
      asmUsed2.push_back(c);
    }

    llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, asmUsed2.size());
    LLVMCompilerUsed =
        new llvm::GlobalVariable(*mergedModule, ATy, false,
                                 llvm::GlobalValue::AppendingLinkage,
                                 llvm::ConstantArray::get(ATy, asmUsed2),
                                 "llvm.compiler.used");
    LLVMCompilerUsed->setSection("llvm.metadata");
  }

  passes.add(createInternalizePass(mustPreserveList));

  // Apply scope restrictions.
  passes.run(*mergedModule);

  _scopeRestrictionsDone = true;
}
void LTOCodeGenerator::applyScopeRestrictions() {
  if (ScopeRestrictionsDone || !ShouldInternalize)
    return;

  // Start off with a verification pass.
  legacy::PassManager passes;
  passes.add(createVerifierPass());

  // Mark which symbols cannot be internalized.
  Mangler Mangler;
  std::vector<const char*> MustPreserveList;
  SmallPtrSet<GlobalValue*, 8> AsmUsed;
  std::vector<StringRef> Libcalls;
  TargetLibraryInfoImpl TLII(Triple(TargetMach->getTargetTriple()));
  TargetLibraryInfo TLI(TLII);

  accumulateAndSortLibcalls(Libcalls, TLI, *MergedModule, *TargetMach);

  for (Function &f : *MergedModule)
    applyRestriction(f, Libcalls, MustPreserveList, AsmUsed, Mangler);
  for (GlobalVariable &v : MergedModule->globals())
    applyRestriction(v, Libcalls, MustPreserveList, AsmUsed, Mangler);
  for (GlobalAlias &a : MergedModule->aliases())
    applyRestriction(a, Libcalls, MustPreserveList, AsmUsed, Mangler);

  GlobalVariable *LLVMCompilerUsed =
      MergedModule->getGlobalVariable("llvm.compiler.used");
  findUsedValues(LLVMCompilerUsed, AsmUsed);
  if (LLVMCompilerUsed)
    LLVMCompilerUsed->eraseFromParent();

  if (!AsmUsed.empty()) {
    llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(Context);
    std::vector<Constant*> asmUsed2;
    for (auto *GV : AsmUsed) {
      Constant *c = ConstantExpr::getBitCast(GV, i8PTy);
      asmUsed2.push_back(c);
    }

    llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, asmUsed2.size());
    LLVMCompilerUsed =
        new llvm::GlobalVariable(*MergedModule, ATy, false,
                                 llvm::GlobalValue::AppendingLinkage,
                                 llvm::ConstantArray::get(ATy, asmUsed2),
                                 "llvm.compiler.used");
    LLVMCompilerUsed->setSection("llvm.metadata");
  }

  passes.add(createInternalizePass(MustPreserveList));

  // Apply scope restrictions.
  passes.run(*MergedModule);

  ScopeRestrictionsDone = true;
}
// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
// when following the CFG edge to SuccMBB. This needs to be after any def of
// SrcReg, but before any subsequent point where control flow might jump out
// of the basic block.
MachineBasicBlock::iterator
llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
                                          MachineBasicBlock &SuccMBB,
                                          unsigned SrcReg) {
  // Handle the trivial case trivially.
  if (MBB.empty())
    return MBB.begin();

  // Usually, we just want to insert the copy before the first terminator
  // instruction. However, for the edge going to a landing pad, we must insert
  // the copy before the call/invoke instruction.
  if (!SuccMBB.isLandingPad())
    return MBB.getFirstTerminator();

  // Discover any defs/uses in this basic block.
  SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
  for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
         RE = MRI->reg_end(); RI != RE; ++RI) {
    MachineInstr *DefUseMI = &*RI;
    if (DefUseMI->getParent() == &MBB)
      DefUsesInMBB.insert(DefUseMI);
  }

  MachineBasicBlock::iterator InsertPoint;
  if (DefUsesInMBB.empty()) {
    // No defs. Insert the copy at the start of the basic block.
    InsertPoint = MBB.begin();
  } else if (DefUsesInMBB.size() == 1) {
    // Insert the copy immediately after the def/use.
    InsertPoint = *DefUsesInMBB.begin();
    ++InsertPoint;
  } else {
    // Insert the copy immediately after the last def/use.
    InsertPoint = MBB.end();
    while (!DefUsesInMBB.count(&*--InsertPoint)) {}
    ++InsertPoint;
  }

  // Make sure the copy goes after any phi nodes however.
  return SkipPHIsAndLabels(MBB, InsertPoint);
}
/// This method creates the SSI representation for the list of values
/// received. It will only create the SSI representation if a value is used
/// to decide a branch. Repeated values are created only once.
///
void SSI::createSSI(SmallVectorImpl<Instruction *> &value) {
  init(value);

  SmallPtrSet<Instruction*, 4> needConstruction;
  for (SmallVectorImpl<Instruction*>::iterator I = value.begin(),
       E = value.end(); I != E; ++I)
    if (created.insert(*I))
      needConstruction.insert(*I);

  insertSigmaFunctions(needConstruction);

  // Test if there is a need to transform to SSI.
  if (!needConstruction.empty()) {
    insertPhiFunctions(needConstruction);
    renameInit(needConstruction);
    rename(DT_->getRoot());
    fixPhis();
  }

  clean();
}
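// ---------------------------------------------------------------------------
// Note the era-specific API in `created.insert(*I)`: old SmallPtrSet::insert
// returned a plain bool ("was it new?"), whereas current LLVM returns
// std::pair<iterator, bool>. A sketch of the same dedup loop against the
// modern API; the names are illustrative, not the SSI pass itself.
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"

struct Inst {}; // opaque stand-in; any pointer type works the same way

static void collectUnique(llvm::SmallVectorImpl<Inst *> &Values,
                          llvm::SmallPtrSetImpl<Inst *> &Created,
                          llvm::SmallPtrSetImpl<Inst *> &NeedConstruction) {
  for (Inst *I : Values)
    if (Created.insert(I).second) // .second is true only on first insertion
      NeedConstruction.insert(I);
}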
// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg.
// This needs to be after any def or uses of SrcReg, but before any subsequent
// point where control flow might jump out of the basic block.
MachineBasicBlock::iterator
llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
                                          unsigned SrcReg) {
  // Handle the trivial case trivially.
  if (MBB.empty())
    return MBB.begin();

  // If this basic block does not contain an invoke, then control flow always
  // reaches the end of it, so place the copy there. The logic below works in
  // this case too, but is more expensive.
  if (!isa<InvokeInst>(MBB.getBasicBlock()->getTerminator()))
    return MBB.getFirstTerminator();

  // Discover any definition/uses in this basic block.
  SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
  for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
         RE = MRI->reg_end(); RI != RE; ++RI) {
    MachineInstr *DefUseMI = &*RI;
    if (DefUseMI->getParent() == &MBB)
      DefUsesInMBB.insert(DefUseMI);
  }

  MachineBasicBlock::iterator InsertPoint;
  if (DefUsesInMBB.empty()) {
    // No def/uses. Insert the copy at the start of the basic block.
    InsertPoint = MBB.begin();
  } else if (DefUsesInMBB.size() == 1) {
    // Insert the copy immediately after the definition/use.
    InsertPoint = *DefUsesInMBB.begin();
    ++InsertPoint;
  } else {
    // Insert the copy immediately after the last definition/use.
    InsertPoint = MBB.end();
    while (!DefUsesInMBB.count(&*--InsertPoint)) {}
    ++InsertPoint;
  }

  // Make sure the copy goes after any phi nodes however.
  return SkipPHIsAndLabels(MBB, InsertPoint);
}
// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from
// SrcReg when following the CFG edge to SuccMBB. This needs to be after any
// def of SrcReg, but before any subsequent point where control flow might
// jump out of the basic block.
MachineBasicBlock::iterator
llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
                             unsigned SrcReg) {
  // Handle the trivial case trivially.
  if (MBB->empty())
    return MBB->begin();

  // Usually, we just want to insert the copy before the first terminator
  // instruction. However, for the edge going to a landing pad, we must insert
  // the copy before the call/invoke instruction.
  if (!SuccMBB->isLandingPad())
    return MBB->getFirstTerminator();

  // Discover any defs/uses in this basic block.
  SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
  MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
  for (MachineInstr &RI : MRI.reg_instructions(SrcReg)) {
    if (RI.getParent() == MBB)
      DefUsesInMBB.insert(&RI);
  }

  MachineBasicBlock::iterator InsertPoint;
  if (DefUsesInMBB.empty()) {
    // No defs. Insert the copy at the start of the basic block.
    InsertPoint = MBB->begin();
  } else if (DefUsesInMBB.size() == 1) {
    // Insert the copy immediately after the def/use.
    InsertPoint = *DefUsesInMBB.begin();
    ++InsertPoint;
  } else {
    // Insert the copy immediately after the last def/use.
    InsertPoint = MBB->end();
    while (!DefUsesInMBB.count(&*--InsertPoint)) {}
    ++InsertPoint;
  }

  // Make sure the copy goes after any phi nodes however.
  return MBB->SkipPHIsAndLabels(InsertPoint);
}
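// ---------------------------------------------------------------------------
// All three FindCopyInsertPoint variants above end with the same backward
// scan: starting from end(), decrement until the set reports a member, then
// step one past it. A standalone sketch of the idiom over a plain std::list
// (illustrative helper; it terminates only if Set contains at least one
// element of List, matching the non-empty precondition in the code above):
#include "llvm/ADT/SmallPtrSet.h"
#include <list>

template <typename T>
static typename std::list<T *>::iterator
afterLastMember(std::list<T *> &List, const llvm::SmallPtrSetImpl<T *> &Set) {
  auto It = List.end();
  while (!Set.count(*--It)) {
  }
  return ++It; // first position after the last element that is in Set
}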
bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
                                             const MDNode *NoAlias) const {
  if (!Scopes || !NoAlias)
    return true;

  // Collect the set of scope domains relevant to the noalias scopes.
  SmallPtrSet<const MDNode *, 16> Domains;
  for (const MDOperand &MDOp : NoAlias->operands())
    if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
      if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
        Domains.insert(Domain);

  // We alias unless, for some domain, the set of noalias scopes in that
  // domain is a superset of the set of alias scopes in that domain.
  for (const MDNode *Domain : Domains) {
    SmallPtrSet<const MDNode *, 16> ScopeNodes;
    collectMDInDomain(Scopes, Domain, ScopeNodes);
    if (ScopeNodes.empty())
      continue;

    SmallPtrSet<const MDNode *, 16> NANodes;
    collectMDInDomain(NoAlias, Domain, NANodes);

    // To not alias, all of the nodes in ScopeNodes must be in NANodes.
    bool FoundAll = true;
    for (const MDNode *SMD : ScopeNodes)
      if (!NANodes.count(SMD)) {
        FoundAll = false;
        break;
      }

    if (FoundAll)
      return false;
  }

  return true;
}
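// ---------------------------------------------------------------------------
// The "FoundAll" loop above is a set-containment test: the two locations do
// not alias in a domain only when ScopeNodes is a subset of NANodes. The
// test factors out into a small generic helper (a sketch; an equivalent
// helper may exist in LLVM's ADT/SetOperations.h, depending on version):
#include "llvm/ADT/SmallPtrSet.h"

template <typename T>
static bool isSubsetOf(const llvm::SmallPtrSetImpl<T> &A,
                       const llvm::SmallPtrSetImpl<T> &B) {
  for (T Elem : A)
    if (!B.count(Elem))
      return false;
  return true;
}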
void VirtRegRewriter::rewrite() {
  SmallVector<unsigned, 8> SuperDeads;
  SmallVector<unsigned, 8> SuperDefs;
  SmallVector<unsigned, 8> SuperKills;
  SmallPtrSet<const MachineInstr *, 4> NoReturnInsts;

  // Here we have a SparseSet to hold which PhysRegs are actually encountered
  // in the MF we are about to iterate over so that later when we call
  // setPhysRegUsed, we are only doing it for physRegs that were actually
  // found in the program and not for all of the possible physRegs for the
  // given target architecture. If the target has a lot of physRegs, then for
  // a small program there will be a significant compile time reduction here.
  PhysRegs.clear();
  PhysRegs.setUniverse(TRI->getNumRegs());

  // The function with uwtable should guarantee that the stack unwinder
  // can unwind the stack to the previous frame. Thus, we can't apply the
  // noreturn optimization if the caller function has uwtable attribute.
  bool HasUWTable = MF->getFunction()->hasFnAttribute(Attribute::UWTable);

  for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
       MBBI != MBBE; ++MBBI) {
    DEBUG(MBBI->print(dbgs(), Indexes));
    bool IsExitBB = MBBI->succ_empty();
    for (MachineBasicBlock::instr_iterator
           MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
      MachineInstr *MI = MII;
      ++MII;

      // Check if this instruction is a call to a noreturn function. If this
      // is a call to noreturn function and we don't need the stack unwinding
      // functionality (i.e. this function does not have uwtable attribute
      // and the callee function has the nounwind attribute), then we can
      // ignore the definitions set by this instruction.
      if (!HasUWTable && IsExitBB && MI->isCall()) {
        for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
               MOE = MI->operands_end(); MOI != MOE; ++MOI) {
          MachineOperand &MO = *MOI;
          if (!MO.isGlobal())
            continue;
          const Function *Func = dyn_cast<Function>(MO.getGlobal());
          if (!Func || !Func->hasFnAttribute(Attribute::NoReturn) ||
              // We need to keep correct unwind information
              // even if the function will not return, since the
              // runtime may need it.
              !Func->hasFnAttribute(Attribute::NoUnwind))
            continue;
          NoReturnInsts.insert(MI);
          break;
        }
      }

      for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
             MOE = MI->operands_end(); MOI != MOE; ++MOI) {
        MachineOperand &MO = *MOI;

        // Make sure MRI knows about registers clobbered by regmasks.
        if (MO.isRegMask())
          MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());

        // If we encounter a VirtReg or PhysReg then get at the PhysReg and
        // add it to the physreg bitset. Later we use only the PhysRegs that
        // were actually encountered in the MF to populate the MRI's used
        // physregs.
        if (MO.isReg() && MO.getReg())
          PhysRegs.insert(
              TargetRegisterInfo::isVirtualRegister(MO.getReg()) ?
              VRM->getPhys(MO.getReg()) :
              MO.getReg());

        if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
          continue;
        unsigned VirtReg = MO.getReg();
        unsigned PhysReg = VRM->getPhys(VirtReg);
        assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
               "Instruction uses unmapped VirtReg");
        assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");

        // Preserve semantics of sub-register operands.
        if (MO.getSubReg()) {
          // A virtual register kill refers to the whole register, so we may
          // have to add <imp-use,kill> operands for the super-register. A
          // partial redef always kills and redefines the super-register.
          if (MO.readsReg() && (MO.isDef() || MO.isKill()))
            SuperKills.push_back(PhysReg);

          if (MO.isDef()) {
            // The <def,undef> flag only makes sense for sub-register defs,
            // and we are substituting a full physreg. An <imp-use,kill>
            // operand from the SuperKills list will represent the partial
            // read of the super-register.
            MO.setIsUndef(false);

            // Also add implicit defs for the super-register.
            if (MO.isDead())
              SuperDeads.push_back(PhysReg);
            else
              SuperDefs.push_back(PhysReg);
          }

          // PhysReg operands cannot have subregister indexes.
          PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg());
          assert(PhysReg && "Invalid SubReg for physical register");
          MO.setSubReg(0);
        }

        // Rewrite. Note we could have used MachineOperand::substPhysReg(),
        // but we need the inlining here.
        MO.setReg(PhysReg);
      }

      // Add any missing super-register kills after rewriting the whole
      // instruction.
      while (!SuperKills.empty())
        MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true);

      while (!SuperDeads.empty())
        MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true);

      while (!SuperDefs.empty())
        MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI);

      DEBUG(dbgs() << "> " << *MI);

      // Finally, remove any identity copies.
      if (MI->isIdentityCopy()) {
        ++NumIdCopies;
        if (MI->getNumOperands() == 2) {
          DEBUG(dbgs() << "Deleting identity copy.\n");
          if (Indexes)
            Indexes->removeMachineInstrFromMaps(MI);
          // It's safe to erase MI because MII has already been incremented.
          MI->eraseFromParent();
        } else {
          // Transform identity copy to a KILL to deal with subregisters.
          MI->setDesc(TII->get(TargetOpcode::KILL));
          DEBUG(dbgs() << "Identity copy: " << *MI);
        }
      }
    }
  }

  // Tell MRI about physical registers in use.
  if (NoReturnInsts.empty()) {
    for (SparseSet<unsigned>::iterator
           RegI = PhysRegs.begin(), E = PhysRegs.end(); RegI != E; ++RegI)
      if (!MRI->reg_nodbg_empty(*RegI))
        MRI->setPhysRegUsed(*RegI);
  } else {
    for (SparseSet<unsigned>::iterator
           I = PhysRegs.begin(), E = PhysRegs.end(); I != E; ++I) {
      unsigned Reg = *I;
      if (MRI->reg_nodbg_empty(Reg))
        continue;
      // Check if this register has a use that will impact the rest of the
      // code. Uses in debug and noreturn instructions do not impact the
      // generated code.
      for (MachineInstr &It : MRI->reg_nodbg_instructions(Reg)) {
        if (!NoReturnInsts.count(&It)) {
          MRI->setPhysRegUsed(Reg);
          break;
        }
      }
    }
  }
}
/// PromoteArguments - This method checks the specified function to see if
/// there are any promotable arguments and if it is safe to promote the
/// function (for example, all callers are direct). If safe to promote some
/// arguments, it calls the DoPromotion method.
///
CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
  Function *F = CGN->getFunction();

  // Make sure that it is local to this module.
  if (!F || !F->hasLocalLinkage()) return 0;

  // First check: see if there are any pointer arguments! If not, quick exit.
  SmallVector<std::pair<Argument*, unsigned>, 16> PointerArgs;
  unsigned ArgNo = 0;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; ++I, ++ArgNo)
    if (I->getType()->isPointerTy())
      PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo));
  if (PointerArgs.empty()) return 0;

  // Second check: make sure that all callers are direct callers. We can't
  // transform functions that have indirect callers.
  if (F->hasAddressTaken())
    return 0;

  // Check to see which arguments are promotable. If an argument is
  // promotable, add it to ArgsToPromote.
  SmallPtrSet<Argument*, 8> ArgsToPromote;
  SmallPtrSet<Argument*, 8> ByValArgsToTransform;
  for (unsigned i = 0; i != PointerArgs.size(); ++i) {
    bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal);

    // If this is a byval argument, and if the aggregate type is small, just
    // pass the elements, which is always safe.
    Argument *PtrArg = PointerArgs[i].first;
    if (isByVal) {
      const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
      if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
        if (maxElements > 0 && STy->getNumElements() > maxElements) {
          DEBUG(dbgs() << "argpromotion disable promoting argument '"
                << PtrArg->getName() << "' because it would require adding more"
                << " than " << maxElements << " arguments to the function.\n");
        } else {
          // If all the elements are single-value types, we can promote it.
          bool AllSimple = true;
          for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
            if (!STy->getElementType(i)->isSingleValueType()) {
              AllSimple = false;
              break;
            }

          // Safe to transform, don't even bother trying to "promote" it.
          // Passing the elements as a scalar will allow scalarrepl to hack on
          // the new alloca we introduce.
          if (AllSimple) {
            ByValArgsToTransform.insert(PtrArg);
            continue;
          }
        }
      }
    }

    // Otherwise, see if we can promote the pointer to its value.
    if (isSafeToPromoteArgument(PtrArg, isByVal))
      ArgsToPromote.insert(PtrArg);
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) return 0;

  return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
}
/// PromoteArguments - This method checks the specified function to see if
/// there are any promotable arguments and if it is safe to promote the
/// function (for example, all callers are direct). If safe to promote some
/// arguments, it calls the DoPromotion method.
///
CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
  Function *F = CGN->getFunction();

  // Make sure that it is local to this module.
  if (!F || !F->hasLocalLinkage()) return nullptr;

  // First check: see if there are any pointer arguments! If not, quick exit.
  SmallVector<Argument*, 16> PointerArgs;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
    if (I->getType()->isPointerTy())
      PointerArgs.push_back(I);
  if (PointerArgs.empty()) return nullptr;

  // Second check: make sure that all callers are direct callers. We can't
  // transform functions that have indirect callers. Also see if the function
  // is self-recursive.
  bool isSelfRecursive = false;
  for (Use &U : F->uses()) {
    CallSite CS(U.getUser());
    // Must be a direct call.
    if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr;

    if (CS.getInstruction()->getParent()->getParent() == F)
      isSelfRecursive = true;
  }

  // Don't promote arguments for variadic functions. Adding, removing, or
  // changing non-pack parameters can change the classification of pack
  // parameters. Frontends encode that classification at the call site in the
  // IR, while in the callee the classification is determined dynamically
  // based on the number of registers consumed so far.
  if (F->isVarArg()) return nullptr;

  // Check to see which arguments are promotable. If an argument is
  // promotable, add it to ArgsToPromote.
  SmallPtrSet<Argument*, 8> ArgsToPromote;
  SmallPtrSet<Argument*, 8> ByValArgsToTransform;
  for (unsigned i = 0, e = PointerArgs.size(); i != e; ++i) {
    Argument *PtrArg = PointerArgs[i];
    Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();

    // If this is a byval argument, and if the aggregate type is small, just
    // pass the elements, which is always safe, if the passed value is densely
    // packed or if we can prove the padding bytes are never accessed. This
    // does not apply to inalloca.
    bool isSafeToPromote =
        PtrArg->hasByValAttr() &&
        (isDenselyPacked(AgTy) || !canPaddingBeAccessed(PtrArg));
    if (isSafeToPromote) {
      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
        if (maxElements > 0 && STy->getNumElements() > maxElements) {
          DEBUG(dbgs() << "argpromotion disable promoting argument '"
                << PtrArg->getName() << "' because it would require adding more"
                << " than " << maxElements << " arguments to the function.\n");
          continue;
        }

        // If all the elements are single-value types, we can promote it.
        bool AllSimple = true;
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          if (!STy->getElementType(i)->isSingleValueType()) {
            AllSimple = false;
            break;
          }
        }

        // Safe to transform, don't even bother trying to "promote" it.
        // Passing the elements as a scalar will allow scalarrepl to hack on
        // the new alloca we introduce.
        if (AllSimple) {
          ByValArgsToTransform.insert(PtrArg);
          continue;
        }
      }
    }

    // If the argument is a recursive type and we're in a recursive
    // function, we could end up infinitely peeling the function argument.
    if (isSelfRecursive) {
      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
        bool RecursiveType = false;
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          if (STy->getElementType(i) == PtrArg->getType()) {
            RecursiveType = true;
            break;
          }
        }
        if (RecursiveType)
          continue;
      }
    }

    // Otherwise, see if we can promote the pointer to its value.
    if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr()))
      ArgsToPromote.insert(PtrArg);
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
    return nullptr;

  return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
}
/// \brief Retrieve the set of potential type bindings for the given
/// representative type variable, along with flags indicating whether
/// those types should be opened.
ConstraintSystem::PotentialBindings
ConstraintSystem::getPotentialBindings(TypeVariableType *typeVar) {
  assert(typeVar->getImpl().getRepresentative(nullptr) == typeVar &&
         "not a representative");
  assert(!typeVar->getImpl().getFixedType(nullptr) && "has a fixed type");

  // Gather the constraints associated with this type variable.
  SmallVector<Constraint *, 8> constraints;
  llvm::SmallPtrSet<Constraint *, 4> visitedConstraints;
  getConstraintGraph().gatherConstraints(
      typeVar, constraints, ConstraintGraph::GatheringKind::EquivalenceClass);

  PotentialBindings result(typeVar);

  // Consider each of the constraints related to this type variable.
  llvm::SmallPtrSet<CanType, 4> exactTypes;
  llvm::SmallPtrSet<ProtocolDecl *, 4> literalProtocols;
  SmallVector<Constraint *, 2> defaultableConstraints;
  bool addOptionalSupertypeBindings = false;
  auto &tc = getTypeChecker();
  bool hasNonDependentMemberRelationalConstraints = false;
  bool hasDependentMemberRelationalConstraints = false;
  for (auto constraint : constraints) {
    // Only visit each constraint once.
    if (!visitedConstraints.insert(constraint).second)
      continue;

    switch (constraint->getKind()) {
    case ConstraintKind::Bind:
    case ConstraintKind::Equal:
    case ConstraintKind::BindParam:
    case ConstraintKind::BindToPointerType:
    case ConstraintKind::Subtype:
    case ConstraintKind::Conversion:
    case ConstraintKind::ArgumentConversion:
    case ConstraintKind::ArgumentTupleConversion:
    case ConstraintKind::OperatorArgumentTupleConversion:
    case ConstraintKind::OperatorArgumentConversion:
    case ConstraintKind::OptionalObject:
      // Relational constraints: break out to look for types above/below.
      break;

    case ConstraintKind::BridgingConversion:
    case ConstraintKind::CheckedCast:
    case ConstraintKind::EscapableFunctionOf:
    case ConstraintKind::OpenedExistentialOf:
    case ConstraintKind::KeyPath:
    case ConstraintKind::KeyPathApplication:
      // Constraints from which we can't do anything.
      continue;

    case ConstraintKind::DynamicTypeOf: {
      // Direct binding of the left-hand side could result
      // in `DynamicTypeOf` failure if right-hand side is
      // bound (because 'Bind' requires equal types to
      // succeed), or left is bound to Any which is not an
      // [existential] metatype.
      auto dynamicType = constraint->getFirstType();
      if (auto *tv = dynamicType->getAs<TypeVariableType>()) {
        if (tv->getImpl().getRepresentative(nullptr) == typeVar)
          return {typeVar};
      }

      // This is right-hand side, let's continue.
      continue;
    }

    case ConstraintKind::Defaultable:
      // Do these in a separate pass.
      if (getFixedTypeRecursive(constraint->getFirstType(), true)
              ->getAs<TypeVariableType>() == typeVar) {
        defaultableConstraints.push_back(constraint);
        hasNonDependentMemberRelationalConstraints = true;
      }
      continue;

    case ConstraintKind::Disjunction:
      // FIXME: Recurse into these constraints to see whether this
      // type variable is fully bound by any of them.
      result.InvolvesTypeVariables = true;
      continue;

    case ConstraintKind::ConformsTo:
    case ConstraintKind::SelfObjectOfProtocol:
      // Swift 3 allowed the use of default types for normal conformances
      // to expressible-by-literal protocols.
      if (tc.Context.LangOpts.EffectiveLanguageVersion[0] >= 4)
        continue;

      if (!constraint->getSecondType()->is<ProtocolType>())
        continue;

      LLVM_FALLTHROUGH;

    case ConstraintKind::LiteralConformsTo: {
      // If there is a 'nil' literal constraint, we might need optional
      // supertype bindings.
      if (constraint->getProtocol()->isSpecificProtocol(
              KnownProtocolKind::ExpressibleByNilLiteral))
        addOptionalSupertypeBindings = true;

      // If there is a default literal type for this protocol, it's a
      // potential binding.
      auto defaultType = tc.getDefaultType(constraint->getProtocol(), DC);
      if (!defaultType)
        continue;

      // Note that we have a literal constraint with this protocol.
      literalProtocols.insert(constraint->getProtocol());
      hasNonDependentMemberRelationalConstraints = true;

      // Handle unspecialized types directly.
      if (!defaultType->hasUnboundGenericType()) {
        if (!exactTypes.insert(defaultType->getCanonicalType()).second)
          continue;

        result.foundLiteralBinding(constraint->getProtocol());
        result.addPotentialBinding({defaultType, AllowedBindingKind::Subtypes,
                                    constraint->getKind(),
                                    constraint->getProtocol()});
        continue;
      }

      // For generic literal types, check whether we already have a
      // specialization of this generic within our list.
      // FIXME: This assumes that, e.g., the default literal
      // int/float/char/string types are never generic.
      auto nominal = defaultType->getAnyNominal();
      if (!nominal)
        continue;

      bool matched = false;
      for (auto exactType : exactTypes) {
        if (auto exactNominal = exactType->getAnyNominal()) {
          // FIXME: Check parents?
          if (nominal == exactNominal) {
            matched = true;
            break;
          }
        }
      }

      if (!matched) {
        result.foundLiteralBinding(constraint->getProtocol());
        exactTypes.insert(defaultType->getCanonicalType());
        result.addPotentialBinding({defaultType, AllowedBindingKind::Subtypes,
                                    constraint->getKind(),
                                    constraint->getProtocol()});
      }

      continue;
    }

    case ConstraintKind::ApplicableFunction:
    case ConstraintKind::BindOverload: {
      if (result.FullyBound && result.InvolvesTypeVariables)
        continue;

      // If this variable is in the left-hand side, it is fully bound.
      SmallPtrSet<TypeVariableType *, 4> typeVars;
      findInferableTypeVars(simplifyType(constraint->getFirstType()),
                            typeVars);
      if (typeVars.count(typeVar))
        result.FullyBound = true;

      if (result.InvolvesTypeVariables)
        continue;

      // If this and another type variable occur, this result involves
      // type variables.
      findInferableTypeVars(simplifyType(constraint->getSecondType()),
                            typeVars);
      if (typeVars.size() > 1 && typeVars.count(typeVar))
        result.InvolvesTypeVariables = true;
      continue;
    }

    case ConstraintKind::ValueMember:
    case ConstraintKind::UnresolvedValueMember:
      // If our type variable shows up in the base type, there's
      // nothing to do.
      // FIXME: Can we avoid simplification here?
      if (ConstraintSystem::typeVarOccursInType(
              typeVar, simplifyType(constraint->getFirstType()),
              &result.InvolvesTypeVariables)) {
        continue;
      }

      // If the type variable is in the list of member type
      // variables, it is fully bound.
      // FIXME: Can we avoid simplification here?
      if (ConstraintSystem::typeVarOccursInType(
              typeVar, simplifyType(constraint->getSecondType()),
              &result.InvolvesTypeVariables)) {
        result.FullyBound = true;
      }
      continue;
    }

    // Handle relational constraints.
    assert(constraint->getClassification() ==
               ConstraintClassification::Relational &&
           "only relational constraints handled here");

    // Record the constraint that contributes to the finding of potential
    // bindings.
    result.Sources.insert(constraint);

    auto first = simplifyType(constraint->getFirstType());
    auto second = simplifyType(constraint->getSecondType());

    if (first->is<TypeVariableType>() && first->isEqual(second))
      continue;

    Type type;
    AllowedBindingKind kind;
    if (first->getAs<TypeVariableType>() == typeVar) {
      // Upper bound for this type variable.
      type = second;
      kind = AllowedBindingKind::Subtypes;
    } else if (second->getAs<TypeVariableType>() == typeVar) {
      // Lower bound for this type variable.
      type = first;
      kind = AllowedBindingKind::Supertypes;
    } else {
      // Can't infer anything.
      if (result.InvolvesTypeVariables)
        continue;

      // Check whether both this type and another type variable are
      // inferable.
      SmallPtrSet<TypeVariableType *, 4> typeVars;
      findInferableTypeVars(first, typeVars);
      findInferableTypeVars(second, typeVars);
      if (typeVars.size() > 1 && typeVars.count(typeVar))
        result.InvolvesTypeVariables = true;
      continue;
    }

    // Do not attempt to bind to ErrorType.
    if (type->hasError())
      continue;

    // If the type we'd be binding to is a dependent member, don't try to
    // resolve this type variable yet.
    if (type->is<DependentMemberType>()) {
      if (!ConstraintSystem::typeVarOccursInType(
              typeVar, type, &result.InvolvesTypeVariables)) {
        hasDependentMemberRelationalConstraints = true;
      }
      continue;
    }
    hasNonDependentMemberRelationalConstraints = true;

    // Check whether we can perform this binding.
    // FIXME: this has a super-inefficient extraneous simplifyType() in it.
    bool isNilLiteral = false;
    bool *isNilLiteralPtr = nullptr;
    if (!addOptionalSupertypeBindings && kind == AllowedBindingKind::Supertypes)
      isNilLiteralPtr = &isNilLiteral;
    if (auto boundType = checkTypeOfBinding(typeVar, type, isNilLiteralPtr)) {
      type = *boundType;
      if (type->hasTypeVariable())
        result.InvolvesTypeVariables = true;
    } else {
      // If the bound is a 'nil' literal type, add optional supertype
      // bindings.
      if (isNilLiteral) {
        addOptionalSupertypeBindings = true;
        continue;
      }

      result.InvolvesTypeVariables = true;
      continue;
    }

    // Don't deduce autoclosure types or single-element, non-variadic
    // tuples.
    if (shouldBindToValueType(constraint)) {
      if (auto funcTy = type->getAs<FunctionType>()) {
        if (funcTy->isAutoClosure())
          type = funcTy->getResult();
      }

      type = type->getWithoutImmediateLabel();
    }

    // Make sure we aren't trying to equate type variables with different
    // lvalue-binding rules.
    if (auto otherTypeVar =
            type->lookThroughAllOptionalTypes()->getAs<TypeVariableType>()) {
      if (typeVar->getImpl().canBindToLValue() !=
          otherTypeVar->getImpl().canBindToLValue())
        continue;
    }

    // BindParam constraints are not reflexive and must be treated specially.
    if (constraint->getKind() == ConstraintKind::BindParam) {
      if (kind == AllowedBindingKind::Subtypes) {
        if (auto *lvt = type->getAs<LValueType>()) {
          type = InOutType::get(lvt->getObjectType());
        }
      } else if (kind == AllowedBindingKind::Supertypes) {
        if (auto *iot = type->getAs<InOutType>()) {
          type = LValueType::get(iot->getObjectType());
        }
      }
      kind = AllowedBindingKind::Exact;
    }

    if (exactTypes.insert(type->getCanonicalType()).second)
      result.addPotentialBinding({type, kind, constraint->getKind()});
  }

  // If we have any literal constraints, check whether there is already a
  // binding that provides a type that conforms to that literal protocol. In
  // such cases, remove the default binding suggestion because the existing
  // suggestion is better.
  if (!literalProtocols.empty()) {
    SmallPtrSet<ProtocolDecl *, 5> coveredLiteralProtocols;
    for (auto &binding : result.Bindings) {
      // Skip defaulted-protocol constraints.
      if (binding.DefaultedProtocol)
        continue;

      Type testType;
      switch (binding.Kind) {
      case AllowedBindingKind::Exact:
        testType = binding.BindingType;
        break;

      case AllowedBindingKind::Subtypes:
      case AllowedBindingKind::Supertypes:
        testType = binding.BindingType->getRValueType();
        break;
      }

      // Check each non-covered literal protocol to determine which ones this
      // binding covers.
      bool updatedBindingType = false;
      for (auto proto : literalProtocols) {
        do {
          // If the type conforms to this protocol, we're covered.
          if (tc.conformsToProtocol(
                  testType, proto, DC,
                  (ConformanceCheckFlags::InExpression |
                   ConformanceCheckFlags::SkipConditionalRequirements))) {
            coveredLiteralProtocols.insert(proto);
            break;
          }

          // If we're allowed to bind to subtypes, look through optionals.
          // FIXME: This is really crappy special case of computing a
          // reasonable result based on the given constraints.
          if (binding.Kind == AllowedBindingKind::Subtypes) {
            if (auto objTy = testType->getOptionalObjectType()) {
              updatedBindingType = true;
              testType = objTy;
              continue;
            }
          }

          updatedBindingType = false;
          break;
        } while (true);
      }

      if (updatedBindingType)
        binding.BindingType = testType;
    }

    // For any literal type that has been covered, remove the default literal
    // type.
    if (!coveredLiteralProtocols.empty()) {
      result.Bindings.erase(
          std::remove_if(result.Bindings.begin(), result.Bindings.end(),
                         [&](PotentialBinding &binding) {
                           return binding.DefaultedProtocol &&
                                  coveredLiteralProtocols.count(
                                      *binding.DefaultedProtocol) > 0;
                         }),
          result.Bindings.end());
    }
  }

  /// Add defaultable constraints last.
  for (auto constraint : defaultableConstraints) {
    Type type = constraint->getSecondType();
    if (!exactTypes.insert(type->getCanonicalType()).second)
      continue;

    ++result.NumDefaultableBindings;
    result.addPotentialBinding({type, AllowedBindingKind::Exact,
                                constraint->getKind(), None,
                                constraint->getLocator()});
  }

  // Determine if the bindings only constrain the type variable from above
  // with an existential type; such a binding is not very helpful because
  // it's impossible to enumerate the existential type's subtypes.
  result.SubtypeOfExistentialType =
      std::all_of(result.Bindings.begin(), result.Bindings.end(),
                  [](const PotentialBinding &binding) {
                    return binding.BindingType->isExistentialType() &&
                           binding.Kind == AllowedBindingKind::Subtypes;
                  });

  // If we're supposed to add optional supertype bindings, do so now.
  if (addOptionalSupertypeBindings) {
    for (unsigned i : indices(result.Bindings)) {
      auto &binding = result.Bindings[i];
      bool wrapInOptional = false;

      if (binding.Kind == AllowedBindingKind::Supertypes) {
        // If the type doesn't conform to ExpressibleByNilLiteral,
        // produce an optional of that type as a potential binding. We
        // overwrite the binding in place because the non-optional type
        // will fail to type-check against the nil-literal conformance.
        auto nominalBindingDecl =
            binding.BindingType->getRValueType()->getAnyNominal();
        bool conformsToExprByNilLiteral = false;
        if (nominalBindingDecl) {
          SmallVector<ProtocolConformance *, 2> conformances;
          conformsToExprByNilLiteral = nominalBindingDecl->lookupConformance(
              DC->getParentModule(),
              getASTContext().getProtocol(
                  KnownProtocolKind::ExpressibleByNilLiteral),
              conformances);
        }
        wrapInOptional = !conformsToExprByNilLiteral;
      } else if (binding.isDefaultableBinding() &&
                 binding.BindingType->isAny()) {
        wrapInOptional = true;
      }

      if (wrapInOptional) {
        binding.BindingType = OptionalType::get(binding.BindingType);
      }
    }
  }

  // If there were both dependent-member and non-dependent-member relational
  // constraints, consider this "fully bound"; we don't want to touch it.
  if (hasDependentMemberRelationalConstraints) {
    if (hasNonDependentMemberRelationalConstraints)
      result.FullyBound = true;
    else
      result.Bindings.clear();
  }

  return result;
}
/// FindBackAndExitEdges - Search for back and exit edges for all blocks
/// within the function loops, calculated using loop information.
void BranchPredictionInfo::FindBackAndExitEdges(Function &F) {
  SmallPtrSet<const BasicBlock *, 64> LoopsVisited;
  SmallPtrSet<const BasicBlock *, 64> BlocksVisited;

  for (LoopInfo::iterator LIT = LI->begin(), LIE = LI->end();
       LIT != LIE; ++LIT) {
    Loop *rootLoop = *LIT;
    BasicBlock *rootHeader = rootLoop->getHeader();

    // Check if we already visited this loop.
    if (LoopsVisited.count(rootHeader))
      continue;

    // Create a stack to hold loops (inner most on the top).
    SmallVector<Loop *, 8> Stack;
    SmallPtrSet<const BasicBlock *, 8> InStack;

    // Put the current loop into the Stack.
    Stack.push_back(rootLoop);
    InStack.insert(rootHeader);

    do {
      Loop *loop = Stack.back();

      // Search for new inner loops.
      bool foundNew = false;
      for (Loop::iterator I = loop->begin(), E = loop->end(); I != E; ++I) {
        Loop *innerLoop = *I;
        BasicBlock *innerHeader = innerLoop->getHeader();

        // Skip visited inner loops.
        if (!LoopsVisited.count(innerHeader)) {
          Stack.push_back(innerLoop);
          InStack.insert(innerHeader);
          foundNew = true;
          break;
        }
      }

      // If a new loop is found, continue.
      // Otherwise, it is time to expand it, because it is the most inner loop
      // yet unprocessed.
      if (foundNew)
        continue;

      // The variable "loop" is now the unvisited inner most loop.
      BasicBlock *header = loop->getHeader();

      // Search for all basic blocks on the loop.
      for (Loop::block_iterator LBI = loop->block_begin(),
           LBE = loop->block_end(); LBI != LBE; ++LBI) {
        BasicBlock *lpBB = *LBI;
        if (!BlocksVisited.insert(lpBB))
          continue;

        // Set the number of back edges to this loop head (lpBB) as zero.
        BackEdgesCount[lpBB] = 0;

        // For each loop block successor, check if the block pointed to is
        // outside the loop.
        TerminatorInst *TI = lpBB->getTerminator();
        for (unsigned s = 0; s < TI->getNumSuccessors(); ++s) {
          BasicBlock *successor = TI->getSuccessor(s);
          Edge edge = std::make_pair(lpBB, successor);

          // If the successor matches any loop header on the stack,
          // then it is a backedge.
          if (InStack.count(successor)) {
            listBackEdges.insert(edge);
            ++BackEdgesCount[lpBB];
          }

          // If the successor is not present in the loop block list, then it
          // is an exit edge.
          if (!loop->contains(successor))
            listExitEdges.insert(edge);
        }
      }

      // Mark this loop as visited and pop it off the stack.
      LoopsVisited.insert(header);
      Stack.pop_back();
      InStack.erase(header);
    } while (!InStack.empty());
  }
}
bool AMDGPUAlwaysInline::runOnModule(Module &M) {
  AMDGPUAS AMDGPUAS = AMDGPU::getAMDGPUAS(M);

  std::vector<GlobalAlias*> AliasesToRemove;
  SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
  SmallPtrSet<Function *, 8> FuncsToNoInline;

  for (GlobalAlias &A : M.aliases()) {
    if (Function* F = dyn_cast<Function>(A.getAliasee())) {
      A.replaceAllUsesWith(F);
      AliasesToRemove.push_back(&A);
    }

    // FIXME: If the aliasee isn't a function, it's some kind of constant expr
    // cast that won't be inlined through.
  }

  if (GlobalOpt) {
    for (GlobalAlias* A : AliasesToRemove) {
      A->eraseFromParent();
    }
  }

  // Always force inlining of any function that uses an LDS global address.
  // This is something of a workaround because we don't have a way of
  // supporting LDS objects defined in functions. LDS is always allocated by
  // a kernel, and it is difficult to manage LDS usage if a function may be
  // used by multiple kernels.
  //
  // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
  // should only appear when IPO passes manage to move LDS defined in a
  // kernel into a single user function.
  for (GlobalVariable &GV : M.globals()) {
    // TODO: Region address
    unsigned AS = GV.getType()->getAddressSpace();
    if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS.REGION_ADDRESS)
      continue;

    recursivelyVisitUsers(GV, FuncsToAlwaysInline);
  }

  if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
    auto IncompatAttr =
        StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;

    for (Function &F : M) {
      if (!F.isDeclaration() && !F.use_empty() &&
          !F.hasFnAttribute(IncompatAttr)) {
        if (StressCalls) {
          if (!FuncsToAlwaysInline.count(&F))
            FuncsToNoInline.insert(&F);
        } else
          FuncsToAlwaysInline.insert(&F);
      }
    }
  }

  for (Function *F : FuncsToAlwaysInline)
    F->addFnAttr(Attribute::AlwaysInline);

  for (Function *F : FuncsToNoInline)
    F->addFnAttr(Attribute::NoInline);

  return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
}
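// ---------------------------------------------------------------------------
// recursivelyVisitUsers is referenced above but not shown. A plausible shape,
// offered only as a sketch (not the actual AMDGPU implementation): walk the
// transitive users of the LDS global, looking through constant expressions,
// and record each enclosing function exactly once via the SmallPtrSet.
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"

static void recursivelyVisitUsersSketch(
    llvm::Value &V, llvm::SmallPtrSetImpl<llvm::Function *> &Funcs) {
  llvm::SmallPtrSet<llvm::Value *, 16> Visited;
  llvm::SmallVector<llvm::Value *, 16> Worklist(V.user_begin(), V.user_end());
  while (!Worklist.empty()) {
    llvm::Value *U = Worklist.pop_back_val();
    if (!Visited.insert(U).second)
      continue; // already walked this user
    if (auto *I = llvm::dyn_cast<llvm::Instruction>(U)) {
      Funcs.insert(I->getFunction());
    } else {
      // Constant expressions and the like: keep walking their users.
      Worklist.append(U->user_begin(), U->user_end());
    }
  }
}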
/// PromoteArguments - This method checks the specified function to see if
/// there are any promotable arguments and if it is safe to promote the
/// function (for example, all callers are direct). If safe to promote some
/// arguments, it calls the DoPromotion method.
///
CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
  Function *F = CGN->getFunction();

  // Make sure that it is local to this module.
  if (!F || !F->hasLocalLinkage()) return nullptr;

  // Don't promote arguments for variadic functions. Adding, removing, or
  // changing non-pack parameters can change the classification of pack
  // parameters. Frontends encode that classification at the call site in the
  // IR, while in the callee the classification is determined dynamically
  // based on the number of registers consumed so far.
  if (F->isVarArg()) return nullptr;

  // First check: see if there are any pointer arguments! If not, quick exit.
  SmallVector<Argument*, 16> PointerArgs;
  for (Argument &I : F->args())
    if (I.getType()->isPointerTy())
      PointerArgs.push_back(&I);
  if (PointerArgs.empty()) return nullptr;

  // Second check: make sure that all callers are direct callers. We can't
  // transform functions that have indirect callers. Also see if the function
  // is self-recursive.
  bool isSelfRecursive = false;
  for (Use &U : F->uses()) {
    CallSite CS(U.getUser());
    // Must be a direct call.
    if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr;

    if (CS.getInstruction()->getParent()->getParent() == F)
      isSelfRecursive = true;
  }

  const DataLayout &DL = F->getParent()->getDataLayout();

  // We need to manually construct BasicAA directly in order to disable its
  // use of other function analyses.
  BasicAAResult BAR(createLegacyPMBasicAAResult(*this, *F));

  // Construct our own AA results for this function. We do this manually to
  // work around the limitations of the legacy pass manager.
  AAResults AAR(createLegacyPMAAResults(*this, *F, BAR));

  // Check to see which arguments are promotable. If an argument is
  // promotable, add it to ArgsToPromote.
  SmallPtrSet<Argument*, 8> ArgsToPromote;
  SmallPtrSet<Argument*, 8> ByValArgsToTransform;
  for (unsigned i = 0, e = PointerArgs.size(); i != e; ++i) {
    Argument *PtrArg = PointerArgs[i];
    Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();

    // Replace sret attribute with noalias. This reduces register pressure by
    // avoiding a register copy.
    if (PtrArg->hasStructRetAttr()) {
      unsigned ArgNo = PtrArg->getArgNo();
      F->setAttributes(
          F->getAttributes()
              .removeAttribute(F->getContext(), ArgNo + 1,
                               Attribute::StructRet)
              .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
      for (Use &U : F->uses()) {
        CallSite CS(U.getUser());
        CS.setAttributes(
            CS.getAttributes()
                .removeAttribute(F->getContext(), ArgNo + 1,
                                 Attribute::StructRet)
                .addAttribute(F->getContext(), ArgNo + 1,
                              Attribute::NoAlias));
      }
    }

    // If this is a byval argument, and if the aggregate type is small, just
    // pass the elements, which is always safe, if the passed value is densely
    // packed or if we can prove the padding bytes are never accessed. This
    // does not apply to inalloca.
    bool isSafeToPromote =
        PtrArg->hasByValAttr() &&
        (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
    if (isSafeToPromote) {
      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
        if (maxElements > 0 && STy->getNumElements() > maxElements) {
          DEBUG(dbgs() << "argpromotion disable promoting argument '"
                << PtrArg->getName() << "' because it would require adding more"
                << " than " << maxElements << " arguments to the function.\n");
          continue;
        }

        // If all the elements are single-value types, we can promote it.
        bool AllSimple = true;
        for (const auto *EltTy : STy->elements()) {
          if (!EltTy->isSingleValueType()) {
            AllSimple = false;
            break;
          }
        }

        // Safe to transform, don't even bother trying to "promote" it.
        // Passing the elements as a scalar will allow sroa to hack on the
        // new alloca we introduce.
        if (AllSimple) {
          ByValArgsToTransform.insert(PtrArg);
          continue;
        }
      }
    }

    // If the argument is a recursive type and we're in a recursive
    // function, we could end up infinitely peeling the function argument.
    if (isSelfRecursive) {
      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
        bool RecursiveType = false;
        for (const auto *EltTy : STy->elements()) {
          if (EltTy == PtrArg->getType()) {
            RecursiveType = true;
            break;
          }
        }
        if (RecursiveType)
          continue;
      }
    }

    // Otherwise, see if we can promote the pointer to its value.
    if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR))
      ArgsToPromote.insert(PtrArg);
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
    return nullptr;

  return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
}
/// handleEndBlock - Remove dead stores to stack-allocated locations in the
/// function end block. Ex:
/// %A = alloca i32
/// ...
/// store i32 1, i32* %A
/// ret void
bool DSE::handleEndBlock(BasicBlock &BB) {
  bool MadeChange = false;

  // Keep track of all of the stack objects that are dead at the end of the
  // function.
  SmallPtrSet<Value*, 16> DeadStackObjects;

  // Find all of the alloca'd pointers in the entry block.
  BasicBlock *Entry = BB.getParent()->begin();
  for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I)
    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
      DeadStackObjects.insert(AI);

  // Treat byval arguments the same, stores to them are dead at the end of
  // the function.
  for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
       AE = BB.getParent()->arg_end(); AI != AE; ++AI)
    if (AI->hasByValAttr())
      DeadStackObjects.insert(AI);

  // Scan the basic block backwards.
  for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
    --BBI;

    // If we find a store, check to see if it points into a dead stack value.
    if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
      // See through pointer-to-pointer bitcasts.
      Value *Pointer = getStoredPointerOperand(BBI)->getUnderlyingObject();

      // Stores to stack values are valid candidates for removal.
      if (DeadStackObjects.count(Pointer)) {
        Instruction *Dead = BBI++;

        DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n  DEAD: "
                     << *Dead << "\n  Object: " << *Pointer << '\n');

        // DCE instructions only used to calculate that store.
        DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
        ++NumFastStores;
        MadeChange = true;
        continue;
      }
    }

    // Remove any dead non-memory-mutating instructions.
    if (isInstructionTriviallyDead(BBI)) {
      Instruction *Inst = BBI++;
      DeleteDeadInstruction(Inst, *MD, &DeadStackObjects);
      ++NumFastOther;
      MadeChange = true;
      continue;
    }

    if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
      DeadStackObjects.erase(A);
      continue;
    }

    if (CallSite CS = cast<Value>(BBI)) {
      // If this call does not access memory, it can't be loading any of our
      // pointers.
      if (AA->doesNotAccessMemory(CS))
        continue;

      unsigned NumModRef = 0, NumOther = 0;

      // If the call might load from any of our allocas, then any store above
      // the call is live.
      SmallVector<Value*, 8> LiveAllocas;
      for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
           E = DeadStackObjects.end(); I != E; ++I) {
        // If we detect that our AA is imprecise, it's not worth it to scan
        // the rest of the DeadPointers set. Just assume that the AA will
        // return ModRef for everything, and go ahead and bail out.
        if (NumModRef >= 16 && NumOther == 0)
          return MadeChange;

        // See if the call site touches it.
        AliasAnalysis::ModRefResult A =
            AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));

        if (A == AliasAnalysis::ModRef)
          ++NumModRef;
        else
          ++NumOther;

        if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
          LiveAllocas.push_back(*I);
      }

      for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
           E = LiveAllocas.end(); I != E; ++I)
        DeadStackObjects.erase(*I);

      // If all of the allocas were clobbered by the call then we're not
      // going to find anything else to process.
      if (DeadStackObjects.empty())
        return MadeChange;

      continue;
    }

    AliasAnalysis::Location LoadedLoc;

    // If we encounter a use of the pointer, it is no longer considered dead.
    if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
      LoadedLoc = AA->getLocation(L);
    } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
      LoadedLoc = AA->getLocation(V);
    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
      LoadedLoc = AA->getLocationForSource(MTI);
    } else {
      // Not a loading instruction.
      continue;
    }

    // Remove any allocas from the DeadPointer set that are loaded, as this
    // makes any stores above the access live.
    RemoveAccessedObjects(LoadedLoc, DeadStackObjects);

    // If all of the allocas were clobbered by the access then we're not
    // going to find anything else to process.
    if (DeadStackObjects.empty())
      break;
  }

  return MadeChange;
}
ModuleSummaryIndex llvm::buildModuleSummaryIndex(
    const Module &M,
    std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback,
    ProfileSummaryInfo *PSI) {
  ModuleSummaryIndex Index;

  // Identify the local values in the llvm.used and llvm.compiler.used sets,
  // which should not be exported as they would then require renaming and
  // promotion, but we may have opaque uses e.g. in inline asm. We collect them
  // here because we use this information to mark functions containing inline
  // assembly calls as not importable.
  SmallPtrSet<GlobalValue *, 8> LocalsUsed;
  SmallPtrSet<GlobalValue *, 8> Used;
  // First collect those in the llvm.used set.
  collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
  // Next collect those in the llvm.compiler.used set.
  collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ true);
  DenseSet<GlobalValue::GUID> CantBePromoted;
  for (auto *V : Used) {
    if (V->hasLocalLinkage()) {
      LocalsUsed.insert(V);
      CantBePromoted.insert(V->getGUID());
    }
  }

  // Compute summaries for all functions defined in module, and save in the
  // index.
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;

    BlockFrequencyInfo *BFI = nullptr;
    std::unique_ptr<BlockFrequencyInfo> BFIPtr;
    if (GetBFICallback)
      BFI = GetBFICallback(F);
    else if (F.getEntryCount().hasValue()) {
      LoopInfo LI{DominatorTree(const_cast<Function &>(F))};
      BranchProbabilityInfo BPI{F, LI};
      BFIPtr = llvm::make_unique<BlockFrequencyInfo>(F, BPI, LI);
      BFI = BFIPtr.get();
    }

    computeFunctionSummary(Index, M, F, BFI, PSI, !LocalsUsed.empty(),
                           CantBePromoted);
  }

  // Compute summaries for all variables defined in module, and save in the
  // index.
  for (const GlobalVariable &G : M.globals()) {
    if (G.isDeclaration())
      continue;
    computeVariableSummary(Index, G, CantBePromoted);
  }

  // Compute summaries for all aliases defined in module, and save in the
  // index.
  for (const GlobalAlias &A : M.aliases())
    computeAliasSummary(Index, A, CantBePromoted);

  for (auto *V : LocalsUsed) {
    auto *Summary = Index.getGlobalValueSummary(*V);
    assert(Summary && "Missing summary for global value");
    Summary->setNotEligibleToImport();
  }

  // The linker doesn't know about these LLVM produced values, so we need
  // to flag them as live in the index to ensure index-based dead value
  // analysis treats them as live roots of the analysis.
  setLiveRoot(Index, "llvm.used");
  setLiveRoot(Index, "llvm.compiler.used");
  setLiveRoot(Index, "llvm.global_ctors");
  setLiveRoot(Index, "llvm.global_dtors");
  setLiveRoot(Index, "llvm.global.annotations");

  if (!M.getModuleInlineAsm().empty()) {
    // Collect the local values defined by module level asm, and set up
    // summaries for these symbols so that they can be marked as NoRename,
    // to prevent export of any use of them in regular IR that would require
    // renaming within the module level asm. Note we don't need to create a
    // summary for weak or global defs, as they don't need to be flagged as
    // NoRename, and defs in module level asm can't be imported anyway.
    // Also, any values used but not defined within module level asm should
    // be listed on the llvm.used or llvm.compiler.used global and marked as
    // referenced from there.
    ModuleSymbolTable::CollectAsmSymbols(
        Triple(M.getTargetTriple()), M.getModuleInlineAsm(),
        [&M, &Index, &CantBePromoted](StringRef Name,
                                      object::BasicSymbolRef::Flags Flags) {
          // Symbols not marked as Weak or Global are local definitions.
          if (Flags & (object::BasicSymbolRef::SF_Weak |
                       object::BasicSymbolRef::SF_Global))
            return;
          GlobalValue *GV = M.getNamedValue(Name);
          if (!GV)
            return;
          assert(GV->isDeclaration() &&
                 "Def in module asm already has definition");
          GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage,
                                              /* NotEligibleToImport */ true,
                                              /* LiveRoot */ true);
          CantBePromoted.insert(GlobalValue::getGUID(Name));
          // Create the appropriate summary type.
          if (isa<Function>(GV)) {
            std::unique_ptr<FunctionSummary> Summary =
                llvm::make_unique<FunctionSummary>(
                    GVFlags, 0, ArrayRef<ValueInfo>{},
                    ArrayRef<FunctionSummary::EdgeTy>{},
                    ArrayRef<GlobalValue::GUID>{});
            Index.addGlobalValueSummary(Name, std::move(Summary));
          } else {
            std::unique_ptr<GlobalVarSummary> Summary =
                llvm::make_unique<GlobalVarSummary>(GVFlags,
                                                    ArrayRef<ValueInfo>{});
            Index.addGlobalValueSummary(Name, std::move(Summary));
          }
        });
  }

  for (auto &GlobalList : Index) {
    assert(GlobalList.second.size() == 1 &&
           "Expected module's index to have one summary per GUID");
    auto &Summary = GlobalList.second[0];
    bool AllRefsCanBeExternallyReferenced =
        llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) {
          return !CantBePromoted.count(VI.getValue()->getGUID());
        });
    if (!AllRefsCanBeExternallyReferenced) {
      Summary->setNotEligibleToImport();
      continue;
    }

    if (auto *FuncSummary = dyn_cast<FunctionSummary>(Summary.get())) {
      bool AllCallsCanBeExternallyReferenced = llvm::all_of(
          FuncSummary->calls(), [&](const FunctionSummary::EdgeTy &Edge) {
            auto GUID = Edge.first.isGUID() ? Edge.first.getGUID()
                                            : Edge.first.getValue()->getGUID();
            return !CantBePromoted.count(GUID);
          });
      if (!AllCallsCanBeExternallyReferenced)
        Summary->setNotEligibleToImport();
    }
  }

  return Index;
}
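// A hedged sketch of the final eligibility pass above, using a hypothetical
// ToySummary rather than the ModuleSummaryIndex API: a definition becomes
// non-importable as soon as any symbol it references cannot be renamed or
// promoted, because importing it elsewhere would force exactly that rename.
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

struct ToySummary {
  std::vector<std::string> Refs;  // Symbols this definition references.
  bool EligibleToImport = true;
};

static void markImportEligibility(std::map<std::string, ToySummary> &Index,
                                  const std::set<std::string> &CantBePromoted) {
  for (auto &Entry : Index)
    for (const std::string &Ref : Entry.second.Refs)
      if (CantBePromoted.count(Ref)) {
        Entry.second.EligibleToImport = false;  // Importing would force a
        break;                                  // rename of a pinned local.
      }
}

int main() {
  std::map<std::string, ToySummary> Index;
  Index["foo"].Refs = {"bar"};  // "bar" is a local pinned by llvm.used.
  Index["baz"].Refs = {"qux"};
  markImportEligibility(Index, /*CantBePromoted=*/{"bar"});
  for (auto &E : Index)
    std::cout << E.first
              << (E.second.EligibleToImport ? " importable\n"
                                            : " not importable\n");
}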
void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
                                      ArrayRef<unsigned> RegsBeingSpilled) {
  SetVector<LiveInterval*,
            SmallVector<LiveInterval*, 8>,
            SmallPtrSet<LiveInterval*, 8> > ToShrink;

  for (;;) {
    // Erase all dead defs.
    while (!Dead.empty()) {
      MachineInstr *MI = Dead.pop_back_val();
      assert(MI->allDefsAreDead() && "Def isn't really dead");
      SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();

      // Never delete inline asm.
      if (MI->isInlineAsm()) {
        DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI);
        continue;
      }

      // Use the same criteria as DeadMachineInstructionElim.
      bool SawStore = false;
      if (!MI->isSafeToMove(&TII, 0, SawStore)) {
        DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI);
        continue;
      }

      DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI);

      // Check for live intervals that may shrink.
      for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
           MOE = MI->operands_end(); MOI != MOE; ++MOI) {
        if (!MOI->isReg())
          continue;
        unsigned Reg = MOI->getReg();
        if (!TargetRegisterInfo::isVirtualRegister(Reg))
          continue;
        LiveInterval &LI = LIS.getInterval(Reg);

        // Shrink read registers, unless it is likely to be expensive and
        // unlikely to change anything. We typically don't want to shrink the
        // PIC base register that has lots of uses everywhere.
        // Always shrink COPY uses that probably come from live range
        // splitting.
        if (MI->readsVirtualRegister(Reg) &&
            (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) ||
             LI.killedAt(Idx)))
          ToShrink.insert(&LI);

        // Remove defined value.
        if (MOI->isDef()) {
          if (VNInfo *VNI = LI.getVNInfoAt(Idx)) {
            if (TheDelegate)
              TheDelegate->LRE_WillShrinkVirtReg(LI.reg);
            LI.removeValNo(VNI);
            if (LI.empty()) {
              ToShrink.remove(&LI);
              eraseVirtReg(Reg);
            }
          }
        }
      }

      if (TheDelegate)
        TheDelegate->LRE_WillEraseInstruction(MI);
      LIS.RemoveMachineInstrFromMaps(MI);
      MI->eraseFromParent();
      ++NumDCEDeleted;
    }

    if (ToShrink.empty())
      break;

    // Shrink just one live interval. Then delete new dead defs.
    LiveInterval *LI = ToShrink.back();
    ToShrink.pop_back();
    if (foldAsLoad(LI, Dead))
      continue;
    if (TheDelegate)
      TheDelegate->LRE_WillShrinkVirtReg(LI->reg);
    if (!LIS.shrinkToUses(LI, &Dead))
      continue;

    // Don't create new intervals for a register being spilled.
    // The new intervals would have to be spilled anyway, so it's not worth it.
    // Also, they currently aren't spilled, so creating them and not spilling
    // them results in incorrect code.
    bool BeingSpilled = false;
    for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) {
      if (LI->reg == RegsBeingSpilled[i]) {
        BeingSpilled = true;
        break;
      }
    }

    if (BeingSpilled) continue;

    // LI may have been separated, create new intervals.
    LI->RenumberValues(LIS);
    ConnectedVNInfoEqClasses ConEQ(LIS);
    unsigned NumComp = ConEQ.Classify(LI);
    if (NumComp <= 1)
      continue;
    ++NumFracRanges;
    bool IsOriginal = VRM && VRM->getOriginal(LI->reg) == LI->reg;
    DEBUG(dbgs() << NumComp << " components: " << *LI << '\n');
    SmallVector<LiveInterval*, 8> Dups(1, LI);
    for (unsigned i = 1; i != NumComp; ++i) {
      Dups.push_back(&createFrom(LI->reg));
      // If LI is an original interval that hasn't been split yet, make the new
      // intervals their own originals instead of referring to LI. The original
      // interval must contain all the split products, and LI doesn't.
      if (IsOriginal)
        VRM->setIsSplitFromReg(Dups.back()->reg, 0);
      if (TheDelegate)
        TheDelegate->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg);
    }
    ConEQ.Distribute(&Dups[0], MRI);
    DEBUG({
      for (unsigned i = 0; i != NumComp; ++i)
        dbgs() << '\t' << *Dups[i] << '\n';
    });
  }
}
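// A standalone sketch of the delete-then-shrink fixed point above, on a toy
// use-count graph (all names here are hypothetical, not LLVM types).  The
// structural point: deleting a dead instruction decrements its operands' use
// counts, which can expose new dead instructions, so the loop runs until no
// more work appears.
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct ToyDef {
  std::vector<std::string> Operands;
  int UseCount = 0;
  bool Deleted = false;
};

static int eliminateDead(std::map<std::string, ToyDef> &Defs,
                         std::vector<std::string> Dead) {
  int NumDeleted = 0;
  while (!Dead.empty()) {
    std::string Name = Dead.back();
    Dead.pop_back();
    ToyDef &D = Defs[Name];
    if (D.Deleted)
      continue;
    D.Deleted = true;
    ++NumDeleted;
    // "Shrink" each operand: with one fewer user, it may now be dead too.
    for (const std::string &Op : D.Operands)
      if (--Defs[Op].UseCount == 0)
        Dead.push_back(Op);
  }
  return NumDeleted;
}

int main() {
  std::map<std::string, ToyDef> Defs;
  Defs["a"];                   // a has no operands.
  Defs["b"].Operands = {"a"};  // b uses a.
  Defs["a"].UseCount = 1;
  Defs["b"].UseCount = 0;      // b itself is unused: dead.
  std::cout << eliminateDead(Defs, {"b"}) << " deleted\n";  // Prints 2.
}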
/// handleEndBlock - Remove dead stores to stack-allocated locations in the
/// function end block.  Ex:
/// %A = alloca i32
/// ...
/// store i32 1, i32* %A
/// ret void
bool DSE::handleEndBlock(BasicBlock &BB) {
  bool MadeChange = false;

  // Keep track of all of the stack objects that are dead at the end of the
  // function.
  SmallPtrSet<Value*, 16> DeadStackObjects;

  // Find all of the alloca'd pointers in the entry block.
  BasicBlock *Entry = BB.getParent()->begin();
  for (BasicBlock::iterator I = Entry->begin(), E = Entry->end();
       I != E; ++I) {
    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
      DeadStackObjects.insert(AI);

    // Okay, so these are dead heap objects, but if the pointer never escapes
    // then it's leaked by this function anyways.
    CallInst *CI = extractMallocCall(I);
    if (!CI)
      CI = extractCallocCall(I);
    if (CI && !PointerMayBeCaptured(CI, true, true))
      DeadStackObjects.insert(CI);
  }

  // Treat byval arguments the same; stores to them are dead at the end of the
  // function.
  for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
       AE = BB.getParent()->arg_end(); AI != AE; ++AI)
    if (AI->hasByValAttr())
      DeadStackObjects.insert(AI);

  // Scan the basic block backwards.
  for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ) {
    --BBI;

    // If we find a store, check to see if it points into a dead stack value.
    if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
      // See through pointer-to-pointer bitcasts.
      SmallVector<Value *, 4> Pointers;
      GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers);

      // Stores to stack values are valid candidates for removal.
      bool AllDead = true;
      for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
           E = Pointers.end(); I != E; ++I)
        if (!DeadStackObjects.count(*I)) {
          AllDead = false;
          break;
        }

      if (AllDead) {
        Instruction *Dead = BBI++;

        DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n  DEAD: "
                     << *Dead << "\n  Objects: ";
              for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
                   E = Pointers.end(); I != E; ++I) {
                dbgs() << **I;
                if (llvm::next(I) != E)
                  dbgs() << ", ";
              }
              dbgs() << '\n');

        // DCE instructions only used to calculate that store.
        DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
        ++NumFastStores;
        MadeChange = true;
        continue;
      }
    }

    // Remove any dead non-memory-mutating instructions.
    if (isInstructionTriviallyDead(BBI)) {
      Instruction *Inst = BBI++;
      DeleteDeadInstruction(Inst, *MD, &DeadStackObjects);
      ++NumFastOther;
      MadeChange = true;
      continue;
    }

    if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) {
      DeadStackObjects.erase(A);
      continue;
    }

    if (CallInst *CI = extractMallocCall(BBI)) {
      DeadStackObjects.erase(CI);
      continue;
    }

    if (CallInst *CI = extractCallocCall(BBI)) {
      DeadStackObjects.erase(CI);
      continue;
    }

    if (CallSite CS = cast<Value>(BBI)) {
      // If this call does not access memory, it can't be loading any of our
      // pointers.
      if (AA->doesNotAccessMemory(CS))
        continue;

      // If the call might load from any of our allocas, then any store above
      // the call is live.
      SmallVector<Value*, 8> LiveAllocas;
      for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(),
           E = DeadStackObjects.end(); I != E; ++I) {
        // See if the call site touches it.
        AliasAnalysis::ModRefResult A =
          AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));

        if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
          LiveAllocas.push_back(*I);
      }

      for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
           E = LiveAllocas.end(); I != E; ++I)
        DeadStackObjects.erase(*I);

      // If all of the allocas were clobbered by the call, then we're not going
      // to find anything else to process.
      if (DeadStackObjects.empty())
        return MadeChange;

      continue;
    }

    AliasAnalysis::Location LoadedLoc;

    // If we encounter a use of the pointer, it is no longer considered dead.
    if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
      if (!L->isUnordered()) // Be conservative with atomic/volatile load.
        break;
      LoadedLoc = AA->getLocation(L);
    } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
      LoadedLoc = AA->getLocation(V);
    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
      LoadedLoc = AA->getLocationForSource(MTI);
    } else if (!BBI->mayReadFromMemory()) {
      // Instruction doesn't read memory.  Note that stores that weren't
      // removed above will hit this case.
      continue;
    } else {
      // Unknown inst; assume it clobbers everything.
      break;
    }

    // Remove any allocas from the DeadPointer set that are loaded, as this
    // makes any stores above the access live.
    RemoveAccessedObjects(LoadedLoc, DeadStackObjects);

    // If all of the allocas were clobbered by the access, then we're not going
    // to find anything else to process.
    if (DeadStackObjects.empty())
      break;
  }

  return MadeChange;
}
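// This newer version also treats non-escaping heap allocations like stack
// slots.  A rough standalone sketch of that escape test on a toy IR follows;
// all types are hypothetical, and the real pass uses extractMallocCall /
// PointerMayBeCaptured rather than this simplified check, which only treats
// returning the pointer or passing it to a call as an escape.
#include <iostream>
#include <string>
#include <vector>

enum class ToyOp { Malloc, Store, Return, PassToCall };

struct ToyInst {
  ToyOp Op;
  std::string Ptr;
};

// A heap pointer "escapes" if it is returned or passed to another function;
// stores *through* it don't publish the pointer itself.
static bool mayEscape(const std::string &Ptr, const std::vector<ToyInst> &BB) {
  for (const ToyInst &I : BB)
    if (I.Ptr == Ptr && (I.Op == ToyOp::Return || I.Op == ToyOp::PassToCall))
      return true;
  return false;
}

int main() {
  std::vector<ToyInst> BB = {{ToyOp::Malloc, "p"}, {ToyOp::Store, "p"}};
  std::cout << (mayEscape("p", BB) ? "escapes" : "never escapes: dead at end")
            << '\n';
}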
void SanityCheckInstructionsPass::findInstructions(Function *F) {
  // A list of instructions that are used by sanity checks. They become sanity
  // check instructions if it turns out they're not used by anything else.
  SmallPtrSet<Instruction*, 128> Worklist;

  // A list of basic blocks that contain sanity check instructions. They
  // become sanity check blocks if it turns out they don't contain anything
  // else.
  SmallPtrSet<BasicBlock*, 64> BlockWorklist;

  // A map from instructions to the checks that use them.
  std::map<Instruction*, SmallPtrSet<Instruction*, 4> > ChecksByInstruction;

  for (BasicBlock &BB : *F) {
    if (findSanityCheckCall(&BB)) {
      SanityCheckBlocks[F].insert(&BB);

      // All instructions inside sanity check blocks are sanity check
      // instructions.
      for (Instruction &I : BB) {
        Worklist.insert(&I);
      }

      // All branches to sanity check blocks are sanity check branches.
      for (User *U : BB.users()) {
        if (Instruction *Inst = dyn_cast<Instruction>(U)) {
          Worklist.insert(Inst);
        }
        BranchInst *BI = dyn_cast<BranchInst>(U);
        if (BI && BI->isConditional()) {
          SanityCheckBranches[F].insert(BI);
          ChecksByInstruction[BI].insert(BI);
        }
      }
    }
  }

  while (!Worklist.empty()) {
    // Alternate between emptying the worklist...
    while (!Worklist.empty()) {
      Instruction *Inst = *Worklist.begin();
      Worklist.erase(Inst);
      if (onlyUsedInSanityChecks(Inst)) {
        if (SanityCheckInstructions[F].insert(Inst)) {
          for (Use &U : Inst->operands()) {
            if (Instruction *Op = dyn_cast<Instruction>(U.get())) {
              Worklist.insert(Op);

              // Copy ChecksByInstruction from Inst to Op.
              auto CBI = ChecksByInstruction.find(Inst);
              if (CBI != ChecksByInstruction.end()) {
                ChecksByInstruction[Op].insert(CBI->second.begin(),
                                               CBI->second.end());
              }
            }
          }

          BlockWorklist.insert(Inst->getParent());

          // Fill InstructionsBySanityCheck from the inverse of
          // ChecksByInstruction.
          auto CBI = ChecksByInstruction.find(Inst);
          if (CBI != ChecksByInstruction.end()) {
            for (Instruction *CI : CBI->second) {
              InstructionsBySanityCheck[CI].insert(Inst);
            }
          }
        }
      }
    }

    // ... and checking whether this causes basic blocks to contain only
    // sanity checks. This would in turn cause terminators to be added to
    // the worklist.
    while (!BlockWorklist.empty()) {
      BasicBlock *BB = *BlockWorklist.begin();
      BlockWorklist.erase(BB);

      bool allInstructionsAreSanityChecks = true;
      for (Instruction &I : *BB) {
        if (!SanityCheckInstructions.at(BB->getParent()).count(&I)) {
          allInstructionsAreSanityChecks = false;
          break;
        }
      }

      if (allInstructionsAreSanityChecks) {
        for (User *U : BB->users()) {
          if (Instruction *Inst = dyn_cast<Instruction>(U)) {
            Worklist.insert(Inst);
          }
        }
      }
    }
  }
}
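// The nested loops above drain two worklists that feed each other: draining
// the instruction worklist can enqueue blocks, and draining the block
// worklist can enqueue instructions.  This is a minimal sketch of just that
// control structure, with ints standing in for instructions and blocks (the
// enqueue rules here are purely hypothetical).
#include <iostream>
#include <set>

static void drainBoth(std::set<int> &Insts, std::set<int> &Blocks) {
  while (!Insts.empty()) {
    while (!Insts.empty()) {
      int I = *Insts.begin();
      Insts.erase(Insts.begin());
      std::cout << "inst " << I << '\n';
      if (I % 2 == 0)
        Blocks.insert(I / 2);  // Processing an inst may expose a block.
    }
    while (!Blocks.empty()) {
      int B = *Blocks.begin();
      Blocks.erase(Blocks.begin());
      std::cout << "block " << B << '\n';
      if (B > 1)
        Insts.insert(B - 1);   // Processing a block may expose an inst.
    }
  }
}

int main() {
  std::set<int> Insts = {4}, Blocks;
  drainBoth(Insts, Blocks);    // Prints: inst 4, block 2, inst 1.
}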
/// lowerAcrossUnwindEdges - Find all variables which are alive across an
/// unwind edge and spill them.
void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
                                           ArrayRef<InvokeInst *> Invokes) {
  // Finally, scan the code looking for instructions with bad live ranges.
  for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
    for (BasicBlock::iterator II = BB->begin(), IIE = BB->end(); II != IIE;
         ++II) {
      // Ignore obvious cases we don't have to handle. In particular, most
      // instructions either have no uses or only have a single use inside the
      // current block. Ignore them quickly.
      Instruction *Inst = &*II;
      if (Inst->use_empty())
        continue;
      if (Inst->hasOneUse() &&
          cast<Instruction>(Inst->user_back())->getParent() == BB &&
          !isa<PHINode>(Inst->user_back()))
        continue;

      // If this is an alloca in the entry block, it's not a real register
      // value.
      if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
        if (isa<ConstantInt>(AI->getArraySize()) && BB == F.begin())
          continue;

      // Avoid iterator invalidation by copying users to a temporary vector.
      SmallVector<Instruction *, 16> Users;
      for (User *U : Inst->users()) {
        Instruction *UI = cast<Instruction>(U);
        if (UI->getParent() != BB || isa<PHINode>(UI))
          Users.push_back(UI);
      }

      // Find all of the blocks that this value is live in.
      SmallPtrSet<BasicBlock *, 64> LiveBBs;
      LiveBBs.insert(Inst->getParent());
      while (!Users.empty()) {
        Instruction *U = Users.back();
        Users.pop_back();

        if (!isa<PHINode>(U)) {
          MarkBlocksLiveIn(U->getParent(), LiveBBs);
        } else {
          // Uses for a PHI node occur in their predecessor block.
          PHINode *PN = cast<PHINode>(U);
          for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
            if (PN->getIncomingValue(i) == Inst)
              MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
        }
      }

      // Now that we know all of the blocks that this thing is live in, see if
      // it includes any of the unwind locations.
      bool NeedsSpill = false;
      for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
        BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
        if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
          DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around "
                       << UnwindBlock->getName() << "\n");
          NeedsSpill = true;
          break;
        }
      }

      // If we decided we need a spill, do it.
      // FIXME: Spilling this way is overkill, as it forces all uses of
      // the value to be reloaded from the stack slot, even those that aren't
      // in the unwind blocks. We should be more selective.
      if (NeedsSpill) {
        DemoteRegToStack(*Inst, true);
        ++NumSpilled;
      }
    }
  }

  // Go through the landing pads and remove any PHIs there.
  for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
    BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
    LandingPadInst *LPI = UnwindBlock->getLandingPadInst();

    // Place PHIs into a set to avoid invalidating the iterator.
    SmallPtrSet<PHINode *, 8> PHIsToDemote;
    for (BasicBlock::iterator PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN)
      PHIsToDemote.insert(cast<PHINode>(PN));
    if (PHIsToDemote.empty())
      continue;

    // Demote the PHIs to the stack.
    for (PHINode *PN : PHIsToDemote)
      DemotePHIToStack(PN);

    // Move the landingpad instruction back to the top of the landing pad
    // block.
    LPI->moveBefore(&UnwindBlock->front());
  }
}
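// A hedged sketch of the live-block computation underlying the spill decision
// above (the real code does this via MarkBlocksLiveIn): starting from each
// use, walk predecessor edges back toward the defining block, collecting
// every block the value is live in.  The CFG here is a plain adjacency map of
// hypothetical block ids, not MachineBasicBlocks.
#include <iostream>
#include <map>
#include <set>
#include <vector>

using CFG = std::map<int, std::vector<int>>;  // block -> predecessors

static std::set<int> liveBlocks(const CFG &Preds, int DefBB,
                                const std::vector<int> &UseBBs) {
  std::set<int> Live;
  std::vector<int> Worklist(UseBBs);
  while (!Worklist.empty()) {
    int BB = Worklist.back();
    Worklist.pop_back();
    if (!Live.insert(BB).second)
      continue;                      // Already visited.
    if (BB == DefBB)
      continue;                      // Liveness stops at the definition.
    auto It = Preds.find(BB);
    if (It != Preds.end())
      for (int P : It->second)
        Worklist.push_back(P);
  }
  return Live;
}

int main() {
  CFG Preds = {{1, {0}}, {2, {1}}, {3, {1}}};  // 0 -> 1 -> {2, 3}
  std::set<int> Live = liveBlocks(Preds, /*DefBB=*/0, /*UseBBs=*/{3});
  // A value defined in 0 and used in 3 is live in 0, 1 and 3; if an unwind
  // destination (say, block 1) is in this set, the value needs a spill.
  for (int B : Live)
    std::cout << B << ' ';
  std::cout << '\n';  // Prints: 0 1 3
}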
/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" calls.
/// The "unwind" part of these invokes jumps to a landing pad within the
/// current function. This is a candidate for merging the selector associated
/// with the URoR invoke with the one from the URoR's landing pad.
bool DwarfEHPrepare::HandleURoRInvokes() {
  if (!EHCatchAllValue) {
    EHCatchAllValue =
      F->getParent()->getNamedGlobal("llvm.eh.catch.all.value");
    if (!EHCatchAllValue)
      return false;
  }

  if (!SelectorIntrinsic) {
    SelectorIntrinsic =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
    if (!SelectorIntrinsic)
      return false;
  }

  SmallPtrSet<IntrinsicInst*, 32> Sels;
  SmallPtrSet<IntrinsicInst*, 32> CatchAllSels;
  FindAllCleanupSelectors(Sels, CatchAllSels);

  if (!DT)
    // We require DominatorTree information.
    return CleanupSelectors(CatchAllSels);

  if (!URoR) {
    URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow");
    if (!URoR)
      return CleanupSelectors(CatchAllSels);
  }

  SmallPtrSet<InvokeInst*, 32> URoRInvokes;
  FindAllURoRInvokes(URoRInvokes);

  SmallPtrSet<IntrinsicInst*, 32> SelsToConvert;

  for (SmallPtrSet<IntrinsicInst*, 32>::iterator
         SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) {
    const BasicBlock *SelBB = (*SI)->getParent();
    for (SmallPtrSet<InvokeInst*, 32>::iterator
           UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) {
      const BasicBlock *URoRBB = (*UI)->getParent();
      if (DT->dominates(SelBB, URoRBB)) {
        SelsToConvert.insert(*SI);
        break;
      }
    }
  }

  bool Changed = false;

  if (Sels.size() != SelsToConvert.size()) {
    // If we haven't been able to convert all of the clean-up selectors, then
    // loop through the slow way to see if they still need to be converted.
    if (!ExceptionValueIntrinsic) {
      ExceptionValueIntrinsic =
        Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception);
      if (!ExceptionValueIntrinsic)
        return CleanupSelectors(CatchAllSels);
    }

    for (Value::use_iterator
           I = ExceptionValueIntrinsic->use_begin(),
           E = ExceptionValueIntrinsic->use_end(); I != E; ++I) {
      IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(*I);
      if (!EHPtr || EHPtr->getParent()->getParent() != F)
        continue;

      Changed |= PromoteEHPtrStore(EHPtr);

      bool URoRInvoke = false;
      SmallPtrSet<IntrinsicInst*, 8> SelCalls;
      Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls);

      if (URoRInvoke) {
        // This EH pointer is being used by an invoke of an URoR instruction
        // and an eh.selector intrinsic call. If the eh.selector is a
        // 'clean-up', we need to convert it to a 'catch-all'.
        for (SmallPtrSet<IntrinsicInst*, 8>::iterator
               SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI)
          if (!HasCatchAllInSelector(*SI))
            SelsToConvert.insert(*SI);
      }
    }
  }

  if (!SelsToConvert.empty()) {
    // Convert all clean-up eh.selectors, which are associated with "invokes"
    // of URoR calls, into catch-all eh.selectors.
    Changed = true;

    for (SmallPtrSet<IntrinsicInst*, 8>::iterator
           SI = SelsToConvert.begin(), SE = SelsToConvert.end();
         SI != SE; ++SI) {
      IntrinsicInst *II = *SI;

      // Use the exception object pointer and the personality function
      // from the original selector.
      CallSite CS(II);
      IntrinsicInst::op_iterator I = CS.arg_begin();
      IntrinsicInst::op_iterator E = CS.arg_end();
      IntrinsicInst::op_iterator B = prior(E);

      // Exclude last argument if it is an integer.
      if (isa<ConstantInt>(B)) E = B;

      // Add exception object pointer (front).
      // Add personality function (next).
      // Add in any filter IDs (rest).
      SmallVector<Value*, 8> Args(I, E);

      Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator.

      CallInst *NewSelector =
        CallInst::Create(SelectorIntrinsic, Args.begin(), Args.end(),
                         "eh.sel.catch.all", II);

      NewSelector->setTailCall(II->isTailCall());
      NewSelector->setAttributes(II->getAttributes());
      NewSelector->setCallingConv(II->getCallingConv());

      II->replaceAllUsesWith(NewSelector);
      II->eraseFromParent();
    }
  }

  Changed |= CleanupSelectors(CatchAllSels);
  return Changed;
}
bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
                                         MachineFunction &MF,
                                         const TargetRegisterInfo *TRI,
                                         const TargetInstrInfo *TII) {
  SmallPtrSet<MachineBasicBlock *, 2> SinkableBBs;
  // FIXME: For now, we sink only to a successor which has a single predecessor
  // so that we can directly sink COPY instructions to the successor without
  // adding any new block or branch instruction.
  for (MachineBasicBlock *SI : CurBB.successors())
    if (!SI->livein_empty() && SI->pred_size() == 1)
      SinkableBBs.insert(SI);

  if (SinkableBBs.empty())
    return false;

  bool Changed = false;

  // Track which registers have been modified and used between the end of the
  // block and the current instruction.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();

  for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) {
    MachineInstr *MI = &*I;
    ++I;

    if (MI->isDebugInstr())
      continue;

    // Do not move any instruction across a function call.
    if (MI->isCall())
      return false;

    if (!MI->isCopy() || !MI->getOperand(0).isRenamable()) {
      LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }

    // Track the operand index for use in Copy.
    SmallVector<unsigned, 2> UsedOpsInCopy;
    // Track the register number defed in Copy.
    SmallVector<unsigned, 2> DefedRegsInCopy;

    // Don't sink the COPY if it would violate a register dependency.
    if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
                              ModifiedRegUnits, UsedRegUnits)) {
      LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) &&
           "Unexpected SrcReg or DefReg");

    MachineBasicBlock *SuccBB =
        getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI);

    // Don't sink if we cannot find a single sinkable successor in which Reg
    // is live-in.
    if (!SuccBB) {
      LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
           "Unexpected predecessor");

    // Clear the kill flag if SrcReg is killed between MI and the end of the
    // block.
    clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
    MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
    performSink(*MI, *SuccBB, InsertPos);
    updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);

    Changed = true;
    ++NumPostRACopySink;
  }
  return Changed;
}
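// A toy model of the bottom-up dependency check above (hypothetical register
// names and types, not the LiveRegUnits API): a copy can sink past everything
// below it only if nothing below reads or redefines its destination and
// nothing below rewrites its source.
#include <iostream>
#include <set>
#include <string>
#include <vector>

struct ToyInst {
  std::set<std::string> Defs, Uses;
  bool IsCopy;
};

// Scan a block bottom-up; return indices of copies that could sink past
// everything below them (no register dependency with later instructions).
static std::vector<size_t> sinkableCopies(const std::vector<ToyInst> &BB) {
  std::set<std::string> Modified, Used;
  std::vector<size_t> Sinkable;
  for (size_t i = BB.size(); i-- > 0; ) {
    const ToyInst &I = BB[i];
    bool DefRead = false, DepClobbered = false;
    for (const std::string &D : I.Defs)
      if (Used.count(D) || Modified.count(D))
        DefRead = true;       // Something below consumes or redefines it.
    for (const std::string &U : I.Uses)
      if (Modified.count(U))
        DepClobbered = true;  // A source register is rewritten below.
    if (I.IsCopy && !DefRead && !DepClobbered)
      Sinkable.push_back(i);
    Modified.insert(I.Defs.begin(), I.Defs.end());
    Used.insert(I.Uses.begin(), I.Uses.end());
  }
  return Sinkable;
}

int main() {
  std::vector<ToyInst> BB = {
      {{"r1"}, {"r0"}, /*IsCopy=*/true},   // r1 = COPY r0
      {{"r2"}, {"r1"}, /*IsCopy=*/false},  // r2 = add r1 (reads the copy)
      {{"r3"}, {"r0"}, /*IsCopy=*/true},   // r3 = COPY r0 (nothing below)
  };
  for (size_t Idx : sinkableCopies(BB))
    std::cout << "copy at " << Idx << " can sink\n";  // Only index 2.
}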
// Sinks \p I from the loop \p L's preheader to its uses. Returns true if
// sinking is successful.
// \p LoopBlockNumber is used to sort the insertion blocks to ensure
// determinism.
static bool sinkInstruction(Loop &L, Instruction &I,
                            const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
                            const SmallDenseMap<BasicBlock *, int, 16>
                                &LoopBlockNumber,
                            LoopInfo &LI, DominatorTree &DT,
                            BlockFrequencyInfo &BFI) {
  // Compute the set of blocks in loop L which contain a use of I.
  SmallPtrSet<BasicBlock *, 2> BBs;
  for (auto &U : I.uses()) {
    Instruction *UI = cast<Instruction>(U.getUser());
    // We cannot sink I to PHI-uses.
    if (dyn_cast<PHINode>(UI))
      return false;
    // We cannot sink I if it has uses outside of the loop.
    if (!L.contains(LI.getLoopFor(UI->getParent())))
      return false;
    BBs.insert(UI->getParent());
  }

  // findBBsToSinkInto is O(BBs.size() * ColdLoopBBs.size()). We cap the max
  // BBs.size() to avoid expensive computation.
  // FIXME: Handle code size growth for min_size and opt_size.
  if (BBs.size() > MaxNumberOfUseBBsForSinking)
    return false;

  // Find the set of BBs that we should insert a copy of I into.
  SmallPtrSet<BasicBlock *, 2> BBsToSinkInto =
      findBBsToSinkInto(L, BBs, ColdLoopBBs, DT, BFI);
  if (BBsToSinkInto.empty())
    return false;

  // Copy the final BBs into a vector and sort them using the total ordering
  // of the loop block numbers as iterating the set doesn't give a useful
  // order. No need to stable sort as the block numbers are a total ordering.
  SmallVector<BasicBlock *, 2> SortedBBsToSinkInto;
  SortedBBsToSinkInto.insert(SortedBBsToSinkInto.begin(), BBsToSinkInto.begin(),
                             BBsToSinkInto.end());
  std::sort(SortedBBsToSinkInto.begin(), SortedBBsToSinkInto.end(),
            [&](BasicBlock *A, BasicBlock *B) {
              return LoopBlockNumber.find(A)->second <
                     LoopBlockNumber.find(B)->second;
            });

  BasicBlock *MoveBB = *SortedBBsToSinkInto.begin();
  // FIXME: Optimize the efficiency for cloned value replacement. The current
  // implementation is O(SortedBBsToSinkInto.size() * I.num_uses()).
  for (BasicBlock *N : SortedBBsToSinkInto) {
    if (N == MoveBB)
      continue;
    // Clone I and replace its uses.
    Instruction *IC = I.clone();
    IC->setName(I.getName());
    IC->insertBefore(&*N->getFirstInsertionPt());
    // Replace uses of I with IC in N.
    for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;) {
      Use &U = *UI++;
      auto *UserInst = cast<Instruction>(U.getUser());
      if (UserInst->getParent() == N)
        U.set(IC);
    }
    // Replace uses of I with IC in blocks dominated by N.
    replaceDominatedUsesWith(&I, IC, DT, N);
    DEBUG(dbgs() << "Sinking a clone of " << I << " To: " << N->getName()
                 << '\n');
    NumLoopSunkCloned++;
  }
  DEBUG(dbgs() << "Sinking " << I << " To: " << MoveBB->getName() << '\n');
  NumLoopSunk++;
  I.moveBefore(&*MoveBB->getFirstInsertionPt());

  return true;
}
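// The sort above exists because iterating a pointer-keyed set is
// nondeterministic between runs; ordering by a precomputed per-loop block
// number makes the output stable.  A small sketch of the same trick, with
// strings standing in for blocks (the names and numbers here are made up;
// note a std::set of strings happens to be ordered already, so this is only
// illustrative of the pattern).
#include <algorithm>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  std::map<std::string, int> LoopBlockNumber = {
      {"bb.cold", 3}, {"bb.body", 1}, {"bb.exit", 2}};
  std::set<std::string> BBsToSinkInto = {"bb.exit", "bb.cold", "bb.body"};

  std::vector<std::string> Sorted(BBsToSinkInto.begin(), BBsToSinkInto.end());
  std::sort(Sorted.begin(), Sorted.end(),
            [&](const std::string &A, const std::string &B) {
              return LoopBlockNumber.at(A) < LoopBlockNumber.at(B);
            });

  // The first block receives the moved instruction; the rest get clones.
  for (size_t i = 0; i < Sorted.size(); ++i)
    std::cout << (i == 0 ? "move to " : "clone into ") << Sorted[i] << '\n';
}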
void fixTerminators(MachineBasicBlock *MBB) {
  SmallPtrSet<MachineBasicBlock*, 2> MissedSuccs;

  MissedSuccs.insert(MBB->succ_begin(), MBB->succ_end());
  MachineInstr *FirstTerminator = 0;

  for (MachineBasicBlock::iterator II = MBB->getFirstTerminator(),
       IE = MBB->end(); II != IE; ++II) {
    MachineInstr *Inst = II;
    if (!VInstrInfo::isBrCndLike(Inst->getOpcode()))
      continue;

    MachineBasicBlock *TargetBB = Inst->getOperand(1).getMBB();
    MachineOperand Cnd = Inst->getOperand(0);
    //bool inserted;
    //jt_it at;
    //tie(at, inserted) = Table.insert(std::make_pair(TargetBB, Cnd));
    // BranchFolding may generate code that jumps to the same bb with multiple
    // instructions; merge the condition.
    //if (!inserted) {
    //  at->second = VInstrInfo::MergePred(Cnd, at->second, *MBB,
    //                                     MBB->getFirstTerminator(), &MRI,
    //                                     TII, VTM::VOpOr);
    //}

    // Change the unconditional branch following a conditional branch into a
    // conditional branch.
    if (FirstTerminator && VInstrInfo::isUnConditionalBranch(Inst)) {
      MachineOperand &TrueCnd = FirstTerminator->getOperand(0);
      MachineOperand &FalseCnd = Inst->getOperand(0);

      TrueCnd.setIsKill(false);
      FalseCnd.setReg(TrueCnd.getReg());
      FalseCnd.setTargetFlags(TrueCnd.getTargetFlags());
      VInstrInfo::ReversePredicateCondition(FalseCnd);
    }

    FirstTerminator = Inst;
    MissedSuccs.erase(TargetBB);
  }

  // Make sure each basic block has a terminator.
  if (!MissedSuccs.empty()) {
    assert(MissedSuccs.size() == 1 && "Fall through to multiple blocks?");
    ++UnconditionalBranches;

    MachineOperand Cnd = VInstrInfo::CreatePredicate();
    if (FirstTerminator) {
      MachineOperand &TrueCnd = FirstTerminator->getOperand(0);
      assert(TrueCnd.getReg() != 0 && "Two unconditional branches?");
      // We will use the register somewhere else.
      TrueCnd.setIsKill(false);
      Cnd = TrueCnd;
      VInstrInfo::ReversePredicateCondition(Cnd);
    }

    BuildMI(MBB, DebugLoc(), VInstrInfo::getDesc(VTM::VOpToStateb))
      .addOperand(Cnd).addMBB(*MissedSuccs.begin())
      .addOperand(VInstrInfo::CreatePredicate())
      .addOperand(VInstrInfo::CreateTrace());
  }
  //else if (Table.size() != MBB->succ_size()) {
  //  // Also fix the CFG.
  //  while (!MBB->succ_empty())
  //    MBB->removeSuccessor(MBB->succ_end() - 1);
  //  for (jt_it JI = Table.begin(), JE = Table.end(); JI != JE; ++JI)
  //    MBB->addSuccessor(JI->first);
  //  // Try to correct the CFG.
  //  TII->RemoveBranch(*MBB);
  //  VInstrInfo::insertJumpTable(*MBB, Table, DebugLoc());
  //}
  //Table.clear();

  if (MBB->succ_size() == 0 && MBB->getFirstTerminator() == MBB->end()) {
    ++Unreachables;
    BuildMI(MBB, DebugLoc(), VInstrInfo::getDesc(VTM::VOpUnreachable))
      .addOperand(VInstrInfo::CreatePredicate())
      .addOperand(VInstrInfo::CreateTrace());
  }
}
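// A toy rendition of the invariant enforced above (hypothetical ToyBlock
// type, not MachineBasicBlock): every block must end with explicit
// terminators covering all successors, and a successor-less block with no
// terminator gets an "unreachable".
#include <iostream>
#include <set>
#include <string>
#include <vector>

struct ToyBlock {
  std::set<std::string> Succs;
  std::vector<std::string> BranchTargets;  // Targets of existing terminators.
};

static void fixToyTerminators(ToyBlock &BB) {
  std::set<std::string> Missed = BB.Succs;
  for (const std::string &T : BB.BranchTargets)
    Missed.erase(T);
  if (!Missed.empty()) {
    // At most one successor may be reached by fall-through; make it explicit.
    BB.BranchTargets.push_back(*Missed.begin());
    std::cout << "added branch to " << *Missed.begin() << '\n';
  } else if (BB.Succs.empty() && BB.BranchTargets.empty()) {
    std::cout << "added unreachable\n";
  }
}

int main() {
  ToyBlock BB;
  BB.Succs = {"exit", "loop"};
  BB.BranchTargets = {"loop"};  // Currently falls through to "exit".
  fixToyTerminators(BB);        // Prints: added branch to exit
}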