static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) { SmallPtrSet<CallGraphNode *, 8> SCCNodes; bool MadeChange = false; // Fill SCCNodes with the elements of the SCC. Used for quickly // looking up whether a given CallGraphNode is in this SCC. for (CallGraphNode *I : SCC) SCCNodes.insert(I); // First pass, scan all of the functions in the SCC, simplifying them // according to what we know. for (CallGraphNode *I : SCC) if (Function *F = I->getFunction()) MadeChange |= SimplifyFunction(F, CG); // Next, check to see if any callees might throw or if there are any external // functions in this SCC: if so, we cannot prune any functions in this SCC. // Definitions that are weak and not declared non-throwing might be // overridden at linktime with something that throws, so assume that. // If this SCC includes the unwind instruction, we KNOW it throws, so // obviously the SCC might throw. // bool SCCMightUnwind = false, SCCMightReturn = false; for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) { Function *F = (*I)->getFunction(); if (!F) { SCCMightUnwind = true; SCCMightReturn = true; } else if (!F->hasExactDefinition()) { SCCMightUnwind |= !F->doesNotThrow(); SCCMightReturn |= !F->doesNotReturn(); } else { bool CheckUnwind = !SCCMightUnwind && !F->doesNotThrow(); bool CheckReturn = !SCCMightReturn && !F->doesNotReturn(); // Determine if we should scan for InlineAsm in a naked function as it // is the only way to return without a ReturnInst. Only do this for // no-inline functions as functions which may be inlined cannot // meaningfully return via assembly. bool CheckReturnViaAsm = CheckReturn && F->hasFnAttribute(Attribute::Naked) && F->hasFnAttribute(Attribute::NoInline); if (!CheckUnwind && !CheckReturn) continue; for (const BasicBlock &BB : *F) { const TerminatorInst *TI = BB.getTerminator(); if (CheckUnwind && TI->mayThrow()) { SCCMightUnwind = true; } else if (CheckReturn && isa<ReturnInst>(TI)) { SCCMightReturn = true; } for (const Instruction &I : BB) { if ((!CheckUnwind || SCCMightUnwind) && (!CheckReturnViaAsm || SCCMightReturn)) break; // Check to see if this function performs an unwind or calls an // unwinding function. if (CheckUnwind && !SCCMightUnwind && I.mayThrow()) { bool InstMightUnwind = true; if (const auto *CI = dyn_cast<CallInst>(&I)) { if (Function *Callee = CI->getCalledFunction()) { CallGraphNode *CalleeNode = CG[Callee]; // If the callee is outside our current SCC then we may throw // because it might. If it is inside, do nothing. if (SCCNodes.count(CalleeNode) > 0) InstMightUnwind = false; } } SCCMightUnwind |= InstMightUnwind; } if (CheckReturnViaAsm && !SCCMightReturn) if (auto ICS = ImmutableCallSite(&I)) if (const auto *IA = dyn_cast<InlineAsm>(ICS.getCalledValue())) if (IA->hasSideEffects()) SCCMightReturn = true; } if (SCCMightUnwind && SCCMightReturn) break; } } } // If the SCC doesn't unwind or doesn't throw, note this fact. if (!SCCMightUnwind || !SCCMightReturn) for (CallGraphNode *I : SCC) { Function *F = I->getFunction(); if (!SCCMightUnwind && !F->hasFnAttribute(Attribute::NoUnwind)) { F->addFnAttr(Attribute::NoUnwind); MadeChange = true; } if (!SCCMightReturn && !F->hasFnAttribute(Attribute::NoReturn)) { F->addFnAttr(Attribute::NoReturn); MadeChange = true; } } for (CallGraphNode *I : SCC) { // Convert any invoke instructions to non-throwing functions in this node // into call instructions with a branch. This makes the exception blocks // dead. 
if (Function *F = I->getFunction()) MadeChange |= SimplifyFunction(F, CG); } return MadeChange; }
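// A toy, self-contained sketch of the SCC-wide inference performed above,
// using plain structs instead of CallGraph nodes and Function attributes:
// if no member of the SCC can unwind (or return), every member can be
// marked nounwind (or noreturn). The names ToyFn and annotateSCC are
// hypothetical and exist only for this illustration.
#include <vector>

namespace pruneeh_sketch {
struct ToyFn {
  bool MayUnwindLocally = false; // body contains something that may throw
  bool MayReturnLocally = false; // body contains a return
  bool NoUnwind = false, NoReturn = false;
};

inline bool annotateSCC(std::vector<ToyFn> &SCC) {
  bool SCCMightUnwind = false, SCCMightReturn = false;
  for (const ToyFn &F : SCC) {
    SCCMightUnwind |= F.MayUnwindLocally;
    SCCMightReturn |= F.MayReturnLocally;
  }
  bool MadeChange = false;
  for (ToyFn &F : SCC) {
    if (!SCCMightUnwind && !F.NoUnwind) { F.NoUnwind = true; MadeChange = true; }
    if (!SCCMightReturn && !F.NoReturn) { F.NoReturn = true; MadeChange = true; }
  }
  return MadeChange;
}
} // namespace pruneeh_sketch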
/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure /// there is one implicit_def for each use. Add isUndef marker to /// implicit_def defs and their uses. bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n" << "********** Function: " << ((Value*)fn.getFunction())->getName() << '\n'); bool Changed = false; TII = fn.getTarget().getInstrInfo(); TRI = fn.getTarget().getRegisterInfo(); MRI = &fn.getRegInfo(); LV = &getAnalysis<LiveVariables>(); SmallSet<unsigned, 8> ImpDefRegs; SmallVector<MachineInstr*, 8> ImpDefMIs; SmallVector<MachineInstr*, 4> RUses; SmallPtrSet<MachineBasicBlock*,16> Visited; SmallPtrSet<MachineInstr*, 8> ModInsts; MachineBasicBlock *Entry = fn.begin(); for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> > DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); DFI != E; ++DFI) { MachineBasicBlock *MBB = *DFI; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { MachineInstr *MI = &*I; ++I; if (MI->isImplicitDef()) { ImpDefMIs.push_back(MI); // Is this a sub-register read-modify-write? if (MI->getOperand(0).readsReg()) continue; unsigned Reg = MI->getOperand(0).getReg(); ImpDefRegs.insert(Reg); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS) ImpDefRegs.insert(*SS); } continue; } // Eliminate %reg1032:sub<def> = COPY undef. if (MI->isCopy() && MI->getOperand(0).readsReg()) { MachineOperand &MO = MI->getOperand(1); if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) { if (MO.isKill()) { LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg()); vi.removeKill(MI); } unsigned Reg = MI->getOperand(0).getReg(); MI->eraseFromParent(); Changed = true; // A REG_SEQUENCE may have been expanded into partial definitions. // If this was the last one, mark Reg as implicitly defined. if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI->def_empty(Reg)) ImpDefRegs.insert(Reg); continue; } } bool ChangedToImpDef = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand& MO = MI->getOperand(i); if (!MO.isReg() || !MO.readsReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; if (!ImpDefRegs.count(Reg)) continue; // Use is a copy, just turn it into an implicit_def. if (CanTurnIntoImplicitDef(MI, Reg, i, ImpDefRegs)) { bool isKill = MO.isKill(); MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) MI->RemoveOperand(j); if (isKill) { ImpDefRegs.erase(Reg); LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); vi.removeKill(MI); } ChangedToImpDef = true; Changed = true; break; } Changed = true; MO.setIsUndef(); // This is a partial register redef of an implicit def. // Make sure the whole register is defined by the instruction. if (MO.isDef()) { MI->addRegisterDefined(Reg); continue; } if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { // Make sure other reads of Reg are also marked <undef>. for (unsigned j = i+1; j != e; ++j) { MachineOperand &MOJ = MI->getOperand(j); if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg()) MOJ.setIsUndef(); } ImpDefRegs.erase(Reg); } } if (ChangedToImpDef) { // Backtrack to process this new implicit_def. --I; } else { for (unsigned i = 0; i != MI->getNumOperands(); ++i) { MachineOperand& MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; ImpDefRegs.erase(MO.getReg()); } } } // Any outstanding liveout implicit_def's? 
    for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) {
      MachineInstr *MI = ImpDefMIs[i];
      unsigned Reg = MI->getOperand(0).getReg();
      if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
          !ImpDefRegs.count(Reg)) {
        // Delete all "local" implicit_def's. That includes those which define
        // physical registers since they cannot be liveout.
        MI->eraseFromParent();
        Changed = true;
        continue;
      }

      // If there are multiple defs of the same register and at least one
      // is not an implicit_def, do not insert implicit_def's before the
      // uses.
      bool Skip = false;
      SmallVector<MachineInstr*, 4> DeadImpDefs;
      for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
             DE = MRI->def_end(); DI != DE; ++DI) {
        MachineInstr *DeadImpDef = &*DI;
        if (!DeadImpDef->isImplicitDef()) {
          Skip = true;
          break;
        }
        DeadImpDefs.push_back(DeadImpDef);
      }
      if (Skip)
        continue;

      // The only implicit_defs we want to keep are those that are live
      // out of their block.
      for (unsigned j = 0, ee = DeadImpDefs.size(); j != ee; ++j)
        DeadImpDefs[j]->eraseFromParent();
      Changed = true;

      // Process each use instruction once.
      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
             UE = MRI->use_end(); UI != UE; ++UI) {
        if (UI.getOperand().isUndef())
          continue;
        MachineInstr *RMI = &*UI;
        if (ModInsts.insert(RMI))
          RUses.push_back(RMI);
      }

      for (unsigned i = 0, e = RUses.size(); i != e; ++i) {
        MachineInstr *RMI = RUses[i];

        // Turn a copy use into an implicit_def.
        if (isUndefCopy(RMI, Reg, ImpDefRegs)) {
          RMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));

          bool isKill = false;
          SmallVector<unsigned, 4> Ops;
          for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
            MachineOperand &RRMO = RMI->getOperand(j);
            if (RRMO.isReg() && RRMO.getReg() == Reg) {
              Ops.push_back(j);
              if (RRMO.isKill())
                isKill = true;
            }
          }
          // Leave the other operands alone.
          for (unsigned j = 0, ee = Ops.size(); j != ee; ++j) {
            unsigned OpIdx = Ops[j];
            RMI->RemoveOperand(OpIdx-j);
          }

          // Update LiveVariables varinfo if the instruction is a kill.
          if (isKill) {
            LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
            vi.removeKill(RMI);
          }
          continue;
        }

        // Replace Reg with a new vreg that's marked implicit.
        const TargetRegisterClass* RC = MRI->getRegClass(Reg);
        unsigned NewVReg = MRI->createVirtualRegister(RC);
        bool isKill = true;
        for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
          MachineOperand &RRMO = RMI->getOperand(j);
          if (RRMO.isReg() && RRMO.getReg() == Reg) {
            RRMO.setReg(NewVReg);
            RRMO.setIsUndef();
            if (isKill) {
              // Only the first operand of NewVReg is marked kill.
              RRMO.setIsKill();
              isKill = false;
            }
          }
        }
      }
      RUses.clear();
      ModInsts.clear();
    }
    ImpDefRegs.clear();
    ImpDefMIs.clear();
  }

  return Changed;
}
/// At this point, we're committed to promoting the alloca using IDF's, and the /// standard SSA construction algorithm. Determine which blocks need phi nodes /// and see if we can optimize out some work by avoiding insertion of dead phi /// nodes. void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, AllocaInfo &Info) { // Unique the set of defining blocks for efficient lookup. SmallPtrSet<BasicBlock *, 32> DefBlocks; DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); // Determine which blocks the value is live in. These are blocks which lead // to uses. SmallPtrSet<BasicBlock *, 32> LiveInBlocks; ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); // Use a priority queue keyed on dominator tree level so that inserted nodes // are handled from the bottom of the dominator tree upwards. typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair; typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>, less_second> IDFPriorityQueue; IDFPriorityQueue PQ; for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(), E = DefBlocks.end(); I != E; ++I) { if (DomTreeNode *Node = DT.getNode(*I)) PQ.push(std::make_pair(Node, DomLevels[Node])); } SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks; SmallPtrSet<DomTreeNode *, 32> Visited; SmallVector<DomTreeNode *, 32> Worklist; while (!PQ.empty()) { DomTreeNodePair RootPair = PQ.top(); PQ.pop(); DomTreeNode *Root = RootPair.first; unsigned RootLevel = RootPair.second; // Walk all dominator tree children of Root, inspecting their CFG edges with // targets elsewhere on the dominator tree. Only targets whose level is at // most Root's level are added to the iterated dominance frontier of the // definition set. Worklist.clear(); Worklist.push_back(Root); while (!Worklist.empty()) { DomTreeNode *Node = Worklist.pop_back_val(); BasicBlock *BB = Node->getBlock(); for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) { DomTreeNode *SuccNode = DT.getNode(*SI); // Quickly skip all CFG edges that are also dominator tree edges instead // of catching them below. if (SuccNode->getIDom() == Node) continue; unsigned SuccLevel = DomLevels[SuccNode]; if (SuccLevel > RootLevel) continue; if (!Visited.insert(SuccNode)) continue; BasicBlock *SuccBB = SuccNode->getBlock(); if (!LiveInBlocks.count(SuccBB)) continue; DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB)); if (!DefBlocks.count(SuccBB)) PQ.push(std::make_pair(SuccNode, SuccLevel)); } for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE; ++CI) { if (!Visited.count(*CI)) Worklist.push_back(*CI); } } } if (DFBlocks.size() > 1) std::sort(DFBlocks.begin(), DFBlocks.end()); unsigned CurrentVersion = 0; for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion); }
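// A minimal, self-contained sketch of the priority-queue ordering used above:
// (node, dominator-tree level) pairs are popped deepest-level first, so the
// iterated dominance frontier is computed bottom-up. ToyNode and the sample
// levels are hypothetical; this is not the LLVM DomTreeNode API.
#include <queue>
#include <utility>
#include <vector>

namespace idf_sketch {
struct ToyNode { int Id; };

// Order pairs by their second element (the level); std::priority_queue then
// keeps the pair with the greatest level on top.
struct less_second {
  bool operator()(const std::pair<ToyNode *, unsigned> &L,
                  const std::pair<ToyNode *, unsigned> &R) const {
    return L.second < R.second;
  }
};

inline void demo() {
  typedef std::pair<ToyNode *, unsigned> DomTreeNodePair;
  std::priority_queue<DomTreeNodePair, std::vector<DomTreeNodePair>,
                      less_second> PQ;
  ToyNode A{0}, B{1}, C{2};
  PQ.push(std::make_pair(&A, 1u));
  PQ.push(std::make_pair(&B, 3u));
  PQ.push(std::make_pair(&C, 2u));
  // Pops B (level 3), then C (level 2), then A (level 1).
  while (!PQ.empty())
    PQ.pop();
}
} // namespace idf_sketch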
/// InlineCallIfPossible - If it is possible to inline the specified call site, /// do so and update the CallGraph for this operation. /// /// This function also does some basic book-keeping to update the IR. The /// InlinedArrayAllocas map keeps track of any allocas that are already /// available from other functions inlined into the caller. If we are able to /// inline this call site we attempt to reuse already available allocas or add /// any new allocas to the set if not possible. static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, bool InsertLifetime) { Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); // Try to inline the function. Get the list of static allocas that were // inlined. if (!InlineFunction(CS, IFI, InsertLifetime)) return false; // If the inlined function had a higher stack protection level than the // calling function, then bump up the caller's stack protection level. if (Callee->hasFnAttr(Attribute::StackProtectReq)) Caller->addFnAttr(Attribute::StackProtectReq); else if (Callee->hasFnAttr(Attribute::StackProtect) && !Caller->hasFnAttr(Attribute::StackProtectReq)) Caller->addFnAttr(Attribute::StackProtect); // Look at all of the allocas that we inlined through this call site. If we // have already inlined other allocas through other calls into this function, // then we know that they have disjoint lifetimes and that we can merge them. // // There are many heuristics possible for merging these allocas, and the // different options have different tradeoffs. One thing that we *really* // don't want to hurt is SRoA: once inlining happens, often allocas are no // longer address taken and so they can be promoted. // // Our "solution" for that is to only merge allocas whose outermost type is an // array type. These are usually not promoted because someone is using a // variable index into them. These are also often the most important ones to // merge. // // A better solution would be to have real memory lifetime markers in the IR // and not have the inliner do any merging of allocas at all. This would // allow the backend to do proper stack slot coloring of all allocas that // *actually make it to the backend*, which is really what we want. // // Because we don't have this information, we do this simple and useful hack. // SmallPtrSet<AllocaInst*, 16> UsedAllocas; // When processing our SCC, check to see if CS was inlined from some other // call site. For example, if we're processing "A" in this code: // A() { B() } // B() { x = alloca ... C() } // C() { y = alloca ... } // Assume that C was not inlined into B initially, and so we're processing A // and decide to inline B into A. Doing this makes an alloca available for // reuse and makes a callsite (C) available for inlining. When we process // the C call site we don't want to do any alloca merging between X and Y // because their scopes are not disjoint. We could make this smarter by // keeping track of the inline history for each alloca in the // InlinedArrayAllocas but this isn't likely to be a significant win. if (InlineHistory != -1) // Only do merging for top-level call sites in SCC. return true; // Loop over all the allocas we have so far and see if they can be merged with // a previously inlined alloca. If not, remember that we had it. 
  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
       AllocaNo != e; ++AllocaNo) {
    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];

    // Don't bother trying to merge array allocations (they will usually be
    // canonicalized to be an allocation *of* an array), or allocations whose
    // type is not itself an array (because we're afraid of pessimizing SRoA).
    ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
    if (ATy == 0 || AI->isArrayAllocation())
      continue;

    // Get the list of all available allocas for this array type.
    std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];

    // Loop over the allocas in AllocasForType to see if we can reuse one. Note
    // that we have to be careful not to reuse the same "available" alloca for
    // multiple different allocas that we just inlined; we use the 'UsedAllocas'
    // set to keep track of which "available" allocas are being used by this
    // function. Also, AllocasForType can be empty of course!
    bool MergedAwayAlloca = false;
    for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
      AllocaInst *AvailableAlloca = AllocasForType[i];

      // The available alloca has to be in the right function, not in some
      // other function in this SCC.
      if (AvailableAlloca->getParent() != AI->getParent())
        continue;

      // If the inlined function already uses this alloca then we can't reuse
      // it.
      if (!UsedAllocas.insert(AvailableAlloca))
        continue;

      // Otherwise, we *can* reuse it: RAUW AI into AvailableAlloca and declare
      // success!
      DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
                   << *AvailableAlloca << '\n');

      AI->replaceAllUsesWith(AvailableAlloca);
      AI->eraseFromParent();
      MergedAwayAlloca = true;
      ++NumMergedAllocas;
      IFI.StaticAllocas[AllocaNo] = 0;
      break;
    }

    // If we already nuked the alloca, we're done with it.
    if (MergedAwayAlloca)
      continue;

    // If we were unable to merge away the alloca either because there are no
    // allocas of the right type available or because we reused them all
    // already, remember that this alloca came from an inlined function and
    // mark it used so we don't reuse it for other allocas from this inline
    // operation.
    AllocasForType.push_back(AI);
    UsedAllocas.insert(AI);
  }

  return true;
}
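// A hypothetical, self-contained sketch of the reuse pattern above: inlined
// stack slots are grouped by type, and a "used this call site" set prevents
// handing the same pre-existing slot to two different new slots. Slot and
// reuseOrRemember are invented names, not inliner API.
#include <map>
#include <set>
#include <string>
#include <vector>

namespace alloca_sketch {
struct Slot { std::string Type; };

inline Slot *reuseOrRemember(Slot *NewSlot,
                             std::map<std::string, std::vector<Slot *>> &Available,
                             std::set<Slot *> &UsedThisCallSite) {
  for (Slot *S : Available[NewSlot->Type])
    if (UsedThisCallSite.insert(S).second) // not yet reused for this call site
      return S;                            // merge NewSlot into S
  // Nothing suitable: remember NewSlot for later call sites and mark it used.
  Available[NewSlot->Type].push_back(NewSlot);
  UsedThisCallSite.insert(NewSlot);
  return NewSlot;
}
} // namespace alloca_sketch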
bool PruneEH::runOnSCC(CallGraphSCC &SCC) { SmallPtrSet<CallGraphNode *, 8> SCCNodes; CallGraph &CG = getAnalysis<CallGraph>(); bool MadeChange = false; // Fill SCCNodes with the elements of the SCC. Used for quickly // looking up whether a given CallGraphNode is in this SCC. for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) SCCNodes.insert(*I); // First pass, scan all of the functions in the SCC, simplifying them // according to what we know. for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) if (Function *F = (*I)->getFunction()) MadeChange |= SimplifyFunction(F); // Next, check to see if any callees might throw or if there are any external // functions in this SCC: if so, we cannot prune any functions in this SCC. // Definitions that are weak and not declared non-throwing might be // overridden at linktime with something that throws, so assume that. // If this SCC includes the unwind instruction, we KNOW it throws, so // obviously the SCC might throw. // bool SCCMightUnwind = false, SCCMightReturn = false; for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) { Function *F = (*I)->getFunction(); if (F == 0) { SCCMightUnwind = true; SCCMightReturn = true; } else if (F->isDeclaration() || F->mayBeOverridden()) { SCCMightUnwind |= !F->doesNotThrow(); SCCMightReturn |= !F->doesNotReturn(); } else { bool CheckUnwind = !SCCMightUnwind && !F->doesNotThrow(); bool CheckReturn = !SCCMightReturn && !F->doesNotReturn(); if (!CheckUnwind && !CheckReturn) continue; // Check to see if this function performs an unwind or calls an // unwinding function. for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { if (CheckUnwind && isa<ResumeInst>(BB->getTerminator())) { // Uses unwind / resume! SCCMightUnwind = true; } else if (CheckReturn && isa<ReturnInst>(BB->getTerminator())) { SCCMightReturn = true; } // Invoke instructions don't allow unwinding to continue, so we are // only interested in call instructions. if (CheckUnwind && !SCCMightUnwind) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (CallInst *CI = dyn_cast<CallInst>(I)) { if (CI->doesNotThrow()) { // This call cannot throw. } else if (Function *Callee = CI->getCalledFunction()) { CallGraphNode *CalleeNode = CG[Callee]; // If the callee is outside our current SCC then we may // throw because it might. if (!SCCNodes.count(CalleeNode)) { SCCMightUnwind = true; break; } } else { // Indirect call, it might throw. SCCMightUnwind = true; break; } } if (SCCMightUnwind && SCCMightReturn) break; } } } // If the SCC doesn't unwind or doesn't throw, note this fact. if (!SCCMightUnwind || !SCCMightReturn) for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { AttrBuilder NewAttributes; if (!SCCMightUnwind) NewAttributes.addAttribute(Attribute::NoUnwind); if (!SCCMightReturn) NewAttributes.addAttribute(Attribute::NoReturn); Function *F = (*I)->getFunction(); const AttributeSet &PAL = F->getAttributes(); const AttributeSet &NPAL = PAL.addFnAttributes(F->getContext(), AttributeSet::get(F->getContext(), AttributeSet::FunctionIndex, NewAttributes)); if (PAL != NPAL) { MadeChange = true; F->setAttributes(NPAL); } } for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { // Convert any invoke instructions to non-throwing functions in this node // into call instructions with a branch. This makes the exception blocks // dead. 
if (Function *F = (*I)->getFunction()) MadeChange |= SimplifyFunction(F); } return MadeChange; }
/// OptimizeMemoryInst - Load and Store Instructions often have /// addressing modes that can do significant amounts of computation. As such, /// instruction selection will try to get the load or store to do as much /// computation as possible for the program. The problem is that isel can only /// see within a single block. As such, we sink as much legal addressing mode /// stuff into the block as possible. /// /// This method is used to optimize both load/store and inline asms with memory /// operands. bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy) { Value *Repl = Addr; // Try to collapse single-value PHI nodes. This is necessary to undo // unprofitable PRE transformations. SmallVector<Value*, 8> worklist; SmallPtrSet<Value*, 16> Visited; worklist.push_back(Addr); // Use a worklist to iteratively look through PHI nodes, and ensure that // the addressing mode obtained from the non-PHI roots of the graph // are equivalent. Value *Consensus = 0; unsigned NumUsesConsensus = 0; bool IsNumUsesConsensusValid = false; SmallVector<Instruction*, 16> AddrModeInsts; ExtAddrMode AddrMode; while (!worklist.empty()) { Value *V = worklist.back(); worklist.pop_back(); // Break use-def graph loops. if (!Visited.insert(V)) { Consensus = 0; break; } // For a PHI node, push all of its incoming values. if (PHINode *P = dyn_cast<PHINode>(V)) { for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) worklist.push_back(P->getIncomingValue(i)); continue; } // For non-PHIs, determine the addressing mode being computed. SmallVector<Instruction*, 16> NewAddrModeInsts; ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI); // This check is broken into two cases with very similar code to avoid using // getNumUses() as much as possible. Some values have a lot of uses, so // calling getNumUses() unconditionally caused a significant compile-time // regression. if (!Consensus) { Consensus = V; AddrMode = NewAddrMode; AddrModeInsts = NewAddrModeInsts; continue; } else if (NewAddrMode == AddrMode) { if (!IsNumUsesConsensusValid) { NumUsesConsensus = Consensus->getNumUses(); IsNumUsesConsensusValid = true; } // Ensure that the obtained addressing mode is equivalent to that obtained // for all other roots of the PHI traversal. Also, when choosing one // such root as representative, select the one with the most uses in order // to keep the cost modeling heuristics in AddressingModeMatcher // applicable. unsigned NumUses = V->getNumUses(); if (NumUses > NumUsesConsensus) { Consensus = V; NumUsesConsensus = NumUses; AddrModeInsts = NewAddrModeInsts; } continue; } Consensus = 0; break; } // If the addressing mode couldn't be determined, or if multiple different // ones were determined, bail out now. if (!Consensus) return false; // Check to see if any of the instructions supersumed by this addr mode are // non-local to I's BB. bool AnyNonLocal = false; for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) { if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) { AnyNonLocal = true; break; } } // If all the instructions matched are already in this BB, don't do anything. if (!AnyNonLocal) { DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); return false; } // Insert this computation right after this user. Since our caller is // scanning from the top of the BB to the bottom, reuse of the expr are // guaranteed to happen later. 
IRBuilder<> Builder(MemoryInst); // Now that we determined the addressing expression we want to use and know // that we have to sink it into this block. Check to see if we have already // done this for some other load/store instr in this block. If so, reuse the // computation. Value *&SunkAddr = SunkAddrs[Addr]; if (SunkAddr) { DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " << *MemoryInst); if (SunkAddr->getType() != Addr->getType()) SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); } else { DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst); Type *IntPtrTy = TLI->getTargetData()->getIntPtrType(AccessTy->getContext()); Value *Result = 0; // Start with the base register. Do this first so that subsequent address // matching finds it last, which will prevent it from trying to match it // as the scaled value in case it happens to be a mul. That would be // problematic if we've sunk a different mul for the scale, because then // we'd end up sinking both muls. if (AddrMode.BaseReg) { Value *V = AddrMode.BaseReg; if (V->getType()->isPointerTy()) V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); if (V->getType() != IntPtrTy) V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); Result = V; } // Add the scale value. if (AddrMode.Scale) { Value *V = AddrMode.ScaledReg; if (V->getType() == IntPtrTy) { // done. } else if (V->getType()->isPointerTy()) { V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < cast<IntegerType>(V->getType())->getBitWidth()) { V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); } else { V = Builder.CreateSExt(V, IntPtrTy, "sunkaddr"); } if (AddrMode.Scale != 1) V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), "sunkaddr"); if (Result) Result = Builder.CreateAdd(Result, V, "sunkaddr"); else Result = V; } // Add in the BaseGV if present. if (AddrMode.BaseGV) { Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr"); if (Result) Result = Builder.CreateAdd(Result, V, "sunkaddr"); else Result = V; } // Add in the Base Offset if present. if (AddrMode.BaseOffs) { Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); if (Result) Result = Builder.CreateAdd(Result, V, "sunkaddr"); else Result = V; } if (Result == 0) SunkAddr = Constant::getNullValue(Addr->getType()); else SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr"); } MemoryInst->replaceUsesOfWith(Repl, SunkAddr); // If we have no uses, recursively delete the value and all dead instructions // using it. if (Repl->use_empty()) { // This can cause recursive deletion, which can invalidate our iterator. // Use a WeakVH to hold onto it in case this happens. WeakVH IterHandle(CurInstIterator); BasicBlock *BB = CurInstIterator->getParent(); RecursivelyDeleteTriviallyDeadInstructions(Repl); if (IterHandle != CurInstIterator) { // If the iterator instruction was recursively deleted, start over at the // start of the block. CurInstIterator = BB->begin(); SunkAddrs.clear(); } else { // This address is now available for reassignment, so erase the table // entry; we don't want to match some completely different instruction. SunkAddrs[Addr] = 0; } } ++NumMemoryInsts; return true; }
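// A plain-arithmetic sketch of the address expression rebuilt above with the
// IRBuilder: Result = BaseReg + ScaledReg * Scale + BaseGV + BaseOffs, where
// each component is optional. This is hypothetical illustration code, not the
// ExtAddrMode API; it only shows what the sunk computation evaluates to.
#include <cstdint>
#include <optional>

namespace addrmode_sketch {
inline int64_t materializeAddr(std::optional<int64_t> BaseReg,
                               std::optional<int64_t> ScaledReg, int64_t Scale,
                               std::optional<int64_t> BaseGV, int64_t BaseOffs) {
  int64_t Result = 0;
  if (BaseReg)
    Result += *BaseReg;             // start with the base register
  if (ScaledReg)
    Result += *ScaledReg * Scale;   // add the scaled index register
  if (BaseGV)
    Result += *BaseGV;              // add the global's address
  Result += BaseOffs;               // add the constant offset
  return Result;
}
} // namespace addrmode_sketch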
bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); LoopInfo *LI = &getAnalysis<LoopInfo>(); const DataLayout *TD = getAnalysisIfAvailable<DataLayout>(); const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); SmallVector<BasicBlock*, 8> ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); array_pod_sort(ExitBlocks.begin(), ExitBlocks.end()); SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; // The bit we are stealing from the pointer represents whether this basic // block is the header of a subloop, in which case we only process its phis. typedef PointerIntPair<BasicBlock*, 1> WorklistItem; SmallVector<WorklistItem, 16> VisitStack; SmallPtrSet<BasicBlock*, 32> Visited; bool Changed = false; bool LocalChanged; do { LocalChanged = false; VisitStack.clear(); Visited.clear(); VisitStack.push_back(WorklistItem(L->getHeader(), false)); while (!VisitStack.empty()) { WorklistItem Item = VisitStack.pop_back_val(); BasicBlock *BB = Item.getPointer(); bool IsSubloopHeader = Item.getInt(); // Simplify instructions in the current basic block. for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { Instruction *I = BI++; // The first time through the loop ToSimplify is empty and we try to // simplify all instructions. On later iterations ToSimplify is not // empty and we only bother simplifying instructions that are in it. if (!ToSimplify->empty() && !ToSimplify->count(I)) continue; // Don't bother simplifying unused instructions. if (!I->use_empty()) { Value *V = SimplifyInstruction(I, TD, TLI, DT); if (V && LI->replacementPreservesLCSSAForm(I, V)) { // Mark all uses for resimplification next time round the loop. for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; ++UI) Next->insert(cast<Instruction>(*UI)); I->replaceAllUsesWith(V); LocalChanged = true; ++NumSimplified; } } LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); if (IsSubloopHeader && !isa<PHINode>(I)) break; } // Add all successors to the worklist, except for loop exit blocks and the // bodies of subloops. We visit the headers of loops so that we can process // their phis, but we contract the rest of the subloop body and only follow // edges leading back to the original loop. for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) { BasicBlock *SuccBB = *SI; if (!Visited.insert(SuccBB)) continue; const Loop *SuccLoop = LI->getLoopFor(SuccBB); if (SuccLoop && SuccLoop->getHeader() == SuccBB && L->contains(SuccLoop)) { VisitStack.push_back(WorklistItem(SuccBB, true)); SmallVector<BasicBlock*, 8> SubLoopExitBlocks; SuccLoop->getExitBlocks(SubLoopExitBlocks); for (unsigned i = 0; i < SubLoopExitBlocks.size(); ++i) { BasicBlock *ExitBB = SubLoopExitBlocks[i]; if (LI->getLoopFor(ExitBB) == L && Visited.insert(ExitBB)) VisitStack.push_back(WorklistItem(ExitBB, false)); } continue; } bool IsExitBlock = std::binary_search(ExitBlocks.begin(), ExitBlocks.end(), SuccBB); if (IsExitBlock) continue; VisitStack.push_back(WorklistItem(SuccBB, false)); } } // Place the list of instructions to simplify on the next loop iteration // into ToSimplify. std::swap(ToSimplify, Next); Next->clear(); Changed |= LocalChanged; } while (LocalChanged); return Changed; }
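// A minimal sketch of the bit-stealing trick behind the WorklistItem above
// (this is not LLVM's PointerIntPair): one low bit of an aligned pointer is
// used to record whether the block is a subloop header. TaggedPtr is a
// hypothetical name for illustration only.
#include <cassert>
#include <cstdint>

namespace worklist_sketch {
template <typename T> class TaggedPtr {
  uintptr_t Bits = 0;
public:
  TaggedPtr(T *P, bool Flag) {
    uintptr_t Raw = reinterpret_cast<uintptr_t>(P);
    assert((Raw & 1u) == 0 && "pointee must be at least 2-byte aligned");
    Bits = Raw | (Flag ? 1u : 0u);
  }
  T *getPointer() const { return reinterpret_cast<T *>(Bits & ~uintptr_t(1)); }
  bool getInt() const { return (Bits & 1u) != 0; }
};
} // namespace worklist_sketch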
/// PromoteValuesInLoop - Try to promote memory values to scalars by sinking /// stores out of the loop and moving loads to before the loop. We do this by /// looping over the stores in the loop, looking for stores to Must pointers /// which are loop invariant. We promote these memory locations to use allocas /// instead. These allocas can easily be raised to register values by the /// PromoteMem2Reg functionality. /// void LICM::PromoteValuesInLoop() { // PromotedValues - List of values that are promoted out of the loop. Each // value has an alloca instruction for it, and a canonical version of the // pointer. std::vector<std::pair<AllocaInst*, Value*> > PromotedValues; std::map<Value*, AllocaInst*> ValueToAllocaMap; // Map of ptr to alloca FindPromotableValuesInLoop(PromotedValues, ValueToAllocaMap); if (ValueToAllocaMap.empty()) return; // If there are values to promote. Changed = true; NumPromoted += PromotedValues.size(); std::vector<Value*> PointerValueNumbers; // Emit a copy from the value into the alloca'd value in the loop preheader TerminatorInst *LoopPredInst = Preheader->getTerminator(); for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) { Value *Ptr = PromotedValues[i].second; // If we are promoting a pointer value, update alias information for the // inserted load. Value *LoadValue = 0; if (cast<PointerType>(Ptr->getType())->getElementType()->isPointerTy()) { // Locate a load or store through the pointer, and assign the same value // to LI as we are loading or storing. Since we know that the value is // stored in this loop, this will always succeed. for (Value::use_iterator UI = Ptr->use_begin(), E = Ptr->use_end(); UI != E; ++UI) { User *U = *UI; if (LoadInst *LI = dyn_cast<LoadInst>(U)) { LoadValue = LI; break; } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { if (SI->getOperand(1) == Ptr) { LoadValue = SI->getOperand(0); break; } } } assert(LoadValue && "No store through the pointer found!"); PointerValueNumbers.push_back(LoadValue); // Remember this for later. } // Load from the memory we are promoting. LoadInst *LI = new LoadInst(Ptr, Ptr->getName()+".promoted", LoopPredInst); if (LoadValue) CurAST->copyValue(LoadValue, LI); // Store into the temporary alloca. new StoreInst(LI, PromotedValues[i].first, LoopPredInst); } // Scan the basic blocks in the loop, replacing uses of our pointers with // uses of the allocas in question. // for (Loop::block_iterator I = CurLoop->block_begin(), E = CurLoop->block_end(); I != E; ++I) { BasicBlock *BB = *I; // Rewrite all loads and stores in the block of the pointer... for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { if (LoadInst *L = dyn_cast<LoadInst>(II)) { std::map<Value*, AllocaInst*>::iterator I = ValueToAllocaMap.find(L->getOperand(0)); if (I != ValueToAllocaMap.end()) L->setOperand(0, I->second); // Rewrite load instruction... } else if (StoreInst *S = dyn_cast<StoreInst>(II)) { std::map<Value*, AllocaInst*>::iterator I = ValueToAllocaMap.find(S->getOperand(1)); if (I != ValueToAllocaMap.end()) S->setOperand(1, I->second); // Rewrite store instruction... } } } // Now that the body of the loop uses the allocas instead of the original // memory locations, insert code to copy the alloca value back into the // original memory location on all exits from the loop. Note that we only // want to insert one copy of the code in each exit block, though the loop may // exit to the same block more than once. 
// SmallPtrSet<BasicBlock*, 16> ProcessedBlocks; SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getExitBlocks(ExitBlocks); for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { if (!ProcessedBlocks.insert(ExitBlocks[i])) continue; // Copy all of the allocas into their memory locations. BasicBlock::iterator BI = ExitBlocks[i]->getFirstNonPHI(); Instruction *InsertPos = BI; unsigned PVN = 0; for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) { // Load from the alloca. LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos); // If this is a pointer type, update alias info appropriately. if (LI->getType()->isPointerTy()) CurAST->copyValue(PointerValueNumbers[PVN++], LI); // Store into the memory we promoted. new StoreInst(LI, PromotedValues[i].second, InsertPos); } } // Now that we have done the deed, use the mem2reg functionality to promote // all of the new allocas we just created into real SSA registers. // std::vector<AllocaInst*> PromotedAllocas; PromotedAllocas.reserve(PromotedValues.size()); for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) PromotedAllocas.push_back(PromotedValues[i].first); PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST); }
/// PromoteArguments - This method checks the specified function to see if there /// are any promotable arguments and if it is safe to promote the function (for /// example, all callers are direct). If safe to promote some arguments, it /// calls the DoPromotion method. /// CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { Function *F = CGN->getFunction(); // Make sure that it is local to this module. if (!F || !F->hasLocalLinkage()) return 0; // First check: see if there are any pointer arguments! If not, quick exit. SmallVector<std::pair<Argument*, unsigned>, 16> PointerArgs; unsigned ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgNo) if (I->getType()->isPointerTy()) PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo)); if (PointerArgs.empty()) return 0; // Second check: make sure that all callers are direct callers. We can't // transform functions that have indirect callers. Also see if the function // is self-recursive. bool isSelfRecursive = false; for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E; ++UI) { CallSite CS(*UI); // Must be a direct call. if (CS.getInstruction() == 0 || !CS.isCallee(UI)) return 0; if (CS.getInstruction()->getParent()->getParent() == F) isSelfRecursive = true; } // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. SmallPtrSet<Argument*, 8> ArgsToPromote; SmallPtrSet<Argument*, 8> ByValArgsToTransform; for (unsigned i = 0; i != PointerArgs.size(); ++i) { bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal); Argument *PtrArg = PointerArgs[i].first; Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); // If this is a byval argument, and if the aggregate type is small, just // pass the elements, which is always safe. if (isByVal) { if (StructType *STy = dyn_cast<StructType>(AgTy)) { if (maxElements > 0 && STy->getNumElements() > maxElements) { DEBUG(dbgs() << "argpromotion disable promoting argument '" << PtrArg->getName() << "' because it would require adding more" << " than " << maxElements << " arguments to the function.\n"); continue; } // If all the elements are single-value types, we can promote it. bool AllSimple = true; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { if (!STy->getElementType(i)->isSingleValueType()) { AllSimple = false; break; } } // Safe to transform, don't even bother trying to "promote" it. // Passing the elements as a scalar will allow scalarrepl to hack on // the new alloca we introduce. if (AllSimple) { ByValArgsToTransform.insert(PtrArg); continue; } } } // If the argument is a recursive type and we're in a recursive // function, we could end up infinitely peeling the function argument. if (isSelfRecursive) { if (StructType *STy = dyn_cast<StructType>(AgTy)) { bool RecursiveType = false; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { if (STy->getElementType(i) == PtrArg->getType()) { RecursiveType = true; break; } } if (RecursiveType) continue; } } // Otherwise, see if we can promote the pointer to its value. if (isSafeToPromoteArgument(PtrArg, isByVal)) ArgsToPromote.insert(PtrArg); } // No promotable pointer arguments. if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) return 0; return DoPromotion(F, ArgsToPromote, ByValArgsToTransform); }
/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
/// This function finds loads of the same base and different offsets. If the
/// offsets are not far apart (target specific), it adds MVT::Glue inputs and
/// outputs to ensure they are scheduled together and in order. This
/// optimization may benefit some targets by improving cache locality.
void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
  SDNode *Chain = nullptr;
  unsigned NumOps = Node->getNumOperands();
  if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
    Chain = Node->getOperand(NumOps-1).getNode();
  if (!Chain)
    return;

  // Look for other loads of the same chain. Find loads that are loading from
  // the same base pointer and different offsets.
  SmallPtrSet<SDNode*, 16> Visited;
  SmallVector<int64_t, 4> Offsets;
  DenseMap<long long, SDNode*> O2SMap;  // Map from offset to SDNode.
  bool Cluster = false;
  SDNode *Base = Node;

  // This algorithm requires a reasonably low use count before finding a match
  // to avoid uselessly blowing up compile time in large blocks.
  unsigned UseCount = 0;
  for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
       I != E && UseCount < 100; ++I, ++UseCount) {
    SDNode *User = *I;
    if (User == Node || !Visited.insert(User))
      continue;
    int64_t Offset1, Offset2;
    if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
        Offset1 == Offset2)
      // FIXME: Should be ok if the addresses are identical. But earlier
      // optimizations really should have eliminated one of the loads.
      continue;
    if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
      Offsets.push_back(Offset1);
    O2SMap.insert(std::make_pair(Offset2, User));
    Offsets.push_back(Offset2);
    if (Offset2 < Offset1)
      Base = User;
    Cluster = true;
    // Reset UseCount to allow more matches.
    UseCount = 0;
  }

  if (!Cluster)
    return;

  // Sort them in increasing order.
  std::sort(Offsets.begin(), Offsets.end());

  // Check if the loads are close enough.
  SmallVector<SDNode*, 4> Loads;
  unsigned NumLoads = 0;
  int64_t BaseOff = Offsets[0];
  SDNode *BaseLoad = O2SMap[BaseOff];
  Loads.push_back(BaseLoad);
  for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
    int64_t Offset = Offsets[i];
    SDNode *Load = O2SMap[Offset];
    if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads))
      break; // Stop right here. Ignore loads that are further away.
    Loads.push_back(Load);
    ++NumLoads;
  }

  if (NumLoads == 0)
    return;

  // Cluster loads by adding MVT::Glue outputs and inputs. This also
  // ensures they are scheduled in order of increasing addresses.
  SDNode *Lead = Loads[0];
  SDValue InGlue = SDValue(nullptr, 0);
  if (AddGlue(Lead, InGlue, true, DAG))
    InGlue = SDValue(Lead, Lead->getNumValues() - 1);
  for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
    bool OutGlue = I < E - 1;
    SDNode *Load = Loads[I];

    // If AddGlue fails, we could leave an unused glue value. This should not
    // cause any problems.
    if (AddGlue(Load, InGlue, OutGlue, DAG)) {
      if (OutGlue)
        InGlue = SDValue(Load, Load->getNumValues() - 1);

      ++LoadsClustered;
    } else if (!OutGlue && InGlue.getNode())
      RemoveUnusedGlue(InGlue.getNode(), DAG);
  }
}
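// A self-contained sketch (toy data, not SelectionDAG) of the clustering
// decision above: sort the collected offsets, then extend the cluster only
// while the next load stays close enough to the base offset. clusterOffsets
// and MaxDistance are hypothetical; the real proximity test is the
// target-specific shouldScheduleLoadsNear hook.
#include <algorithm>
#include <cstdint>
#include <vector>

namespace cluster_sketch {
inline std::vector<int64_t> clusterOffsets(std::vector<int64_t> Offsets,
                                           int64_t MaxDistance) {
  if (Offsets.empty())
    return {};
  std::sort(Offsets.begin(), Offsets.end());
  std::vector<int64_t> Cluster{Offsets.front()};
  for (size_t i = 1; i < Offsets.size(); ++i) {
    if (Offsets[i] - Offsets.front() > MaxDistance)
      break;                     // stop right here; farther loads are ignored
    Cluster.push_back(Offsets[i]);
  }
  return Cluster;
}
} // namespace cluster_sketch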
void ScheduleDAGSDNodes::BuildSchedUnits() { // During scheduling, the NodeId field of SDNode is used to map SDNodes // to their associated SUnits by holding SUnits table indices. A value // of -1 means the SDNode does not yet have an associated SUnit. unsigned NumNodes = 0; for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), E = DAG->allnodes_end(); NI != E; ++NI) { NI->setNodeId(-1); ++NumNodes; } // Reserve entries in the vector for each of the SUnits we are creating. This // ensure that reallocation of the vector won't happen, so SUnit*'s won't get // invalidated. // FIXME: Multiply by 2 because we may clone nodes during scheduling. // This is a temporary workaround. SUnits.reserve(NumNodes * 2); // Add all nodes in depth first order. SmallVector<SDNode*, 64> Worklist; SmallPtrSet<SDNode*, 64> Visited; Worklist.push_back(DAG->getRoot().getNode()); Visited.insert(DAG->getRoot().getNode()); SmallVector<SUnit*, 8> CallSUnits; while (!Worklist.empty()) { SDNode *NI = Worklist.pop_back_val(); // Add all operands to the worklist unless they've already been added. for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i) if (Visited.insert(NI->getOperand(i).getNode())) Worklist.push_back(NI->getOperand(i).getNode()); if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. continue; // If this node has already been processed, stop now. if (NI->getNodeId() != -1) continue; SUnit *NodeSUnit = newSUnit(NI); // See if anything is glued to this node, if so, add them to glued // nodes. Nodes can have at most one glue input and one glue output. Glue // is required to be the last operand and result of a node. // Scan up to find glued preds. SDNode *N = NI; while (N->getNumOperands() && N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) { N = N->getOperand(N->getNumOperands()-1).getNode(); assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) NodeSUnit->isCall = true; } // Scan down to find any glued succs. N = NI; while (N->getValueType(N->getNumValues()-1) == MVT::Glue) { SDValue GlueVal(N, N->getNumValues()-1); // There are either zero or one users of the Glue result. bool HasGlueUse = false; for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E; ++UI) if (GlueVal.isOperandOf(*UI)) { HasGlueUse = true; assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); N = *UI; if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) NodeSUnit->isCall = true; break; } if (!HasGlueUse) break; } if (NodeSUnit->isCall) CallSUnits.push_back(NodeSUnit); // Schedule zero-latency TokenFactor below any nodes that may increase the // schedule height. Otherwise, ancestors of the TokenFactor may appear to // have false stalls. if (NI->getOpcode() == ISD::TokenFactor) NodeSUnit->isScheduleLow = true; // If there are glue operands involved, N is now the bottom-most node // of the sequence of nodes that are glued together. // Update the SUnit. NodeSUnit->setNode(N); assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); // Compute NumRegDefsLeft. This must be done before AddSchedEdges. InitNumRegDefsLeft(NodeSUnit); // Assign the Latency field of NodeSUnit using target-provided information. computeLatency(NodeSUnit); } // Find all call operands. 
while (!CallSUnits.empty()) { SUnit *SU = CallSUnits.pop_back_val(); for (const SDNode *SUNode = SU->getNode(); SUNode; SUNode = SUNode->getGluedNode()) { if (SUNode->getOpcode() != ISD::CopyToReg) continue; SDNode *SrcN = SUNode->getOperand(2).getNode(); if (isPassiveNode(SrcN)) continue; // Not scheduled. SUnit *SrcSU = &SUnits[SrcN->getNodeId()]; SrcSU->isCallOp = true; } } }
/// Deduce nocapture attributes for the SCC. static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { bool Changed = false; ArgumentGraph AG; AttrBuilder B; B.addAttribute(Attribute::NoCapture); // Check each function in turn, determining which pointer arguments are not // captured. for (Function *F : SCCNodes) { // We can infer and propagate function attributes only when we know that the // definition we'll get at link time is *exactly* the definition we see now. // For more details, see GlobalValue::mayBeDerefined. if (!F->hasExactDefinition()) continue; // Functions that are readonly (or readnone) and nounwind and don't return // a value can't capture arguments. Don't analyze them. if (F->onlyReadsMemory() && F->doesNotThrow() && F->getReturnType()->isVoidTy()) { for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; ++A) { if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) { A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B)); ++NumNoCapture; Changed = true; } } continue; } for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; ++A) { if (!A->getType()->isPointerTy()) continue; bool HasNonLocalUses = false; if (!A->hasNoCaptureAttr()) { ArgumentUsesTracker Tracker(SCCNodes); PointerMayBeCaptured(&*A, &Tracker); if (!Tracker.Captured) { if (Tracker.Uses.empty()) { // If it's trivially not captured, mark it nocapture now. A->addAttr( AttributeSet::get(F->getContext(), A->getArgNo() + 1, B)); ++NumNoCapture; Changed = true; } else { // If it's not trivially captured and not trivially not captured, // then it must be calling into another function in our SCC. Save // its particulars for Argument-SCC analysis later. ArgumentGraphNode *Node = AG[&*A]; for (SmallVectorImpl<Argument *>::iterator UI = Tracker.Uses.begin(), UE = Tracker.Uses.end(); UI != UE; ++UI) { Node->Uses.push_back(AG[*UI]); if (*UI != &*A) HasNonLocalUses = true; } } } // Otherwise, it's captured. Don't bother doing SCC analysis on it. } if (!HasNonLocalUses && !A->onlyReadsMemory()) { // Can we determine that it's readonly/readnone without doing an SCC? // Note that we don't allow any calls at all here, or else our result // will be dependent on the iteration order through the functions in the // SCC. SmallPtrSet<Argument *, 8> Self; Self.insert(&*A); Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self); if (R != Attribute::None) { AttrBuilder B; B.addAttribute(R); A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B)); Changed = true; R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; } } } } // The graph we've collected is partial because we stopped scanning for // argument uses once we solved the argument trivially. These partial nodes // show up as ArgumentGraphNode objects with an empty Uses list, and for // these nodes the final decision about whether they capture has already been // made. If the definition doesn't have a 'nocapture' attribute by now, it // captures. for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) { const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I; if (ArgumentSCC.size() == 1) { if (!ArgumentSCC[0]->Definition) continue; // synthetic root node // eg. "void f(int* x) { if (...) 
f(x); }" if (ArgumentSCC[0]->Uses.size() == 1 && ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) { Argument *A = ArgumentSCC[0]->Definition; A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B)); ++NumNoCapture; Changed = true; } continue; } bool SCCCaptured = false; for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) { ArgumentGraphNode *Node = *I; if (Node->Uses.empty()) { if (!Node->Definition->hasNoCaptureAttr()) SCCCaptured = true; } } if (SCCCaptured) continue; SmallPtrSet<Argument *, 8> ArgumentSCCNodes; // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for // quickly looking up whether a given Argument is in this ArgumentSCC. for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) { ArgumentSCCNodes.insert((*I)->Definition); } for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) { ArgumentGraphNode *N = *I; for (SmallVectorImpl<ArgumentGraphNode *>::iterator UI = N->Uses.begin(), UE = N->Uses.end(); UI != UE; ++UI) { Argument *A = (*UI)->Definition; if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A)) continue; SCCCaptured = true; break; } } if (SCCCaptured) continue; for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B)); ++NumNoCapture; Changed = true; } // We also want to compute readonly/readnone. With a small number of false // negatives, we can assume that any pointer which is captured isn't going // to be provably readonly or readnone, since by definition we can't // analyze all uses of a captured pointer. // // The false negatives happen when the pointer is captured by a function // that promises readonly/readnone behaviour on the pointer, then the // pointer's lifetime ends before anything that writes to arbitrary memory. // Also, a readonly/readnone pointer may be returned, but returning a // pointer is capturing it. Attribute::AttrKind ReadAttr = Attribute::ReadNone; for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; Attribute::AttrKind K = determinePointerReadAttrs(A, ArgumentSCCNodes); if (K == Attribute::ReadNone) continue; if (K == Attribute::ReadOnly) { ReadAttr = Attribute::ReadOnly; continue; } ReadAttr = K; break; } if (ReadAttr != Attribute::None) { AttrBuilder B, R; B.addAttribute(ReadAttr); R.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; // Clear out existing readonly/readnone attributes A->removeAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, R)); A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B)); ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; Changed = true; } } } return Changed; }
static bool markAliveBlocks(BasicBlock *BB, SmallPtrSet<BasicBlock*, 128> &Reachable) { SmallVector<BasicBlock*, 128> Worklist; Worklist.push_back(BB); bool Changed = false; do { BB = Worklist.pop_back_val(); if (!Reachable.insert(BB)) continue; // Do a quick scan of the basic block, turning any obviously unreachable // instructions into LLVM unreachable insts. The instruction combining pass // canonicalizes unreachable insts into stores to null or undef. for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){ if (CallInst *CI = dyn_cast<CallInst>(BBI)) { if (CI->doesNotReturn()) { // If we found a call to a no-return function, insert an unreachable // instruction after it. Make sure there isn't *already* one there // though. ++BBI; if (!isa<UnreachableInst>(BBI)) { // Don't insert a call to llvm.trap right before the unreachable. changeToUnreachable(BBI, false); Changed = true; } break; } } // Store to undef and store to null are undefined and used to signal that // they should be changed to unreachable by passes that can't modify the // CFG. if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { // Don't touch volatile stores. if (SI->isVolatile()) continue; Value *Ptr = SI->getOperand(1); if (isa<UndefValue>(Ptr) || (isa<ConstantPointerNull>(Ptr) && SI->getPointerAddressSpace() == 0)) { changeToUnreachable(SI, true); Changed = true; break; } } } // Turn invokes that call 'nounwind' functions into ordinary calls. if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { Value *Callee = II->getCalledValue(); if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { changeToUnreachable(II, true); Changed = true; } else if (II->doesNotThrow()) { if (II->use_empty() && II->onlyReadsMemory()) { // jump to the normal destination branch. BranchInst::Create(II->getNormalDest(), II); II->getUnwindDest()->removePredecessor(II->getParent()); II->eraseFromParent(); } else changeToCall(II); Changed = true; } } Changed |= ConstantFoldTerminator(BB, true); for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) Worklist.push_back(*SI); } while (!Worklist.empty()); return Changed; }
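// A minimal, self-contained model of the reachability worklist above, using a
// toy adjacency-list CFG (integers) instead of BasicBlocks. The block
// simplification steps are omitted; only the visited-set / worklist shape is
// shown.
#include <set>
#include <vector>

namespace reachability_sketch {
inline void markAlive(int Entry, const std::vector<std::vector<int>> &Succs,
                      std::set<int> &Reachable) {
  std::vector<int> Worklist{Entry};
  while (!Worklist.empty()) {
    int BB = Worklist.back();
    Worklist.pop_back();
    if (!Reachable.insert(BB).second)
      continue;                  // already visited
    for (int S : Succs[BB])      // enqueue all successors
      Worklist.push_back(S);
  }
}
} // namespace reachability_sketch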
/// findValueImpl - Implementation helper for findValue. Value *Lint::findValueImpl(Value *V, bool OffsetOk, SmallPtrSet<Value *, 4> &Visited) const { // Detect self-referential values. if (!Visited.insert(V)) return UndefValue::get(V->getType()); // TODO: Look through sext or zext cast, when the result is known to // be interpreted as signed or unsigned, respectively. // TODO: Look through eliminable cast pairs. // TODO: Look through calls with unique return values. // TODO: Look through vector insert/extract/shuffle. V = OffsetOk ? V->getUnderlyingObject() : V->stripPointerCasts(); if (LoadInst *L = dyn_cast<LoadInst>(V)) { BasicBlock::iterator BBI = L; BasicBlock *BB = L->getParent(); SmallPtrSet<BasicBlock *, 4> VisitedBlocks; for (;;) { if (!VisitedBlocks.insert(BB)) break; if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(), BB, BBI, 6, AA)) return findValueImpl(U, OffsetOk, Visited); if (BBI != BB->begin()) break; BB = BB->getUniquePredecessor(); if (!BB) break; BBI = BB->end(); } } else if (PHINode *PN = dyn_cast<PHINode>(V)) { if (Value *W = PN->hasConstantValue()) if (W != V) return findValueImpl(W, OffsetOk, Visited); } else if (CastInst *CI = dyn_cast<CastInst>(V)) { if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) : Type::getInt64Ty(V->getContext()))) return findValueImpl(CI->getOperand(0), OffsetOk, Visited); } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) { if (Value *W = FindInsertedValue(Ex->getAggregateOperand(), Ex->idx_begin(), Ex->idx_end())) if (W != V) return findValueImpl(W, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { // Same as above, but for ConstantExpr instead of Instruction. if (Instruction::isCast(CE->getOpcode())) { if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), CE->getOperand(0)->getType(), CE->getType(), TD ? TD->getIntPtrType(V->getContext()) : Type::getInt64Ty(V->getContext()))) return findValueImpl(CE->getOperand(0), OffsetOk, Visited); } else if (CE->getOpcode() == Instruction::ExtractValue) { const SmallVector<unsigned, 4> &Indices = CE->getIndices(); if (Value *W = FindInsertedValue(CE->getOperand(0), Indices.begin(), Indices.end())) if (W != V) return findValueImpl(W, OffsetOk, Visited); } } // As a last resort, try SimplifyInstruction or constant folding. if (Instruction *Inst = dyn_cast<Instruction>(V)) { if (Value *W = SimplifyInstruction(Inst, TD, DT)) return findValueImpl(W, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { if (Value *W = ConstantFoldConstantExpression(CE, TD)) if (W != V) return findValueImpl(W, OffsetOk, Visited); } return V; }
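// A small sketch of the self-reference guard used above: the walk records
// every value it has visited, and revisiting one means the forwarding chain is
// cyclic, so the search gives up on it. Toy types (integers) and the
// hypothetical name findRoot; the real code returns an UndefValue in that
// case.
#include <map>
#include <set>

namespace findvalue_sketch {
inline int findRoot(int V, const std::map<int, int> &ForwardsTo,
                    std::set<int> &Visited) {
  if (!Visited.insert(V).second)
    return -1;                     // cycle detected: bail out with a sentinel
  auto It = ForwardsTo.find(V);
  if (It == ForwardsTo.end())
    return V;                      // no further indirection to look through
  return findRoot(It->second, ForwardsTo, Visited);
}
} // namespace findvalue_sketch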
/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a /// single uncond branch between them, and BB contains no other non-phi /// instructions. bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const { // We only want to eliminate blocks whose phi nodes are used by phi nodes in // the successor. If there are more complex condition (e.g. preheaders), // don't mess around with them. BasicBlock::const_iterator BBI = BB->begin(); while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) { const Instruction *User = cast<Instruction>(*UI); if (User->getParent() != DestBB || !isa<PHINode>(User)) return false; // If User is inside DestBB block and it is a PHINode then check // incoming value. If incoming value is not from BB then this is // a complex condition (e.g. preheaders) we want to avoid here. if (User->getParent() == DestBB) { if (const PHINode *UPN = dyn_cast<PHINode>(User)) for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) { Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I)); if (Insn && Insn->getParent() == BB && Insn->getParent() != UPN->getIncomingBlock(I)) return false; } } } } // If BB and DestBB contain any common predecessors, then the phi nodes in BB // and DestBB may have conflicting incoming values for the block. If so, we // can't merge the block. const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin()); if (!DestBBPN) return true; // no conflict. // Collect the preds of BB. SmallPtrSet<const BasicBlock*, 16> BBPreds; if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { // It is faster to get preds from a PHI than with pred_iterator. for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) BBPreds.insert(BBPN->getIncomingBlock(i)); } else { BBPreds.insert(pred_begin(BB), pred_end(BB)); } // Walk the preds of DestBB. for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) { BasicBlock *Pred = DestBBPN->getIncomingBlock(i); if (BBPreds.count(Pred)) { // Common predecessor? BBI = DestBB->begin(); while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { const Value *V1 = PN->getIncomingValueForBlock(Pred); const Value *V2 = PN->getIncomingValueForBlock(BB); // If V2 is a phi node in BB, look up what the mapped value will be. if (const PHINode *V2PN = dyn_cast<PHINode>(V2)) if (V2PN->getParent() == BB) V2 = V2PN->getIncomingValueForBlock(Pred); // If there is a conflict, bail out. if (V1 != V2) return false; } } } return true; }
bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock*> &BBs) { // Clone the program to try hacking it apart... ValueToValueMapTy VMap; Module *M = CloneModule(BD.getProgram(), VMap).release(); // Convert list to set for fast lookup... SmallPtrSet<BasicBlock*, 8> Blocks; for (unsigned i = 0, e = BBs.size(); i != e; ++i) Blocks.insert(cast<BasicBlock>(VMap[BBs[i]])); outs() << "Checking for crash with only these blocks:"; unsigned NumPrint = Blocks.size(); if (NumPrint > 10) NumPrint = 10; for (unsigned i = 0, e = NumPrint; i != e; ++i) outs() << " " << BBs[i]->getName(); if (NumPrint < Blocks.size()) outs() << "... <" << Blocks.size() << " total>"; outs() << ": "; // Loop over and hack up any blocks that are not listed... for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I) for (Function::iterator BB = I->begin(), E = I->end(); BB != E; ++BB) if (!Blocks.count(&*BB) && BB->getTerminator()->getNumSuccessors()) { // Loop over all of the successors of this block, deleting any PHI nodes // that might include it. for (succ_iterator SI = succ_begin(&*BB), E = succ_end(&*BB); SI != E; ++SI) (*SI)->removePredecessor(&*BB); TerminatorInst *BBTerm = BB->getTerminator(); if (BBTerm->isEHPad()) continue; if (!BBTerm->getType()->isVoidTy() && !BBTerm->getType()->isTokenTy()) BBTerm->replaceAllUsesWith(Constant::getNullValue(BBTerm->getType())); // Replace the old terminator instruction. BB->getInstList().pop_back(); new UnreachableInst(BB->getContext(), &*BB); } // The CFG Simplifier pass may delete one of the basic blocks we are // interested in. If it does we need to take the block out of the list. Make // a "persistent mapping" by turning basic blocks into <function, name> pairs. // This won't work well if blocks are unnamed, but that is just the risk we // have to take. std::vector<std::pair<std::string, std::string> > BlockInfo; for (BasicBlock *BB : Blocks) BlockInfo.emplace_back(BB->getParent()->getName(), BB->getName()); // Now run the CFG simplify pass on the function... std::vector<std::string> Passes; Passes.push_back("simplifycfg"); Passes.push_back("verify"); std::unique_ptr<Module> New = BD.runPassesOn(M, Passes); delete M; if (!New) { errs() << "simplifycfg failed!\n"; exit(1); } M = New.release(); // Try running on the hacked up program... if (TestFn(BD, M)) { BD.setNewProgram(M); // It crashed, keep the trimmed version... // Make sure to use basic block pointers that point into the now-current // module, and that they don't include any deleted blocks. BBs.clear(); const ValueSymbolTable &GST = M->getValueSymbolTable(); for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) { Function *F = cast<Function>(GST.lookup(BlockInfo[i].first)); ValueSymbolTable &ST = F->getValueSymbolTable(); Value* V = ST.lookup(BlockInfo[i].second); if (V && V->getType() == Type::getLabelTy(V->getContext())) BBs.push_back(cast<BasicBlock>(V)); } return true; } delete M; // It didn't crash, try something else. return false; }
/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return /// instructions to the predecessor to enable tail call optimizations. The /// case it is currently looking for is: /// bb0: /// %tmp0 = tail call i32 @f0() /// br label %return /// bb1: /// %tmp1 = tail call i32 @f1() /// br label %return /// bb2: /// %tmp2 = tail call i32 @f2() /// br label %return /// return: /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] /// ret i32 %retval /// /// => /// /// bb0: /// %tmp0 = tail call i32 @f0() /// ret i32 %tmp0 /// bb1: /// %tmp1 = tail call i32 @f1() /// ret i32 %tmp1 /// bb2: /// %tmp2 = tail call i32 @f2() /// ret i32 %tmp2 /// bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { if (!TLI) return false; Value *V = RI->getReturnValue(); PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL; if (V && !PN) return false; BasicBlock *BB = RI->getParent(); if (PN && PN->getParent() != BB) return false; // It's not safe to eliminate the sign / zero extension of the return value. // See llvm::isInTailCallPosition(). const Function *F = BB->getParent(); Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) return false; // Make sure there are no instructions between the PHI and return, or that the // return is the first instruction in the block. if (PN) { BasicBlock::iterator BI = BB->begin(); do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); if (&*BI != RI) return false; } else { BasicBlock::iterator BI = BB->begin(); while (isa<DbgInfoIntrinsic>(BI)) ++BI; if (&*BI != RI) return false; } /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail /// call. SmallVector<CallInst*, 4> TailCalls; if (PN) { for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); // Make sure the phi value is indeed produced by the tail call. if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && TLI->mayBeEmittedAsTailCall(CI)) TailCalls.push_back(CI); } } else { SmallPtrSet<BasicBlock*, 4> VisitedBBs; for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { if (!VisitedBBs.insert(*PI)) continue; BasicBlock::InstListType &InstList = (*PI)->getInstList(); BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); if (RI == RE) continue; CallInst *CI = dyn_cast<CallInst>(&*RI); if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) TailCalls.push_back(CI); } } bool Changed = false; for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { CallInst *CI = TailCalls[i]; CallSite CS(CI); // Conservatively require the attributes of the call to match those of the // return. Ignore noalias because it doesn't affect the call sequence. Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes(); if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) continue; // Make sure the call instruction is followed by an unconditional branch to // the return block. BasicBlock *CallBB = CI->getParent(); BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) continue; // Duplicate the return into CallBB. 
(void)FoldReturnIntoUncondBranch(RI, BB, CallBB); ModifiedDT = Changed = true; ++NumRetsDup; } // If we eliminated all predecessors of the block, delete the block now. if (Changed && pred_begin(BB) == pred_end(BB)) BB->eraseFromParent(); return Changed; }
/// ComputeLiveInBlocks - Determine which blocks the value is live in. These /// are blocks which lead to uses. Knowing this allows us to avoid inserting /// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes /// would be dead). void PromoteMem2Reg:: ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, const SmallPtrSet<BasicBlock*, 32> &DefBlocks, SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) { // To determine liveness, we must iterate through the predecessors of blocks // where the def is live. Blocks are added to the worklist if we need to // check their predecessors. Start with all the using blocks. SmallVector<BasicBlock*, 64> LiveInBlockWorklist; LiveInBlockWorklist.insert(LiveInBlockWorklist.end(), Info.UsingBlocks.begin(), Info.UsingBlocks.end()); // If any of the using blocks is also a definition block, check to see if the // definition occurs before or after the use. If it happens before the use, // the value isn't really live-in. for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) { BasicBlock *BB = LiveInBlockWorklist[i]; if (!DefBlocks.count(BB)) continue; // Okay, this is a block that both uses and defines the value. If the first // reference to the alloca is a def (store), then we know it isn't live-in. for (BasicBlock::iterator I = BB->begin(); ; ++I) { if (StoreInst *SI = dyn_cast<StoreInst>(I)) { if (SI->getOperand(1) != AI) continue; // We found a store to the alloca before a load. The alloca is not // actually live-in here. LiveInBlockWorklist[i] = LiveInBlockWorklist.back(); LiveInBlockWorklist.pop_back(); --i, --e; break; } if (LoadInst *LI = dyn_cast<LoadInst>(I)) { if (LI->getOperand(0) != AI) continue; // Okay, we found a load before a store to the alloca. It is actually // live into this block. break; } } } // Now that we have a set of blocks where the phi is live-in, recursively add // their predecessors until we find the full region where the value is live. while (!LiveInBlockWorklist.empty()) { BasicBlock *BB = LiveInBlockWorklist.pop_back_val(); // The block really is live in here, insert it into the set. If already in // the set, then it has already been processed. if (!LiveInBlocks.insert(BB)) continue; // Since the value is live into BB, it is either defined in a predecessor or // live into it too. Add the preds to the worklist unless they are a // defining block. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *P = *PI; // The value is not live into a predecessor if it defines the value. if (DefBlocks.count(P)) continue; // Otherwise it is, add to the worklist. LiveInBlockWorklist.push_back(P); } } }
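// --- Illustrative sketch (not part of mem2reg) ---
// A standalone analogue of ComputeLiveInBlocks on a toy CFG: start from the
// blocks that use the value, then walk predecessor edges, stopping at blocks
// that define it. Block ids and predecessor lists are hypothetical, and the
// "def before first use in the same block" refinement above is omitted here.
#include <cstddef>
#include <unordered_set>
#include <vector>

std::unordered_set<std::size_t> computeLiveInBlocks(
    const std::vector<std::vector<std::size_t>> &Preds,
    const std::unordered_set<std::size_t> &DefBlocks,
    const std::vector<std::size_t> &UseBlocks) {
  std::unordered_set<std::size_t> LiveIn;
  std::vector<std::size_t> Worklist(UseBlocks.begin(), UseBlocks.end());
  while (!Worklist.empty()) {
    std::size_t BB = Worklist.back();
    Worklist.pop_back();
    // Skip blocks already marked live-in.
    if (!LiveIn.insert(BB).second)
      continue;
    // The value is live into BB, so it is live out of any predecessor that
    // does not itself define it.
    for (std::size_t P : Preds[BB]) {
      if (DefBlocks.count(P))
        continue;
      Worklist.push_back(P);
    }
  }
  return LiveIn;
}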
void TypeChecker::configureInterfaceType(AbstractFunctionDecl *func) { Type funcTy; Type initFuncTy; auto *sig = func->getGenericSignature(); if (auto fn = dyn_cast<FuncDecl>(func)) { funcTy = fn->getBodyResultTypeLoc().getType(); if (!funcTy) { funcTy = TupleType::getEmpty(Context); } else { funcTy = getResultType(*this, fn, funcTy); } } else if (auto ctor = dyn_cast<ConstructorDecl>(func)) { auto *dc = ctor->getDeclContext(); funcTy = dc->getSelfInterfaceType(); // Adjust result type for failability. if (ctor->getFailability() != OTK_None) funcTy = OptionalType::get(ctor->getFailability(), funcTy); initFuncTy = funcTy; } else { assert(isa<DestructorDecl>(func)); funcTy = TupleType::getEmpty(Context); } auto paramLists = func->getParameterLists(); SmallVector<ParameterList*, 4> storedParamLists; // FIXME: Destructors don't have the '()' pattern in their signature, so // paste it here. if (isa<DestructorDecl>(func)) { assert(paramLists.size() == 1 && "Only the self paramlist"); storedParamLists.push_back(paramLists[0]); storedParamLists.push_back(ParameterList::createEmpty(Context)); paramLists = storedParamLists; } bool hasSelf = func->getDeclContext()->isTypeContext(); for (unsigned i = 0, e = paramLists.size(); i != e; ++i) { Type argTy; Type initArgTy; Type selfTy; if (i == e-1 && hasSelf) { selfTy = func->computeInterfaceSelfType(/*isInitializingCtor=*/false); // Substitute in our own 'self' parameter. argTy = selfTy; if (initFuncTy) { initArgTy = func->computeInterfaceSelfType(/*isInitializingCtor=*/true); } } else { argTy = paramLists[e - i - 1]->getInterfaceType(func->getDeclContext()); if (initFuncTy) initArgTy = argTy; } // 'throws' only applies to the innermost function. AnyFunctionType::ExtInfo info; if (i == 0 && func->hasThrows()) info = info.withThrows(); assert(!argTy->hasArchetype()); assert(!funcTy->hasArchetype()); if (initFuncTy) assert(!initFuncTy->hasArchetype()); if (sig && i == e-1) { funcTy = GenericFunctionType::get(sig, argTy, funcTy, info); if (initFuncTy) initFuncTy = GenericFunctionType::get(sig, initArgTy, initFuncTy, info); } else { funcTy = FunctionType::get(argTy, funcTy, info); if (initFuncTy) initFuncTy = FunctionType::get(initArgTy, initFuncTy, info); } } // Record the interface type. func->setInterfaceType(funcTy); if (initFuncTy) cast<ConstructorDecl>(func)->setInitializerInterfaceType(initFuncTy); if (func->getGenericParams()) { // Collect all generic params referenced in parameter types, // return type or requirements. SmallPtrSet<GenericTypeParamDecl *, 4> referencedGenericParams; auto visitorFn = [&referencedGenericParams](Type t) { if (auto *paramTy = t->getAs<GenericTypeParamType>()) referencedGenericParams.insert(paramTy->getDecl()); }; funcTy->castTo<AnyFunctionType>()->getInput().visit(visitorFn); funcTy->castTo<AnyFunctionType>()->getResult().visit(visitorFn); auto requirements = sig->getRequirements(); for (auto req : requirements) { if (req.getKind() == RequirementKind::SameType) { // Same type requirements may allow for generic // inference, even if this generic parameter // is not mentioned in the function signature. // TODO: Make the test more precise. auto left = req.getFirstType(); auto right = req.getSecondType(); // For now consider any references inside requirements // as a possibility to infer the generic type. left.visit(visitorFn); right.visit(visitorFn); } } // Find the depth of the function's own generic parameters. 
unsigned fnGenericParamsDepth = func->getGenericParams()->getDepth(); // Check that every generic parameter type from the signature is // among referencedGenericParams. for (auto *genParam : sig->getGenericParams()) { auto *paramDecl = genParam->getDecl(); if (paramDecl->getDepth() != fnGenericParamsDepth) continue; if (!referencedGenericParams.count(paramDecl)) { // Produce an error that this generic parameter cannot be bound. diagnose(paramDecl->getLoc(), diag::unreferenced_generic_parameter, paramDecl->getNameStr()); func->setInvalid(); } } } }
/// runOnLoop - Remove dead loops, by which we mean loops that do not impact the /// observable behavior of the program other than finite running time. Note /// we do ensure that this never remove a loop that might be infinite, as doing /// so could change the halting/non-halting nature of a program. /// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA /// in order to make various safety checks work. bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) { // We can only remove the loop if there is a preheader that we can // branch from after removing it. BasicBlock *preheader = L->getLoopPreheader(); if (!preheader) return false; // If LoopSimplify form is not available, stay out of trouble. if (!L->hasDedicatedExits()) return false; // We can't remove loops that contain subloops. If the subloops were dead, // they would already have been removed in earlier executions of this pass. if (L->begin() != L->end()) return false; SmallVector<BasicBlock*, 4> exitingBlocks; L->getExitingBlocks(exitingBlocks); SmallVector<BasicBlock*, 4> exitBlocks; L->getUniqueExitBlocks(exitBlocks); // We require that the loop only have a single exit block. Otherwise, we'd // be in the situation of needing to be able to solve statically which exit // block will be branched to, or trying to preserve the branching logic in // a loop invariant manner. if (exitBlocks.size() != 1) return false; // Finally, we have to check that the loop really is dead. bool Changed = false; if (!isLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader)) return Changed; // Don't remove loops for which we can't solve the trip count. // They could be infinite, in which case we'd be changing program behavior. ScalarEvolution &SE = getAnalysis<ScalarEvolution>(); const SCEV *S = SE.getMaxBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(S)) return Changed; // Now that we know the removal is safe, remove the loop by changing the // branch from the preheader to go to the single exit block. BasicBlock *exitBlock = exitBlocks[0]; // Because we're deleting a large chunk of code at once, the sequence in which // we remove things is very important to avoid invalidation issues. Don't // mess with this unless you have good reason and know what you're doing. // Tell ScalarEvolution that the loop is deleted. Do this before // deleting the loop so that ScalarEvolution can look at the loop // to determine what it needs to clean up. SE.forgetLoop(L); // Connect the preheader directly to the exit block. TerminatorInst *TI = preheader->getTerminator(); TI->replaceUsesOfWith(L->getHeader(), exitBlock); // Rewrite phis in the exit block to get their inputs from // the preheader instead of the exiting block. BasicBlock *exitingBlock = exitingBlocks[0]; BasicBlock::iterator BI = exitBlock->begin(); while (PHINode *P = dyn_cast<PHINode>(BI)) { int j = P->getBasicBlockIndex(exitingBlock); assert(j >= 0 && "Can't find exiting block in exit block's phi node!"); P->setIncomingBlock(j, preheader); for (unsigned i = 1; i < exitingBlocks.size(); ++i) P->removeIncomingValue(exitingBlocks[i]); ++BI; } // Update the dominator tree and remove the instructions and blocks that will // be deleted from the reference counting scheme. DominatorTree &DT = getAnalysis<DominatorTree>(); SmallVector<DomTreeNode*, 8> ChildNodes; for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); LI != LE; ++LI) { // Move all of the block's children to be children of the preheader, which // allows us to remove the domtree entry for the block. 
ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end()); for (SmallVector<DomTreeNode*, 8>::iterator DI = ChildNodes.begin(), DE = ChildNodes.end(); DI != DE; ++DI) { DT.changeImmediateDominator(*DI, DT[preheader]); } ChildNodes.clear(); DT.eraseNode(*LI); // Remove the block from the reference counting scheme, so that we can // delete it freely later. (*LI)->dropAllReferences(); } // Erase the instructions and the blocks without having to worry // about ordering because we already dropped the references. // NOTE: This iteration is safe because erasing the block does not remove its // entry from the loop's block list. We do that in the next section. for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); LI != LE; ++LI) (*LI)->eraseFromParent(); // Finally, the blocks from loopinfo. This has to happen late because // otherwise our loop iterators won't work. LoopInfo &loopInfo = getAnalysis<LoopInfo>(); SmallPtrSet<BasicBlock*, 8> blocks; blocks.insert(L->block_begin(), L->block_end()); for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(), E = blocks.end(); I != E; ++I) loopInfo.removeBlock(*I); // The last step is to inform the loop pass manager that we've // eliminated this loop. LPM.deleteLoopFromQueue(L); Changed = true; ++NumDeleted; return Changed; }
bool Inliner::runOnSCC(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis<CallGraph>(); const TargetData *TD = getAnalysisIfAvailable<TargetData>(); SmallPtrSet<Function*, 8> SCCFunctions; DEBUG(dbgs() << "Inliner visiting SCC:"); for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); if (F) SCCFunctions.insert(F); DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE")); } // Scan through and identify all call sites ahead of time so that we only // inline call sites in the original functions, not call sites that result // from inlining other functions. SmallVector<std::pair<CallSite, int>, 16> CallSites; // When inlining a callee produces new call sites, we want to keep track of // the fact that they were inlined from the callee. This allows us to avoid // infinite inlining in some obscure cases. To represent this, we use an // index into the InlineHistory vector. SmallVector<std::pair<Function*, int>, 8> InlineHistory; for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); if (!F) continue; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { CallSite CS(cast<Value>(I)); // If this isn't a call, or it is a call to an intrinsic, it can // never be inlined. if (!CS || isa<IntrinsicInst>(I)) continue; // If this is a direct call to an external function, we can never inline // it. If it is an indirect call, inlining may resolve it to be a // direct call, so we keep it. if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration()) continue; CallSites.push_back(std::make_pair(CS, -1)); } } DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n"); // If there are no calls in this function, exit early. if (CallSites.empty()) return false; // Now that we have all of the call sites, move the ones to functions in the // current SCC to the end of the list. unsigned FirstCallInSCC = CallSites.size(); for (unsigned i = 0; i < FirstCallInSCC; ++i) if (Function *F = CallSites[i].first.getCalledFunction()) if (SCCFunctions.count(F)) std::swap(CallSites[i--], CallSites[--FirstCallInSCC]); InlinedArrayAllocasTy InlinedArrayAllocas; InlineFunctionInfo InlineInfo(&CG, TD); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. bool Changed = false; bool LocalChange; do { LocalChange = false; // Iterate over the outer loop because inlining functions can cause indirect // calls to become direct calls. for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) { CallSite CS = CallSites[CSi].first; Function *Caller = CS.getCaller(); Function *Callee = CS.getCalledFunction(); // If this call site is dead and it is to a readonly function, we should // just delete the call instead of trying to inline it, regardless of // size. This happens because IPSCCP propagates the result out of the // call and then we're left with the dead call. if (isInstructionTriviallyDead(CS.getInstruction())) { DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction() << "\n"); // Update the call graph by deleting the edge from Callee to Caller. CG[Caller]->removeCallEdgeFor(CS); CS.getInstruction()->eraseFromParent(); ++NumCallsDeleted; } else { // We can only inline direct calls to non-declarations. 
if (Callee == 0 || Callee->isDeclaration()) continue; // If this call site was obtained by inlining another function, verify // that the include path for the function did not include the callee // itself. If so, we'd be recursively inlining the same function, // which would provide the same callsites, which would cause us to // infinitely inline. int InlineHistoryID = CallSites[CSi].second; if (InlineHistoryID != -1 && InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) continue; // If the policy determines that we should inline this function, // try to do so. if (!shouldInline(CS)) continue; // Attempt to inline the function. if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID, InsertLifetime)) continue; ++NumInlined; // If inlining this function gave us any new call sites, throw them // onto our worklist to process. They are useful inline candidates. if (!InlineInfo.InlinedCalls.empty()) { // Create a new inline history entry for this, so that we remember // that these new callsites came about due to inlining Callee. int NewHistoryID = InlineHistory.size(); InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID)); for (unsigned i = 0, e = InlineInfo.InlinedCalls.size(); i != e; ++i) { Value *Ptr = InlineInfo.InlinedCalls[i]; CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID)); } } } // If we inlined or deleted the last possible call site to the function, // delete the function body now. if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() && // TODO: Can remove if in SCC now. !SCCFunctions.count(Callee) && // The function may be apparently dead, but if there are indirect // callgraph references to the node, we cannot delete it yet, this // could invalidate the CGSCC iterator. CG[Callee]->getNumReferences() == 0) { DEBUG(dbgs() << " -> Deleting dead function: " << Callee->getName() << "\n"); CallGraphNode *CalleeNode = CG[Callee]; // Remove any call graph edges from the callee to its callees. CalleeNode->removeAllCalledFunctions(); // Removing the node for callee from the call graph and delete it. delete CG.removeFunctionFromModule(CalleeNode); ++NumDeleted; } // Remove this call site from the list. If possible, use // swap/pop_back for efficiency, but do not use it if doing so would // move a call site to a function in this SCC before the // 'FirstCallInSCC' barrier. if (SCC.isSingular()) { CallSites[CSi] = CallSites.back(); CallSites.pop_back(); } else { CallSites.erase(CallSites.begin()+CSi); } --CSi; Changed = true; LocalChange = true; } } while (LocalChange); return Changed; }
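// --- Illustrative sketch (not part of the Inliner) ---
// A standalone analogue of the inline-history check referenced above: each
// history entry records (callee, index of the entry it was inlined from), and
// a call site is rejected when its history chain already contains the callee.
// The string callee names and int-indexed history are hypothetical stand-ins.
#include <string>
#include <utility>
#include <vector>

bool historyIncludes(const std::string &Callee, int HistoryID,
                     const std::vector<std::pair<std::string, int>> &History) {
  // Walk the parent links back to the root entry (marked by -1), looking for
  // Callee anywhere along the chain.
  while (HistoryID != -1) {
    const auto &Entry = History[static_cast<std::size_t>(HistoryID)];
    if (Entry.first == Callee)
      return true;
    HistoryID = Entry.second;
  }
  return false;
}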
// If we can determine that all possible objects pointed to by the provided // pointer value are, not only dereferenceable, but also definitively less than // or equal to the provided maximum size, then return true. Otherwise, return // false (constant global values and allocas fall into this category). // // FIXME: This should probably live in ValueTracking (or similar). static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize, const DataLayout &DL) { SmallPtrSet<Value *, 4> Visited; SmallVector<Value *, 4> Worklist(1, V); do { Value *P = Worklist.pop_back_val(); P = P->stripPointerCasts(); if (!Visited.insert(P).second) continue; if (SelectInst *SI = dyn_cast<SelectInst>(P)) { Worklist.push_back(SI->getTrueValue()); Worklist.push_back(SI->getFalseValue()); continue; } if (PHINode *PN = dyn_cast<PHINode>(P)) { for (Value *IncValue : PN->incoming_values()) Worklist.push_back(IncValue); continue; } if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) { if (GA->mayBeOverridden()) return false; Worklist.push_back(GA->getAliasee()); continue; } // If we know how big this object is, and it is less than MaxSize, continue // searching. Otherwise, return false. if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) { if (!AI->getAllocatedType()->isSized()) return false; ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize()); if (!CS) return false; uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType()); // Make sure that, even if the multiplication below would wrap as an // uint64_t, we still do the right thing. if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize)) return false; continue; } if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { if (!GV->hasDefinitiveInitializer() || !GV->isConstant()) return false; uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType()); if (InitSize > MaxSize) return false; continue; } return false; } while (!Worklist.empty()); return true; }
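// --- Illustrative sketch (not part of InstCombine) ---
// The alloca case above widens to 128 bits before multiplying so that
// 'count * elementSize' cannot wrap a 64-bit integer. A standalone way to get
// the same answer with plain 64-bit arithmetic is to rearrange the comparison
// as a division, which never forms the product at all.
#include <cstdint>

bool allocationExceeds(uint64_t Count, uint64_t ElementSize, uint64_t MaxSize) {
  if (Count == 0 || ElementSize == 0)
    return false; // A zero-byte allocation never exceeds the bound.
  // Equivalent to 'Count * ElementSize > MaxSize' in exact arithmetic.
  return Count > MaxSize / ElementSize;
}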
bool BDCE::runOnFunction(Function& F) { if (skipOptnoneFunction(F)) return false; AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); DenseMap<Instruction *, APInt> AliveBits; SmallVector<Instruction*, 128> Worklist; // The set of visited instructions (non-integer-typed only). SmallPtrSet<Instruction*, 128> Visited; // Collect the set of "root" instructions that are known live. for (Instruction &I : inst_range(F)) { if (!isAlwaysLive(&I)) continue; DEBUG(dbgs() << "BDCE: Root: " << I << "\n"); // For integer-valued instructions, set up an initial empty set of alive // bits and add the instruction to the work list. For other instructions // add their operands to the work list (for integer values operands, mark // all bits as live). if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { if (!AliveBits.count(&I)) { AliveBits[&I] = APInt(IT->getBitWidth(), 0); Worklist.push_back(&I); } continue; } // Non-integer-typed instructions... for (Use &OI : I.operands()) { if (Instruction *J = dyn_cast<Instruction>(OI)) { if (IntegerType *IT = dyn_cast<IntegerType>(J->getType())) AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth()); Worklist.push_back(J); } } // To save memory, we don't add I to the Visited set here. Instead, we // check isAlwaysLive on every instruction when searching for dead // instructions later (we need to check isAlwaysLive for the // integer-typed instructions anyway). } // Propagate liveness backwards to operands. while (!Worklist.empty()) { Instruction *UserI = Worklist.pop_back_val(); DEBUG(dbgs() << "BDCE: Visiting: " << *UserI); APInt AOut; if (UserI->getType()->isIntegerTy()) { AOut = AliveBits[UserI]; DEBUG(dbgs() << " Alive Out: " << AOut); } DEBUG(dbgs() << "\n"); if (!UserI->getType()->isIntegerTy()) Visited.insert(UserI); APInt KnownZero, KnownOne, KnownZero2, KnownOne2; // Compute the set of alive bits for each operand. These are anded into the // existing set, if any, and if that changes the set of alive bits, the // operand is added to the work-list. for (Use &OI : UserI->operands()) { if (Instruction *I = dyn_cast<Instruction>(OI)) { if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) { unsigned BitWidth = IT->getBitWidth(); APInt AB = APInt::getAllOnesValue(BitWidth); if (UserI->getType()->isIntegerTy() && !AOut && !isAlwaysLive(UserI)) { AB = APInt(BitWidth, 0); } else { // If all bits of the output are dead, then all bits of the input // Bits of each operand that are used to compute alive bits of the // output are alive, all others are dead. determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB, KnownZero, KnownOne, KnownZero2, KnownOne2); } // If we've added to the set of alive bits (or the operand has not // been previously visited), then re-queue the operand to be visited // again. APInt ABPrev(BitWidth, 0); auto ABI = AliveBits.find(I); if (ABI != AliveBits.end()) ABPrev = ABI->second; APInt ABNew = AB | ABPrev; if (ABNew != ABPrev || ABI == AliveBits.end()) { AliveBits[I] = std::move(ABNew); Worklist.push_back(I); } } else if (!Visited.count(I)) { Worklist.push_back(I); } } } } bool Changed = false; // The inverse of the live set is the dead set. These are those instructions // which have no side effects and do not influence the control flow or return // value of the function, and may therefore be deleted safely. // NOTE: We reuse the Worklist vector here for memory efficiency. 
for (Instruction &I : inst_range(F)) { // For live instructions that have all dead bits, first make them dead by // replacing all uses with something else. Then, if they don't need to // remain live (because they have side effects, etc.) we can remove them. if (I.getType()->isIntegerTy()) { auto ABI = AliveBits.find(&I); if (ABI != AliveBits.end()) { if (ABI->second.getBoolValue()) continue; DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n"); // FIXME: In theory we could substitute undef here instead of zero. // This should be reconsidered once we settle on the semantics of // undef, poison, etc. Value *Zero = ConstantInt::get(I.getType(), 0); ++NumSimplified; I.replaceAllUsesWith(Zero); Changed = true; } } else if (Visited.count(&I)) { continue; } if (isAlwaysLive(&I)) continue; Worklist.push_back(&I); I.dropAllReferences(); Changed = true; } for (Instruction *&I : Worklist) { ++NumRemoved; I->eraseFromParent(); } return Changed; }
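// --- Illustrative sketch (not BDCE's determineLiveOperandBits) ---
// Two concrete instances of the bit-liveness transfer functions the
// propagation loop above relies on: if only the bits in AliveOut of
// 'X << ShAmt' are observed, then only 'AliveOut >> ShAmt' of X matters, and
// if only AliveOut of 'X & Mask' is observed, then only 'AliveOut & Mask' of X
// matters. Plain uint64_t masks stand in for APInt here.
#include <cstdint>

uint64_t liveOperandBitsForShl(uint64_t AliveOut, unsigned ShAmt) {
  // Bits shifted out the top are never observed, so they are dead in X.
  return AliveOut >> ShAmt;
}

uint64_t liveOperandBitsForAnd(uint64_t AliveOut, uint64_t ConstMask) {
  // Bits cleared by the constant mask can never influence the result.
  return AliveOut & ConstMask;
}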
/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" calls. The /// "unwind" part of these invokes jump to a landing pad within the current /// function. This is a candidate to merge the selector associated with the URoR /// invoke with the one from the URoR's landing pad. bool DwarfEHPrepare::HandleURoRInvokes() { if (!EHCatchAllValue) { EHCatchAllValue = F->getParent()->getNamedGlobal("llvm.eh.catch.all.value"); if (!EHCatchAllValue) return false; } if (!SelectorIntrinsic) { SelectorIntrinsic = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector); if (!SelectorIntrinsic) return false; } SmallPtrSet<IntrinsicInst*, 32> Sels; SmallPtrSet<IntrinsicInst*, 32> CatchAllSels; FindAllCleanupSelectors(Sels, CatchAllSels); if (!DT) // We require DominatorTree information. return CleanupSelectors(CatchAllSels); if (!URoR) { URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow"); if (!URoR) return CleanupSelectors(CatchAllSels); } SmallPtrSet<InvokeInst*, 32> URoRInvokes; FindAllURoRInvokes(URoRInvokes); SmallPtrSet<IntrinsicInst*, 32> SelsToConvert; for (SmallPtrSet<IntrinsicInst*, 32>::iterator SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) { const BasicBlock *SelBB = (*SI)->getParent(); for (SmallPtrSet<InvokeInst*, 32>::iterator UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) { const BasicBlock *URoRBB = (*UI)->getParent(); if (DT->dominates(SelBB, URoRBB)) { SelsToConvert.insert(*SI); break; } } } bool Changed = false; if (Sels.size() != SelsToConvert.size()) { // If we haven't been able to convert all of the clean-up selectors, then // loop through the slow way to see if they still need to be converted. if (!ExceptionValueIntrinsic) { ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception); if (!ExceptionValueIntrinsic) return CleanupSelectors(CatchAllSels); } for (Value::use_iterator I = ExceptionValueIntrinsic->use_begin(), E = ExceptionValueIntrinsic->use_end(); I != E; ++I) { IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(*I); if (!EHPtr || EHPtr->getParent()->getParent() != F) continue; Changed |= PromoteEHPtrStore(EHPtr); bool URoRInvoke = false; SmallPtrSet<IntrinsicInst*, 8> SelCalls; Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls); if (URoRInvoke) { // This EH pointer is being used by an invoke of an URoR instruction and // an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we // need to convert it to a 'catch-all'. for (SmallPtrSet<IntrinsicInst*, 8>::iterator SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI) if (!HasCatchAllInSelector(*SI)) SelsToConvert.insert(*SI); } } } if (!SelsToConvert.empty()) { // Convert all clean-up eh.selectors, which are associated with "invokes" of // URoR calls, into catch-all eh.selectors. Changed = true; for (SmallPtrSet<IntrinsicInst*, 8>::iterator SI = SelsToConvert.begin(), SE = SelsToConvert.end(); SI != SE; ++SI) { IntrinsicInst *II = *SI; // Use the exception object pointer and the personality function // from the original selector. CallSite CS(II); IntrinsicInst::op_iterator I = CS.arg_begin(); IntrinsicInst::op_iterator E = CS.arg_end(); IntrinsicInst::op_iterator B = prior(E); // Exclude last argument if it is an integer. if (isa<ConstantInt>(B)) E = B; // Add exception object pointer (front). // Add personality function (next). // Add in any filter IDs (rest). SmallVector<Value*, 8> Args(I, E); Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator. 
CallInst *NewSelector = CallInst::Create(SelectorIntrinsic, Args.begin(), Args.end(), "eh.sel.catch.all", II); NewSelector->setTailCall(II->isTailCall()); NewSelector->setAttributes(II->getAttributes()); NewSelector->setCallingConv(II->getCallingConv()); II->replaceAllUsesWith(NewSelector); II->eraseFromParent(); } } Changed |= CleanupSelectors(CatchAllSels); return Changed; }
// Look up multiple symbols in the symbol table and return a set of // Modules that define those symbols. bool Archive::findModulesDefiningSymbols(std::set<std::string>& symbols, SmallVectorImpl<Module*>& result, std::string* error) { if (!mapfile || !base) { if (error) *error = "Empty archive invalid for finding modules defining symbols"; return false; } if (symTab.empty()) { // We don't have a symbol table, so we must build it now but lets also // make sure that we populate the modules table as we do this to ensure // that we don't load them twice when findModuleDefiningSymbol is called // below. // Get a pointer to the first file const char* At = base + firstFileOffset; const char* End = mapfile->getBufferEnd(); while ( At < End) { // Compute the offset to be put in the symbol table unsigned offset = At - base - firstFileOffset; // Parse the file's header ArchiveMember* mbr = parseMemberHeader(At, End, error); if (!mbr) return false; // If it contains symbols if (mbr->isBitcode()) { // Get the symbols std::vector<std::string> symbols; std::string FullMemberName = archPath.str() + "(" + mbr->getPath().str() + ")"; Module* M = GetBitcodeSymbols(At, mbr->getSize(), FullMemberName, Context, symbols, error); if (M) { // Insert the module's symbols into the symbol table for (std::vector<std::string>::iterator I = symbols.begin(), E=symbols.end(); I != E; ++I ) { symTab.insert(std::make_pair(*I, offset)); } // Insert the Module and the ArchiveMember into the table of // modules. modules.insert(std::make_pair(offset, std::make_pair(M, mbr))); } else { if (error) *error = "Can't parse bitcode member: " + mbr->getPath().str() + ": " + *error; delete mbr; return false; } } // Go to the next file location At += mbr->getSize(); if ((intptr_t(At) & 1) == 1) At++; } } // At this point we have a valid symbol table (one way or another) so we // just use it to quickly find the symbols requested. SmallPtrSet<Module*, 16> Added; for (std::set<std::string>::iterator I=symbols.begin(), Next = I, E=symbols.end(); I != E; I = Next) { // Increment Next before we invalidate it. ++Next; // See if this symbol exists Module* m = findModuleDefiningSymbol(*I,error); if (!m) continue; bool NewMember = Added.insert(m); if (!NewMember) continue; // The symbol exists, insert the Module into our result. result.push_back(m); // Remove the symbol now that its been resolved. symbols.erase(I); } return true; }
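// --- Illustrative sketch (not part of the Archive reader) ---
// A standalone analogue of the erase-while-iterating idiom used above: the
// next iterator is advanced before the current element may be erased, so the
// erase never invalidates the iterator the loop continues with. The predicate
// is a hypothetical stand-in for findModuleDefiningSymbol succeeding.
#include <iterator>
#include <set>
#include <string>

template <typename Pred>
void pruneResolvedSymbols(std::set<std::string> &Symbols, Pred Resolved) {
  for (auto I = Symbols.begin(), E = Symbols.end(); I != E;) {
    auto Next = std::next(I); // Advance before a possible erase.
    if (Resolved(*I))
      Symbols.erase(I);
    I = Next;
  }
}
// In C++11 and later the same loop can also be written with
// 'I = Symbols.erase(I)', since std::set::erase returns the next iterator.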
/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks. /// Emit code to ensure constants are copied into registers when needed. /// Remember the virtual registers that need to be added to the Machine PHI /// nodes as input. We cannot just directly add them, because expansion /// might result in multiple MBB's for one BB. As such, the start of the /// BB might correspond to a different MBB than the end. bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { const TerminatorInst *TI = LLVMBB->getTerminator(); SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size(); // Check successor nodes' PHI nodes that expect a constant to be available // from this block. for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { const BasicBlock *SuccBB = TI->getSuccessor(succ); if (!isa<PHINode>(SuccBB->begin())) continue; MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; // If this terminator has multiple identical successors (common for // switches), only handle each succ once. if (!SuccsHandled.insert(SuccMBB)) continue; MachineBasicBlock::iterator MBBI = SuccMBB->begin(); // At this point we know that there is a 1-1 correspondence between LLVM PHI // nodes and Machine PHI nodes, but the incoming operands have not been // emitted yet. for (BasicBlock::const_iterator I = SuccBB->begin(); const PHINode *PN = dyn_cast<PHINode>(I); ++I) { // Ignore dead phi's. if (PN->use_empty()) continue; // Only handle legal types. Two interesting things to note here. First, // by bailing out early, we may leave behind some dead instructions, // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its // own moves. Second, this check is necessary because FastISel doesn't // use CreateRegs to create registers, so it always creates // exactly one register for each non-void instruction. EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { // Promote MVT::i1. if (VT == MVT::i1) VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT); else { FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); return false; } } const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); // Set the DebugLoc for the copy. Prefer the location of the operand // if there is one; use the location of the PHI otherwise. DL = PN->getDebugLoc(); if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp)) DL = Inst->getDebugLoc(); unsigned Reg = getRegForValue(PHIOp); if (Reg == 0) { FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); return false; } FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); DL = DebugLoc(); } } return true; }
void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { FirstTerminator = 0; if (MRI->isSSA()) { // If this block has allocatable physical registers live-in, check that // it is an entry block or landing pad. for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(), LE = MBB->livein_end(); LI != LE; ++LI) { unsigned reg = *LI; if (isAllocatable(reg) && !MBB->isLandingPad() && MBB != MBB->getParent()->begin()) { report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB); } } } // Count the number of landing pad successors. SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { if ((*I)->isLandingPad()) LandingPadSuccs.insert(*I); } const MCAsmInfo *AsmInfo = TM->getMCAsmInfo(); const BasicBlock *BB = MBB->getBasicBlock(); if (LandingPadSuccs.size() > 1 && !(AsmInfo && AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj && BB && isa<SwitchInst>(BB->getTerminator()))) report("MBB has more than one landing pad successor", MBB); // Call AnalyzeBranch. If it succeeds, there several more conditions to check. MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB), TBB, FBB, Cond)) { // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's // check whether its answers match up with reality. if (!TBB && !FBB) { // Block falls through to its successor. MachineFunction::const_iterator MBBI = MBB; ++MBBI; if (MBBI == MF->end()) { // It's possible that the block legitimately ends with a noreturn // call or an unreachable, in which case it won't actually fall // out the bottom of the function. } else if (MBB->succ_size() == LandingPadSuccs.size()) { // It's possible that the block legitimately ends with a noreturn // call or an unreachable, in which case it won't actuall fall // out of the block. } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) { report("MBB exits via unconditional fall-through but doesn't have " "exactly one CFG successor!", MBB); } else if (!MBB->isSuccessor(MBBI)) { report("MBB exits via unconditional fall-through but its successor " "differs from its CFG successor!", MBB); } if (!MBB->empty() && MBB->back().isBarrier() && !TII->isPredicated(&MBB->back())) { report("MBB exits via unconditional fall-through but ends with a " "barrier instruction!", MBB); } if (!Cond.empty()) { report("MBB exits via unconditional fall-through but has a condition!", MBB); } } else if (TBB && !FBB && Cond.empty()) { // Block unconditionally branches somewhere. if (MBB->succ_size() != 1+LandingPadSuccs.size()) { report("MBB exits via unconditional branch but doesn't have " "exactly one CFG successor!", MBB); } else if (!MBB->isSuccessor(TBB)) { report("MBB exits via unconditional branch but the CFG " "successor doesn't match the actual successor!", MBB); } if (MBB->empty()) { report("MBB exits via unconditional branch but doesn't contain " "any instructions!", MBB); } else if (!MBB->back().isBarrier()) { report("MBB exits via unconditional branch but doesn't end with a " "barrier instruction!", MBB); } else if (!MBB->back().isTerminator()) { report("MBB exits via unconditional branch but the branch isn't a " "terminator instruction!", MBB); } } else if (TBB && !FBB && !Cond.empty()) { // Block conditionally branches somewhere, otherwise falls through. 
MachineFunction::const_iterator MBBI = MBB; ++MBBI; if (MBBI == MF->end()) { report("MBB conditionally falls through out of function!", MBB); } if (MBB->succ_size() != 2) { report("MBB exits via conditional branch/fall-through but doesn't have " "exactly two CFG successors!", MBB); } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) { report("MBB exits via conditional branch/fall-through but the CFG " "successors don't match the actual successors!", MBB); } if (MBB->empty()) { report("MBB exits via conditional branch/fall-through but doesn't " "contain any instructions!", MBB); } else if (MBB->back().isBarrier()) { report("MBB exits via conditional branch/fall-through but ends with a " "barrier instruction!", MBB); } else if (!MBB->back().isTerminator()) { report("MBB exits via conditional branch/fall-through but the branch " "isn't a terminator instruction!", MBB); } } else if (TBB && FBB) { // Block conditionally branches somewhere, otherwise branches // somewhere else. if (MBB->succ_size() != 2) { report("MBB exits via conditional branch/branch but doesn't have " "exactly two CFG successors!", MBB); } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) { report("MBB exits via conditional branch/branch but the CFG " "successors don't match the actual successors!", MBB); } if (MBB->empty()) { report("MBB exits via conditional branch/branch but doesn't " "contain any instructions!", MBB); } else if (!MBB->back().isBarrier()) { report("MBB exits via conditional branch/branch but doesn't end with a " "barrier instruction!", MBB); } else if (!MBB->back().isTerminator()) { report("MBB exits via conditional branch/branch but the branch " "isn't a terminator instruction!", MBB); } if (Cond.empty()) { report("MBB exits via conditinal branch/branch but there's no " "condition!", MBB); } } else { report("AnalyzeBranch returned invalid data!", MBB); } } regsLive.clear(); for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) { if (!TargetRegisterInfo::isPhysicalRegister(*I)) { report("MBB live-in list contains non-physical register", MBB); continue; } regsLive.insert(*I); for (const uint16_t *R = TRI->getSubRegisters(*I); *R; R++) regsLive.insert(*R); } regsLiveInButUnused = regsLive; const MachineFrameInfo *MFI = MF->getFrameInfo(); assert(MFI && "Function has no frame info"); BitVector PR = MFI->getPristineRegs(MBB); for (int I = PR.find_first(); I>0; I = PR.find_next(I)) { regsLive.insert(I); for (const uint16_t *R = TRI->getSubRegisters(I); *R; R++) regsLive.insert(*R); } regsKilled.clear(); regsDefined.clear(); if (Indexes) lastIndex = Indexes->getMBBStartIdx(MBB); }
/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind /// edge and spill them. void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst *> Invokes) { // Finally, scan the code looking for instructions with bad live ranges. for (BasicBlock &BB : F) { for (Instruction &Inst : BB) { // Ignore obvious cases we don't have to handle. In particular, most // instructions either have no uses or only have a single use inside the // current block. Ignore them quickly. if (Inst.use_empty()) continue; if (Inst.hasOneUse() && cast<Instruction>(Inst.user_back())->getParent() == &BB && !isa<PHINode>(Inst.user_back())) continue; // If this is an alloca in the entry block, it's not a real register // value. if (auto *AI = dyn_cast<AllocaInst>(&Inst)) if (AI->isStaticAlloca()) continue; // Avoid iterator invalidation by copying users to a temporary vector. SmallVector<Instruction *, 16> Users; for (User *U : Inst.users()) { Instruction *UI = cast<Instruction>(U); if (UI->getParent() != &BB || isa<PHINode>(UI)) Users.push_back(UI); } // Find all of the blocks that this value is live in. SmallPtrSet<BasicBlock *, 32> LiveBBs; LiveBBs.insert(&BB); while (!Users.empty()) { Instruction *U = Users.pop_back_val(); if (!isa<PHINode>(U)) { MarkBlocksLiveIn(U->getParent(), LiveBBs); } else { // Uses for a PHI node occur in their predecessor block. PHINode *PN = cast<PHINode>(U); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == &Inst) MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); } } // Now that we know all of the blocks that this thing is live in, see if // it includes any of the unwind locations. bool NeedsSpill = false; for (InvokeInst *Invoke : Invokes) { BasicBlock *UnwindBlock = Invoke->getUnwindDest(); if (UnwindBlock != &BB && LiveBBs.count(UnwindBlock)) { LLVM_DEBUG(dbgs() << "SJLJ Spill: " << Inst << " around " << UnwindBlock->getName() << "\n"); NeedsSpill = true; break; } } // If we decided we need a spill, do it. // FIXME: Spilling this way is overkill, as it forces all uses of // the value to be reloaded from the stack slot, even those that aren't // in the unwind blocks. We should be more selective. if (NeedsSpill) { DemoteRegToStack(Inst, true); ++NumSpilled; } } } // Go through the landing pads and remove any PHIs there. for (InvokeInst *Invoke : Invokes) { BasicBlock *UnwindBlock = Invoke->getUnwindDest(); LandingPadInst *LPI = UnwindBlock->getLandingPadInst(); // Place PHIs into a set to avoid invalidating the iterator. SmallPtrSet<PHINode *, 8> PHIsToDemote; for (BasicBlock::iterator PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN) PHIsToDemote.insert(cast<PHINode>(PN)); if (PHIsToDemote.empty()) continue; // Demote the PHIs to the stack. for (PHINode *PN : PHIsToDemote) DemotePHIToStack(PN); // Move the landingpad instruction back to the top of the landing pad block. LPI->moveBefore(&UnwindBlock->front()); } }
/// \brief Recursively traverse the CFG of the function, renaming loads and /// stores to the allocas which we are promoting. /// /// IncomingVals indicates what value each Alloca contains on exit from the /// predecessor block Pred. void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, RenamePassData::ValVector &IncomingVals, std::vector<RenamePassData> &Worklist) { NextIteration: // If we are inserting any phi nodes into this BB, they will already be in the // block. if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) { // If we have PHI nodes to update, compute the number of edges from Pred to // BB. if (PhiToAllocaMap.count(APN)) { // We want to be able to distinguish between PHI nodes being inserted by // this invocation of mem2reg from those phi nodes that already existed in // the IR before mem2reg was run. We determine that APN is being inserted // because it is missing incoming edges. All other PHI nodes being // inserted by this pass of mem2reg will have the same number of incoming // operands so far. Remember this count. unsigned NewPHINumOperands = APN->getNumOperands(); unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB); assert(NumEdges && "Must be at least one edge from Pred to BB!"); // Add entries for all the phis. BasicBlock::iterator PNI = BB->begin(); do { unsigned AllocaNo = PhiToAllocaMap[APN]; // Add N incoming values to the PHI node. for (unsigned i = 0; i != NumEdges; ++i) APN->addIncoming(IncomingVals[AllocaNo], Pred); // The currently active variable for this block is now the PHI. IncomingVals[AllocaNo] = APN; // Get the next phi node. ++PNI; APN = dyn_cast<PHINode>(PNI); if (!APN) break; // Verify that it is missing entries. If not, it is not being inserted // by this mem2reg invocation so we want to ignore it. } while (APN->getNumOperands() == NewPHINumOperands); } } // Don't revisit blocks. if (!Visited.insert(BB)) return; for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) { Instruction *I = II++; // get the instruction, increment iterator if (LoadInst *LI = dyn_cast<LoadInst>(I)) { AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand()); if (!Src) continue; DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src); if (AI == AllocaLookup.end()) continue; Value *V = IncomingVals[AI->second]; // Anything using the load now uses the current value. LI->replaceAllUsesWith(V); if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); BB->getInstList().erase(LI); } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { // Delete this instruction and mark the name as the current holder of the // value AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand()); if (!Dest) continue; DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); if (ai == AllocaLookup.end()) continue; // what value were we writing? IncomingVals[ai->second] = SI->getOperand(0); // Record debuginfo for the store before removing it. if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) ConvertDebugDeclareToDebugValue(DDI, SI, DIB); BB->getInstList().erase(SI); } } // 'Recurse' to our successors. succ_iterator I = succ_begin(BB), E = succ_end(BB); if (I == E) return; // Keep track of the successors so we don't visit the same successor twice SmallPtrSet<BasicBlock *, 8> VisitedSuccs; // Handle the first successor without using the worklist. 
VisitedSuccs.insert(*I); Pred = BB; BB = *I; ++I; for (; I != E; ++I) if (VisitedSuccs.insert(*I)) Worklist.push_back(RenamePassData(*I, Pred, IncomingVals)); goto NextIteration; }
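// --- Illustrative sketch (not part of mem2reg) ---
// A standalone analogue of the control flow in RenamePass: instead of
// recursing, the function loops back to the top for the first successor and
// pushes only the remaining successors onto an explicit worklist. Toy size_t
// block ids replace BasicBlock pointers, and the per-successor dedup set
// (VisitedSuccs) is omitted since the visited check already handles repeats.
#include <cstddef>
#include <unordered_set>
#include <vector>

void visitAllBlocks(const std::vector<std::vector<std::size_t>> &Succs,
                    std::size_t Entry,
                    std::unordered_set<std::size_t> &Visited) {
  std::vector<std::size_t> Worklist{Entry};
  while (!Worklist.empty()) {
    std::size_t BB = Worklist.back();
    Worklist.pop_back();
    for (;;) {
      if (!Visited.insert(BB).second)
        break; // Already processed; take the next worklist entry.
      // ... per-block work (renaming loads/stores) would go here ...
      const auto &S = Succs[BB];
      if (S.empty())
        break;
      // Continue directly with the first successor; queue the rest.
      for (std::size_t i = 1; i < S.size(); ++i)
        Worklist.push_back(S[i]);
      BB = S[0];
    }
  }
}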
bool TypeChecker::validateGenericFuncSignature(AbstractFunctionDecl *func) { bool invalid = false; // Create the archetype builder. ArchetypeBuilder builder = createArchetypeBuilder(func->getParentModule()); // Type check the function declaration, treating all generic type // parameters as dependent, unresolved. DependentGenericTypeResolver dependentResolver(builder); if (checkGenericFuncSignature(*this, &builder, func, dependentResolver)) invalid = true; // If this triggered a recursive validation, back out: we're done. // FIXME: This is an awful hack. if (func->hasType()) return !func->isInvalid(); // Finalize the generic requirements. (void)builder.finalize(func->getLoc()); // The archetype builder now has all of the requirements, although there might // still be errors that have not yet been diagnosed. Revert the generic // function signature and type-check it again, completely. revertGenericFuncSignature(func); CompleteGenericTypeResolver completeResolver(*this, builder); if (checkGenericFuncSignature(*this, nullptr, func, completeResolver)) invalid = true; // The generic function signature is complete and well-formed. Determine // the type of the generic function. // Collect the complete set of generic parameter types. SmallVector<GenericTypeParamType *, 4> allGenericParams; collectGenericParamTypes(func->getGenericParams(), func->getDeclContext()->getGenericSignatureOfContext(), allGenericParams); auto sig = builder.getGenericSignature(allGenericParams); // Debugging of the archetype builder and generic signature generation. if (sig && Context.LangOpts.DebugGenericSignatures) { func->dumpRef(llvm::errs()); llvm::errs() << "\n"; builder.dump(llvm::errs()); llvm::errs() << "Generic signature: "; sig->print(llvm::errs()); llvm::errs() << "\n"; llvm::errs() << "Canonical generic signature: "; sig->getCanonicalSignature()->print(llvm::errs()); llvm::errs() << "\n"; llvm::errs() << "Canonical generic signature for mangling: "; sig->getCanonicalManglingSignature(*func->getParentModule()) ->print(llvm::errs()); llvm::errs() << "\n"; } func->setGenericSignature(sig); if (invalid) { func->overwriteType(ErrorType::get(Context)); return true; } // Compute the function type. Type funcTy; Type initFuncTy; if (auto fn = dyn_cast<FuncDecl>(func)) { funcTy = fn->getBodyResultTypeLoc().getType(); if (!funcTy) { funcTy = TupleType::getEmpty(Context); } else { funcTy = getResultType(*this, fn, funcTy); } } else if (auto ctor = dyn_cast<ConstructorDecl>(func)) { // FIXME: shouldn't this just be // ctor->getDeclContext()->getDeclaredInterfaceType()? if (ctor->getDeclContext()->getAsProtocolOrProtocolExtensionContext()) { funcTy = ctor->getDeclContext()->getProtocolSelf()->getDeclaredType(); } else { funcTy = ctor->getExtensionType()->getAnyNominal() ->getDeclaredInterfaceType(); } // Adjust result type for failability. if (ctor->getFailability() != OTK_None) funcTy = OptionalType::get(ctor->getFailability(), funcTy); initFuncTy = funcTy; } else { assert(isa<DestructorDecl>(func)); funcTy = TupleType::getEmpty(Context); } auto paramLists = func->getParameterLists(); SmallVector<ParameterList*, 4> storedParamLists; // FIXME: Destructors don't have the '()' pattern in their signature, so // paste it here. 
  if (isa<DestructorDecl>(func)) {
    assert(paramLists.size() == 1 && "Only the self paramlist");
    storedParamLists.push_back(paramLists[0]);
    storedParamLists.push_back(ParameterList::createEmpty(Context));
    paramLists = storedParamLists;
  }

  bool hasSelf = func->getDeclContext()->isTypeContext();
  for (unsigned i = 0, e = paramLists.size(); i != e; ++i) {
    Type argTy;
    Type initArgTy;
    Type selfTy;

    if (i == e - 1 && hasSelf) {
      selfTy = func->computeInterfaceSelfType(/*isInitializingCtor=*/false);
      // Substitute in our own 'self' parameter.
      argTy = selfTy;
      if (initFuncTy) {
        initArgTy = func->computeInterfaceSelfType(/*isInitializingCtor=*/true);
      }
    } else {
      argTy = paramLists[e - i - 1]->getType(Context);

      // For an implicit declaration, our argument type will be in terms of
      // archetypes rather than dependent types. Replace the archetypes with
      // their corresponding dependent types.
      if (func->isImplicit()) {
        argTy = ArchetypeBuilder::mapTypeOutOfContext(func, argTy);
      }

      if (initFuncTy)
        initArgTy = argTy;
    }

    auto info = applyFunctionTypeAttributes(func, i);

    // FIXME: We shouldn't even get here if the function isn't locally generic
    // to begin with, but fixing that requires a lot of reengineering for
    // local definitions in generic contexts.
    if (sig && i == e - 1) {
      if (func->getGenericParams()) {
        // Collect all generic params referenced in parameter types,
        // return type or requirements.
        SmallPtrSet<GenericTypeParamDecl *, 4> referencedGenericParams;
        argTy.visit([&referencedGenericParams](Type t) {
          if (isa<GenericTypeParamType>(t.getCanonicalTypeOrNull())) {
            referencedGenericParams.insert(
                t->castTo<GenericTypeParamType>()->getDecl());
          }
        });
        funcTy.visit([&referencedGenericParams](Type t) {
          if (isa<GenericTypeParamType>(t.getCanonicalTypeOrNull())) {
            referencedGenericParams.insert(
                t->castTo<GenericTypeParamType>()->getDecl());
          }
        });

        auto requirements = sig->getRequirements();
        for (auto req : requirements) {
          if (req.getKind() == RequirementKind::SameType) {
            // Same-type requirements may allow for generic inference, even if
            // this generic parameter is not mentioned in the function
            // signature.
            // TODO: Make the test more precise.
            auto left = req.getFirstType();
            auto right = req.getSecondType();
            // For now, consider any reference inside a requirement as a
            // possibility to infer the generic type.
            left.visit([&referencedGenericParams](Type t) {
              if (isa<GenericTypeParamType>(t.getCanonicalTypeOrNull())) {
                referencedGenericParams.insert(
                    t->castTo<GenericTypeParamType>()->getDecl());
              }
            });
            right.visit([&referencedGenericParams](Type t) {
              if (isa<GenericTypeParamType>(t.getCanonicalTypeOrNull())) {
                referencedGenericParams.insert(
                    t->castTo<GenericTypeParamType>()->getDecl());
              }
            });
          }
        }

        // Find the depth of the function's own generic parameters.
        unsigned fnGenericParamsDepth = func->getGenericParams()->getDepth();

        // Check that every generic parameter type from the signature is
        // among referencedGenericParams.
        for (auto *genParam : sig->getGenericParams()) {
          auto *paramDecl = genParam->getDecl();
          if (paramDecl->getDepth() != fnGenericParamsDepth)
            continue;
          if (!referencedGenericParams.count(paramDecl)) {
            // Produce an error that this generic parameter cannot be bound.
            diagnose(paramDecl->getLoc(), diag::unreferenced_generic_parameter,
                     paramDecl->getNameStr());
            func->setInvalid();
          }
        }
      }

      funcTy = GenericFunctionType::get(sig, argTy, funcTy, info);
      if (initFuncTy)
        initFuncTy = GenericFunctionType::get(sig, initArgTy, initFuncTy, info);
    } else {
      funcTy = FunctionType::get(argTy, funcTy, info);
      if (initFuncTy)
        initFuncTy = FunctionType::get(initArgTy, initFuncTy, info);
    }
  }

  // Record the interface type.
  func->setInterfaceType(funcTy);
  if (initFuncTy)
    cast<ConstructorDecl>(func)->setInitializerInterfaceType(initFuncTy);

  return false;
}
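// Illustrative sketch, not part of the original source: the four identical
// visit() lambdas above could be shared through one helper that records every
// generic parameter a type mentions. The helper name is hypothetical; the
// calls it wraps are exactly the ones used in validateGenericFuncSignature.
static void collectReferencedGenericParams(
    Type ty, SmallPtrSet<GenericTypeParamDecl *, 4> &referenced) {
  ty.visit([&referenced](Type t) {
    if (isa<GenericTypeParamType>(t.getCanonicalTypeOrNull()))
      referenced.insert(t->castTo<GenericTypeParamType>()->getDecl());
  });
}
// With this helper, the body above would reduce to calls such as
// collectReferencedGenericParams(argTy, referencedGenericParams) for the
// argument type, the result type, and both sides of each same-type
// requirement.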