// Traverse the DFG and collect the set dead RefNodes and the set of // dead instructions. Return "true" if any of these sets is non-empty, // "false" otherwise. bool DeadCodeElimination::collect() { // This function works by first finding all live nodes. The dead nodes // are then the complement of the set of live nodes. // // Assume that all nodes are dead. Identify instructions which must be // considered live, i.e. instructions with observable side-effects, such // as calls and stores. All arguments of such instructions are considered // live. For each live def, all operands used in the corresponding // instruction are considered live. For each live use, all its reaching // defs are considered live. LiveNodes.clear(); SetVector<NodeId> WorkQ; for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) scanInstr(IA, WorkQ); while (!WorkQ.empty()) { NodeId N = *WorkQ.begin(); WorkQ.remove(N); LiveNodes.insert(N); auto RA = DFG.addr<RefNode*>(N); if (DFG.IsDef(RA)) processDef(RA, WorkQ); else processUse(RA, WorkQ); } if (trace()) { dbgs() << "Live nodes:\n"; for (NodeId N : LiveNodes) { auto RA = DFG.addr<RefNode*>(N); dbgs() << PrintNode<RefNode*>(RA, DFG) << "\n"; } } auto IsDead = [this] (NodeAddr<InstrNode*> IA) -> bool { for (NodeAddr<DefNode*> DA : IA.Addr->members_if(DFG.IsDef, DFG)) if (LiveNodes.count(DA.Id)) return false; return true; }; for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) { for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) { for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG)) if (!LiveNodes.count(RA.Id)) DeadNodes.insert(RA.Id); if (DFG.IsCode<NodeAttrs::Stmt>(IA)) if (isLiveInstr(NodeAddr<StmtNode*>(IA).Addr->getCode())) continue; if (IsDead(IA)) { DeadInstrs.insert(IA.Id); if (trace()) dbgs() << "Dead instr: " << PrintNode<InstrNode*>(IA, DFG) << "\n"; } } } return !DeadNodes.empty(); }
void GCPtrTracker::recalculateBBsStates() { SetVector<const BasicBlock *> Worklist; // TODO: This order is suboptimal, it's better to replace it with priority // queue where priority is RPO number of BB. for (auto &BBI : BlockMap) Worklist.insert(BBI.first); // This loop iterates the AvailableIn/Out sets until it converges. // The AvailableIn and AvailableOut sets decrease as we iterate. while (!Worklist.empty()) { const BasicBlock *BB = Worklist.pop_back_val(); BasicBlockState *BBS = BlockMap[BB]; size_t OldInCount = BBS->AvailableIn.size(); for (const BasicBlock *PBB : predecessors(BB)) set_intersect(BBS->AvailableIn, BlockMap[PBB]->AvailableOut); assert(OldInCount >= BBS->AvailableIn.size() && "invariant!"); bool InputsChanged = OldInCount != BBS->AvailableIn.size(); bool ContributionChanged = removeValidUnrelocatedDefs(BB, BBS, BBS->Contribution); if (!InputsChanged && !ContributionChanged) continue; size_t OldOutCount = BBS->AvailableOut.size(); transferBlock(BB, *BBS, ContributionChanged); if (OldOutCount != BBS->AvailableOut.size()) { assert(OldOutCount > BBS->AvailableOut.size() && "invariant!"); Worklist.insert(succ_begin(BB), succ_end(BB)); } } }
// If a linkonce global is present in the MustPreserveSymbols, we need to make // sure we honor this. To force the compiler to not drop it, we add it to the // "llvm.compiler.used" global. void LTOCodeGenerator::preserveDiscardableGVs( Module &TheModule, llvm::function_ref<bool(const GlobalValue &)> mustPreserveGV) { SetVector<Constant *> UsedValuesSet; if (GlobalVariable *LLVMUsed = TheModule.getGlobalVariable("llvm.compiler.used")) { ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); for (auto &V : Inits->operands()) UsedValuesSet.insert(cast<Constant>(&V)); LLVMUsed->eraseFromParent(); } llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(TheModule.getContext()); auto mayPreserveGlobal = [&](GlobalValue &GV) { if (!GV.isDiscardableIfUnused() || GV.isDeclaration()) return; if (!mustPreserveGV(GV)) return; if (GV.hasAvailableExternallyLinkage()) { emitWarning( (Twine("Linker asked to preserve available_externally global: '") + GV.getName() + "'").str()); return; } if (GV.hasInternalLinkage()) { emitWarning((Twine("Linker asked to preserve internal global: '") + GV.getName() + "'").str()); return; } UsedValuesSet.insert(ConstantExpr::getBitCast(&GV, i8PTy)); }; for (auto &GV : TheModule) mayPreserveGlobal(GV); for (auto &GV : TheModule.globals()) mayPreserveGlobal(GV); for (auto &GV : TheModule.aliases()) mayPreserveGlobal(GV); if (UsedValuesSet.empty()) return; llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, UsedValuesSet.size()); auto *LLVMUsed = new llvm::GlobalVariable( TheModule, ATy, false, llvm::GlobalValue::AppendingLinkage, llvm::ConstantArray::get(ATy, UsedValuesSet.getArrayRef()), "llvm.compiler.used"); LLVMUsed->setSection("llvm.metadata"); }
/// Inserts safepoint polls (backedge and entry) into \p F and, unless
/// NoStatepoints is set, replaces every collected call site with a statepoint.
/// Returns true if the function was modified.
bool PlaceSafepoints::runOnFunction(Function &F) {
  if (F.isDeclaration() || F.empty()) {
    // This is a declaration, nothing to do.  Must exit early to avoid crash in
    // dom tree calculation
    return false;
  }

  if (isGCSafepointPoll(F)) {
    // Given we're inlining this inside of safepoint poll insertion, this
    // doesn't make any sense.  Note that we do make any contained calls
    // parseable after we inline a poll.
    return false;
  }

  if (!shouldRewriteFunction(F))
    return false;

  bool modified = false;

  // In various bits below, we rely on the fact that uses are reachable from
  // defs.  When there are basic blocks unreachable from the entry, dominance
  // and reachability queries return non-sensical results.  Thus, we preprocess
  // the function to ensure these properties hold.
  modified |= removeUnreachableBlocks(F);

  // STEP 1 - Insert the safepoint polling locations.  We do not need to
  // actually insert parse points yet.  That will be done for all polls and
  // calls in a single pass.

  DominatorTree DT;
  DT.recalculate(F);

  SmallVector<Instruction *, 16> PollsNeeded;
  std::vector<CallSite> ParsePointNeeded;

  if (enableBackedgeSafepoints(F)) {
    // Construct a pass manager to run the LoopPass backedge logic.  We
    // need the pass manager to handle scheduling all the loop passes
    // appropriately.  Doing this by hand is painful and just not worth messing
    // with for the moment.
    legacy::FunctionPassManager FPM(F.getParent());
    bool CanAssumeCallSafepoints = enableCallSafepoints(F);
    // The pass object is handed to the pass manager via FPM.add below.
    PlaceBackedgeSafepointsImpl *PBS =
        new PlaceBackedgeSafepointsImpl(CanAssumeCallSafepoints);
    FPM.add(PBS);
    FPM.run(F);

    // We preserve dominance information when inserting the poll, otherwise
    // we'd have to recalculate this on every insert
    DT.recalculate(F);

    auto &PollLocations = PBS->PollLocations;

    auto OrderByBBName = [](Instruction *a, Instruction *b) {
      return a->getParent()->getName() < b->getParent()->getName();
    };
    // We need the order of list to be stable so that naming ends up stable
    // when we split edges.  This makes test cases much easier to write.
    std::sort(PollLocations.begin(), PollLocations.end(), OrderByBBName);

    // We can sometimes end up with duplicate poll locations.  This happens if
    // a single loop is visited more than once.   The fact this happens seems
    // wrong, but it does happen for the split-backedge.ll test case.
    // NOTE(review): std::unique only removes *adjacent* equal pointers, while
    // the sort above orders by block name alone.  If distinct blocks can share
    // a name (e.g. several unnamed blocks), duplicates may survive - confirm.
    PollLocations.erase(std::unique(PollLocations.begin(),
                                    PollLocations.end()),
                        PollLocations.end());

    // Insert a poll at each point the analysis pass identified
    // The poll location must be the terminator of a loop latch block.
    for (TerminatorInst *Term : PollLocations) {
      // We are inserting a poll, the function is modified
      modified = true;

      if (SplitBackedge) {
        // Split the backedge of the loop and insert the poll within that new
        // basic block.  This creates a loop with two latches per original
        // latch (which is non-ideal), but this appears to be easier to
        // optimize in practice than inserting the poll immediately before the
        // latch test.

        // Since this is a latch, at least one of the successors must dominate
        // it.  Its possible that we have a) duplicate edges to the same header
        // and b) edges to distinct loop headers.  We need to insert polls on
        // each.
        SetVector<BasicBlock *> Headers;
        for (unsigned i = 0; i < Term->getNumSuccessors(); i++) {
          BasicBlock *Succ = Term->getSuccessor(i);
          if (DT.dominates(Succ, Term->getParent())) {
            Headers.insert(Succ);
          }
        }
        assert(!Headers.empty() && "poll location is not a loop latch?");

        // The split loop structure here is so that we only need to recalculate
        // the dominator tree once.  Alternatively, we could just keep it up to
        // date and use a more natural merged loop.
        for (BasicBlock *Header : Headers) {
          BasicBlock *NewBB = SplitEdge(Term->getParent(), Header, &DT);
          PollsNeeded.push_back(NewBB->getTerminator());
          NumBackedgeSafepoints++;
        }
      } else {
        // Split the latch block itself, right before the terminator.
        PollsNeeded.push_back(Term);
        NumBackedgeSafepoints++;
      }
    }
  }

  if (enableEntrySafepoints(F)) {
    // A null location means no entry poll is inserted (policy choice not to
    // insert?).
    if (Instruction *Location = findLocationForEntrySafepoint(F, DT)) {
      PollsNeeded.push_back(Location);
      modified = true;
      NumEntrySafepoints++;
    }
  }

  // Now that we've identified all the needed safepoint poll locations, insert
  // safepoint polls themselves.
  for (Instruction *PollLocation : PollsNeeded) {
    std::vector<CallSite> RuntimeCalls;
    InsertSafepointPoll(PollLocation, RuntimeCalls);
    ParsePointNeeded.insert(ParsePointNeeded.end(), RuntimeCalls.begin(),
                            RuntimeCalls.end());
  }

  // If we've been asked to not wrap the calls with gc.statepoint, then we're
  // done.  In the near future, this option will be "constant folded" to true,
  // and the code below that deals with insert gc.statepoint calls will be
  // removed.  Wrapping potentially safepointing calls in gc.statepoint will
  // then become the responsibility of the RewriteStatepointsForGC pass.
  if (NoStatepoints)
    return modified;

  PollsNeeded.clear(); // make sure we don't accidentally use
  // The dominator tree has been invalidated by the inlining performed in the
  // above loop.  TODO: Teach the inliner how to update the dom tree?
  DT.recalculate(F);

  if (enableCallSafepoints(F)) {
    std::vector<CallSite> Calls;
    findCallSafepoints(F, Calls);
    NumCallSafepoints += Calls.size();
    ParsePointNeeded.insert(ParsePointNeeded.end(), Calls.begin(), Calls.end());
  }

  // Unique the vectors since we can end up with duplicates if we scan the call
  // site for call safepoints after we add it for entry or backedge.  The
  // only reason we need tracking at all is that some functions might have
  // polls but not call safepoints and thus we might miss marking the runtime
  // calls for the polls. (This is useful in test cases!)
  unique_unsorted(ParsePointNeeded);

  // Any parse point (no matter what source) will be handled here

  // We're about to start modifying the function
  if (!ParsePointNeeded.empty())
    modified = true;

  // Now run through and insert the safepoints, but do _NOT_ update or remove
  // any existing uses.  We have references to live variables that need to
  // survive to the last iteration of this loop.
  std::vector<Value *> Results;
  Results.reserve(ParsePointNeeded.size());
  for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
    CallSite &CS = ParsePointNeeded[i];

    // For invoke statepoints we need to remove all phi nodes at the normal
    // destination block.
    // Reason for this is that we can place gc_result only after last phi node
    // in basic block. We will get malformed code after RAUW for the
    // gc_result if one of this phi nodes uses result from the invoke.
    if (InvokeInst *Invoke = dyn_cast<InvokeInst>(CS.getInstruction())) {
      normalizeForInvokeSafepoint(Invoke->getNormalDest(),
                                  Invoke->getParent());
    }

    Value *GCResult = ReplaceWithStatepoint(CS);
    Results.push_back(GCResult);
  }
  assert(Results.size() == ParsePointNeeded.size());

  // Adjust all users of the old call sites to use the new ones instead
  for (size_t i = 0; i < ParsePointNeeded.size(); i++) {
    CallSite &CS = ParsePointNeeded[i];
    Value *GCResult = Results[i];
    if (GCResult) {
      // Can not RAUW for the invoke gc result in case of phi nodes present.
      assert(CS.isCall() || !isa<PHINode>(cast<Instruction>(GCResult)->getParent()->begin()));

      // Replace all uses with the new call
      CS.getInstruction()->replaceAllUsesWith(GCResult);
    }

    // Now that we've handled all uses, remove the original call itself
    // Note: The insert point can't be the deleted instruction!
    CS.getInstruction()->eraseFromParent();
  }
  return modified;
}
/// Inserts safepoint polls (backedge and entry) into \p F.
/// Returns true if the function was modified.
bool PlaceSafepoints::runOnFunction(Function &F) {
  if (F.isDeclaration() || F.empty()) {
    // This is a declaration, nothing to do.  Must exit early to avoid crash in
    // dom tree calculation
    return false;
  }

  if (isGCSafepointPoll(F)) {
    // Given we're inlining this inside of safepoint poll insertion, this
    // doesn't make any sense.  Note that we do make any contained calls
    // parseable after we inline a poll.
    return false;
  }

  if (!shouldRewriteFunction(F))
    return false;

  const TargetLibraryInfo &TLI =
      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

  bool Modified = false;

  // In various bits below, we rely on the fact that uses are reachable from
  // defs.  When there are basic blocks unreachable from the entry, dominance
  // and reachability queries return non-sensical results.  Thus, we preprocess
  // the function to ensure these properties hold.
  Modified |= removeUnreachableBlocks(F);

  // Identify the safepoint polling locations first; the polls themselves are
  // inserted in one go at the end of this function.

  DominatorTree DT;
  DT.recalculate(F);

  SmallVector<Instruction *, 16> PollsNeeded;

  if (enableBackedgeSafepoints(F)) {
    // Construct a pass manager to run the LoopPass backedge logic.  We
    // need the pass manager to handle scheduling all the loop passes
    // appropriately.  Doing this by hand is painful and just not worth messing
    // with for the moment.
    legacy::FunctionPassManager FPM(F.getParent());
    bool CanAssumeCallSafepoints = enableCallSafepoints(F);
    // The pass object is handed to the pass manager via FPM.add below.
    auto *PBS = new PlaceBackedgeSafepointsImpl(CanAssumeCallSafepoints);
    FPM.add(PBS);
    FPM.run(F);

    // We preserve dominance information when inserting the poll, otherwise
    // we'd have to recalculate this on every insert
    DT.recalculate(F);

    auto &PollLocations = PBS->PollLocations;

    auto OrderByBBName = [](Instruction *a, Instruction *b) {
      return a->getParent()->getName() < b->getParent()->getName();
    };
    // We need the order of list to be stable so that naming ends up stable
    // when we split edges.  This makes test cases much easier to write.
    llvm::sort(PollLocations.begin(), PollLocations.end(), OrderByBBName);

    // We can sometimes end up with duplicate poll locations.  This happens if
    // a single loop is visited more than once.   The fact this happens seems
    // wrong, but it does happen for the split-backedge.ll test case.
    // NOTE(review): std::unique only removes *adjacent* equal pointers, while
    // the sort above orders by block name alone.  If distinct blocks can share
    // a name (e.g. several unnamed blocks), duplicates may survive - confirm.
    PollLocations.erase(std::unique(PollLocations.begin(),
                                    PollLocations.end()),
                        PollLocations.end());

    // Insert a poll at each point the analysis pass identified
    // The poll location must be the terminator of a loop latch block.
    for (TerminatorInst *Term : PollLocations) {
      // We are inserting a poll, the function is modified
      Modified = true;

      if (SplitBackedge) {
        // Split the backedge of the loop and insert the poll within that new
        // basic block.  This creates a loop with two latches per original
        // latch (which is non-ideal), but this appears to be easier to
        // optimize in practice than inserting the poll immediately before the
        // latch test.

        // Since this is a latch, at least one of the successors must dominate
        // it.  Its possible that we have a) duplicate edges to the same header
        // and b) edges to distinct loop headers.  We need to insert polls on
        // each.
        SetVector<BasicBlock *> Headers;
        for (unsigned i = 0; i < Term->getNumSuccessors(); i++) {
          BasicBlock *Succ = Term->getSuccessor(i);
          if (DT.dominates(Succ, Term->getParent())) {
            Headers.insert(Succ);
          }
        }
        assert(!Headers.empty() && "poll location is not a loop latch?");

        // The split loop structure here is so that we only need to recalculate
        // the dominator tree once.  Alternatively, we could just keep it up to
        // date and use a more natural merged loop.
        for (BasicBlock *Header : Headers) {
          BasicBlock *NewBB = SplitEdge(Term->getParent(), Header, &DT);
          PollsNeeded.push_back(NewBB->getTerminator());
          NumBackedgeSafepoints++;
        }
      } else {
        // Split the latch block itself, right before the terminator.
        PollsNeeded.push_back(Term);
        NumBackedgeSafepoints++;
      }
    }
  }

  if (enableEntrySafepoints(F)) {
    if (Instruction *Location = findLocationForEntrySafepoint(F, DT)) {
      PollsNeeded.push_back(Location);
      Modified = true;
      NumEntrySafepoints++;
    }
    // TODO: else we should assert that there was, in fact, a policy choice to
    // not insert a entry safepoint poll.
  }

  // Now that we've identified all the needed safepoint poll locations, insert
  // safepoint polls themselves.  The runtime calls reported back by
  // InsertSafepointPoll are not consumed by this function.
  for (Instruction *PollLocation : PollsNeeded) {
    std::vector<CallSite> RuntimeCalls;
    InsertSafepointPoll(PollLocation, RuntimeCalls, TLI);
  }

  return Modified;
}
// Erase the nodes given in the Nodes set from DFG. In addition to removing // them from the DFG, if a node corresponds to a statement, the corresponding // machine instruction is erased from the function. bool DeadCodeElimination::erase(const SetVector<NodeId> &Nodes) { if (Nodes.empty()) return false; // Prepare the actual set of ref nodes to remove: ref nodes from Nodes // are included directly, for each InstrNode in Nodes, include the set // of all RefNodes from it. NodeList DRNs, DINs; for (auto I : Nodes) { auto BA = DFG.addr<NodeBase*>(I); uint16_t Type = BA.Addr->getType(); if (Type == NodeAttrs::Ref) { DRNs.push_back(DFG.addr<RefNode*>(I)); continue; } // If it's a code node, add all ref nodes from it. uint16_t Kind = BA.Addr->getKind(); if (Kind == NodeAttrs::Stmt || Kind == NodeAttrs::Phi) { for (auto N : NodeAddr<CodeNode*>(BA).Addr->members(DFG)) DRNs.push_back(N); DINs.push_back(DFG.addr<InstrNode*>(I)); } else { llvm_unreachable("Unexpected code node"); return false; } } // Sort the list so that use nodes are removed first. This makes the // "unlink" functions a bit faster. auto UsesFirst = [] (NodeAddr<RefNode*> A, NodeAddr<RefNode*> B) -> bool { uint16_t KindA = A.Addr->getKind(), KindB = B.Addr->getKind(); if (KindA == NodeAttrs::Use && KindB == NodeAttrs::Def) return true; if (KindA == NodeAttrs::Def && KindB == NodeAttrs::Use) return false; return A.Id < B.Id; }; llvm::sort(DRNs, UsesFirst); if (trace()) dbgs() << "Removing dead ref nodes:\n"; for (NodeAddr<RefNode*> RA : DRNs) { if (trace()) dbgs() << " " << PrintNode<RefNode*>(RA, DFG) << '\n'; if (DFG.IsUse(RA)) DFG.unlinkUse(RA, true); else if (DFG.IsDef(RA)) DFG.unlinkDef(RA, true); } // Now, remove all dead instruction nodes. 
for (NodeAddr<InstrNode*> IA : DINs) { NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG); BA.Addr->removeMember(IA, DFG); if (!DFG.IsCode<NodeAttrs::Stmt>(IA)) continue; MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode(); if (trace()) dbgs() << "erasing: " << *MI; MI->eraseFromParent(); } return true; }
bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop) { MachineBasicBlock *Header = Loop ? Loop->getHeader() : &*MF.begin(); SetVector<MachineBasicBlock *> RewriteSuccs; // DFS through Loop's body, looking for for irreducible control flow. Loop is // natural, and we stay in its body, and we treat any nested loops // monolithically, so any cycles we encounter indicate irreducibility. SmallPtrSet<MachineBasicBlock *, 8> OnStack; SmallPtrSet<MachineBasicBlock *, 8> Visited; SmallVector<SuccessorList, 4> LoopWorklist; LoopWorklist.push_back(SuccessorList(Header)); OnStack.insert(Header); Visited.insert(Header); while (!LoopWorklist.empty()) { SuccessorList &Top = LoopWorklist.back(); if (Top.HasNext()) { MachineBasicBlock *Next = Top.Next(); if (Next == Header || (Loop && !Loop->contains(Next))) continue; if (LLVM_LIKELY(OnStack.insert(Next).second)) { if (!Visited.insert(Next).second) { OnStack.erase(Next); continue; } MachineLoop *InnerLoop = MLI.getLoopFor(Next); if (InnerLoop != Loop) LoopWorklist.push_back(SuccessorList(InnerLoop)); else LoopWorklist.push_back(SuccessorList(Next)); } else { RewriteSuccs.insert(Top.getBlock()); } continue; } OnStack.erase(Top.getBlock()); LoopWorklist.pop_back(); } // Most likely, we didn't find any irreducible control flow. if (LLVM_LIKELY(RewriteSuccs.empty())) return false; DEBUG(dbgs() << "Irreducible control flow detected!\n"); // Ok. We have irreducible control flow! Create a dispatch block which will // contains a jump table to any block in the problematic set of blocks. MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock(); MF.insert(MF.end(), Dispatch); MLI.changeLoopFor(Dispatch, Loop); // Add the jump table. 
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); MachineInstrBuilder MIB = BuildMI(*Dispatch, Dispatch->end(), DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32)); // Add the register which will be used to tell the jump table which block to // jump to. MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); MIB.addReg(Reg); // Collect all the blocks which need to have their successors rewritten, // add the successors to the jump table, and remember their index. DenseMap<MachineBasicBlock *, unsigned> Indices; SmallVector<MachineBasicBlock *, 4> SuccWorklist(RewriteSuccs.begin(), RewriteSuccs.end()); while (!SuccWorklist.empty()) { MachineBasicBlock *MBB = SuccWorklist.pop_back_val(); auto Pair = Indices.insert(std::make_pair(MBB, 0)); if (!Pair.second) continue; unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1; DEBUG(dbgs() << printMBBReference(*MBB) << " has index " << Index << "\n"); Pair.first->second = Index; for (auto Pred : MBB->predecessors()) RewriteSuccs.insert(Pred); MIB.addMBB(MBB); Dispatch->addSuccessor(MBB); MetaBlock Meta(MBB); for (auto *Succ : Meta.successors()) if (Succ != Header && (!Loop || Loop->contains(Succ))) SuccWorklist.push_back(Succ); } // Rewrite the problematic successors for every block in RewriteSuccs. // For simplicity, we just introduce a new block for every edge we need to // rewrite. Fancier things are possible. for (MachineBasicBlock *MBB : RewriteSuccs) { DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map; for (auto *Succ : MBB->successors()) { if (!Indices.count(Succ)) continue; MachineBasicBlock *Split = MF.CreateMachineBasicBlock(); MF.insert(MBB->isLayoutSuccessor(Succ) ? MachineFunction::iterator(Succ) : MF.end(), Split); MLI.changeLoopFor(Split, Loop); // Set the jump table's register of the index of the block we wish to // jump to, and jump to the jump table. 
BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg) .addImm(Indices[Succ]); BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::BR)) .addMBB(Dispatch); Split->addSuccessor(Dispatch); Map[Succ] = Split; } // Remap the terminator operands and the successor list. for (MachineInstr &Term : MBB->terminators()) for (auto &Op : Term.explicit_uses()) if (Op.isMBB() && Indices.count(Op.getMBB())) Op.setMBB(Map[Op.getMBB()]); for (auto Rewrite : Map) MBB->replaceSuccessor(Rewrite.first, Rewrite.second); } // Create a fake default label, because br_table requires one. MIB.addMBB(MIB.getInstr() ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1) .getMBB()); return true; }