Shape *MakeLoop(BlockSet &Blocks, BlockSet& Entries, BlockSet &NextEntries) { // Find the inner blocks in this loop. Proceed backwards from the entries until // you reach a seen block, collecting as you go. BlockSet InnerBlocks; BlockSet Queue = Entries; while (Queue.size() > 0) { Block *Curr = *(Queue.begin()); Queue.erase(Queue.begin()); if (InnerBlocks.find(Curr) == InnerBlocks.end()) { // This element is new, mark it as inner and remove from outer InnerBlocks.insert(Curr); Blocks.erase(Curr); // Add the elements prior to it for (BlockBranchMap::iterator iter = Curr->BranchesIn.begin(); iter != Curr->BranchesIn.end(); iter++) { Queue.insert(iter->first); } } } assert(InnerBlocks.size() > 0); for (BlockSet::iterator iter = InnerBlocks.begin(); iter != InnerBlocks.end(); iter++) { Block *Curr = *iter; for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { Block *Possible = iter->first; if (InnerBlocks.find(Possible) == InnerBlocks.end() && NextEntries.find(Possible) == NextEntries.end()) { NextEntries.insert(Possible); } } } PrintDebug("creating loop block:\n"); DebugDump(InnerBlocks, " inner blocks:"); DebugDump(Entries, " inner entries:"); DebugDump(Blocks, " outer blocks:"); DebugDump(NextEntries, " outer entries:"); // TODO: Optionally hoist additional blocks into the loop LoopShape *Loop = new LoopShape(); Notice(Loop); // Solipsize the loop, replacing with break/continue and marking branches as Processed (will not affect later calculations) // A. Branches to the loop entries become a continue to this shape for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) { Solipsize(*iter, Branch::Continue, Loop, InnerBlocks); } // B. Branches to outside the loop (a next entry) become breaks on this shape for (BlockSet::iterator iter = NextEntries.begin(); iter != NextEntries.end(); iter++) { Solipsize(*iter, Branch::Break, Loop, InnerBlocks); } // Finish up Shape *Inner = Process(InnerBlocks, Entries, NULL); Loop->Inner = Inner; return Loop; }
// If a block has multiple entries but no exits, and it is small enough, it is useful to split it. // A common example is a C++ function where everything ends up at a final exit block and does some // RAII cleanup. Without splitting, we will be forced to introduce labelled loops to allow // reaching the final block void SplitDeadEnds() { unsigned TotalCodeSize = 0; for (BlockSet::iterator iter = Live.begin(); iter != Live.end(); iter++) { Block *Curr = *iter; TotalCodeSize += strlen(Curr->Code); } BlockSet Splits; BlockSet Removed; //DebugDump(Live, "before"); for (BlockSet::iterator iter = Live.begin(); iter != Live.end(); iter++) { Block *Original = *iter; if (Original->BranchesIn.size() <= 1 || Original->BranchesOut.size() > 0) continue; // only dead ends, for now if (contains(Original->BranchesOut, Original)) continue; // cannot split a looping node if (strlen(Original->Code)*(Original->BranchesIn.size()-1) > TotalCodeSize/5) continue; // if splitting increases raw code size by a significant amount, abort // Split the node (for simplicity, we replace all the blocks, even though we could have reused the original) PrintDebug("Splitting block %d\n", Original->Id); for (BlockSet::iterator iter = Original->BranchesIn.begin(); iter != Original->BranchesIn.end(); iter++) { Block *Prior = *iter; Block *Split = new Block(Original->Code, Original->BranchVar); Parent->AddBlock(Split); PrintDebug(" to %d\n", Split->Id); Split->BranchesIn.insert(Prior); Branch *Details = Prior->BranchesOut[Original]; Prior->BranchesOut[Split] = new Branch(Details->Condition, Details->Code); Prior->BranchesOut.erase(Original); for (BlockBranchMap::iterator iter = Original->BranchesOut.begin(); iter != Original->BranchesOut.end(); iter++) { Block *Post = iter->first; Branch *Details = iter->second; Split->BranchesOut[Post] = new Branch(Details->Condition, Details->Code); Post->BranchesIn.insert(Split); } Splits.insert(Split); Removed.insert(Original); } for (BlockBranchMap::iterator iter = Original->BranchesOut.begin(); iter != Original->BranchesOut.end(); iter++) { Block *Post = iter->first; Post->BranchesIn.erase(Original); } //DebugDump(Live, "mid"); } for (BlockSet::iterator iter = Splits.begin(); iter != Splits.end(); iter++) { Live.insert(*iter); } for (BlockSet::iterator iter = Removed.begin(); iter != Removed.end(); iter++) { Live.erase(*iter); } //DebugDump(Live, "after"); }
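A minimal self-contained sketch of the same splitting rule on a toy graph (the ToyBlock struct and splitDeadEnds helper below are hypothetical stand-ins that mirror only the fields the pass touches, not the Relooper's Block API):

#include <map>
#include <set>
#include <string>
#include <vector>

struct ToyBlock {
  std::string Code;
  std::set<ToyBlock*> In;               // predecessors
  std::map<ToyBlock*, std::string> Out; // successor -> branch condition
};

// Give every multiple-entry, zero-exit block a private copy per predecessor,
// so a structurer can reach each copy without a labelled loop.
static void splitDeadEnds(std::vector<ToyBlock*>& blocks) {
  std::vector<ToyBlock*> splits;
  for (ToyBlock* original : blocks) {
    if (original->In.size() <= 1 || !original->Out.empty()) continue;
    for (ToyBlock* prior : original->In) {
      ToyBlock* split = new ToyBlock{original->Code, {prior}, {}}; // leaked in this sketch
      prior->Out[split] = prior->Out[original]; // keep the branch condition
      prior->Out.erase(original);
      splits.push_back(split);
    }
    original->In.clear(); // the original block is now unreachable
  }
  blocks.insert(blocks.end(), splits.begin(), splits.end());
}

int main() {
  // if (x) a(); else b(); with both sides falling into a shared cleanup block.
  ToyBlock entry{"if (x)"}, left{"a();"}, right{"b();"}, tail{"cleanup(); return;"};
  entry.Out = {{&left, "x"}, {&right, "!x"}};
  left.In  = {&entry}; left.Out  = {{&tail, ""}};
  right.In = {&entry}; right.Out = {{&tail, ""}};
  tail.In  = {&left, &right};
  std::vector<ToyBlock*> blocks{&entry, &left, &right, &tail};
  splitDeadEnds(blocks); // 'tail' is duplicated behind 'left' and 'right'
  return 0;
}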
static BlockSet getBlocksWithBranchesThatDependOn( const BlockSet& blocksWithBackwardsBranches, const InstructionSet& instructionsThatCanObserveSideEffects, DependenceAnalysis* dependenceAnalysis, ControlDependenceAnalysis* controlDependenceAnalysis) { BlockSet blocksWithDependentBranches; report(" Getting blocks with branches that can observe side-effects"); for(auto blockWithBranch : blocksWithBackwardsBranches) { auto branch = getBranch(blockWithBranch); if(branch == nullptr) continue; auto controlDependentInstructions = getControlDependentInstructions( branch, instructionsThatCanObserveSideEffects, controlDependenceAnalysis); for(auto instruction : controlDependentInstructions) { if(dependenceAnalysis->dependsOn(instruction, branch)) { report(" " << blockWithBranch->label()); blocksWithDependentBranches.insert(blockWithBranch); break; } } } return blocksWithDependentBranches; }
static void chaseDownPredecessors(iterator node, Register value, DataflowGraph* dfg, dataflow_iterator block, NodeList& nodes, InstructionToNodeMap& instructionToNodes, BlockSet& visited) { if(!visited.insert(block).second) return; assert(block->aliveIn().count(value) != 0); for(auto predecessor : block->predecessors()) { if(predecessor->aliveOut().count(value) == 0) continue; bool foundAnyDefinitions = false; // check the body for a definition for(auto instruction = predecessor->instructions().rbegin(); instruction != predecessor->instructions().rend(); ++instruction) { for(auto destination : instruction->d) { if(*destination.pointer == value) { auto producer = nodes.end(); auto ptx = static_cast<PTXInstruction*>(instruction->i); auto existingNode = instructionToNodes.find(ptx); if(existingNode == instructionToNodes.end()) { producer = nodes.insert(nodes.end(), Node(ptx)); instructionToNodes.insert( std::make_pair(ptx, producer)); } else { producer = existingNode->second; } report(" " << producer->instruction->toString() << " -> " << node->instruction->toString()); node->predecessors.push_back(producer); producer->successors.push_back(node); foundAnyDefinitions = true; break; } } } if(foundAnyDefinitions) continue; // if no definitions were found, recurse through predecessors chaseDownPredecessors(node, value, dfg, predecessor, nodes, instructionToNodes, visited); } }
void DeadCodeEliminationPass::runOnKernel(ir::IRKernel& k) { report("Running dead code elimination on kernel " << k.name); reportE(REPORT_PTX, k); Analysis* dfgAnalysis = getAnalysis(Analysis::DataflowGraphAnalysis); assert(dfgAnalysis != 0); analysis::DataflowGraph& dfg = *static_cast<analysis::DataflowGraph*>(dfgAnalysis); assert(dfg.ssa() != analysis::DataflowGraph::SsaType::None); BlockSet blocks; report(" Starting by scanning all basic blocks"); for(iterator block = dfg.begin(); block != dfg.end(); ++block) { report(" Queueing up BB_" << block->id()); blocks.insert(block); } while(!blocks.empty()) { iterator block = *blocks.begin(); blocks.erase(blocks.begin()); eliminateDeadInstructions(dfg, blocks, block); } report("Finished running dead code elimination on kernel " << k.name); reportE(REPORT_PTX, k); }
static BlockSet getBlocksWithCallsToFunctionsThatObserveSideEffects( ir::IRKernel& k) { BlockSet blocks; report(" Getting functions that can observe side-effects"); for(auto block = k.cfg()->begin(); block != k.cfg()->end(); ++block) { for(auto instruction : block->instructions) { auto ptxInstruction = static_cast<ir::PTXInstruction*>(instruction); // TODO: Check that the target can observe side effects if(ptxInstruction->isCall()) { report(" " << ptxInstruction->toString()); blocks.insert(block); break; } } } return blocks; }
Layer Layer::getSubgraphConnectedToTheseOutputs( const NeuronSet& outputs) const { typedef std::set<size_t> BlockSet; BlockSet blocks; // TODO: eliminate the redundant inserts for(auto& output : outputs) { size_t block = (output / getOutputBlockingFactor()) % this->blocks(); blocks.insert(block); } Layer layer(blocks.size(), getInputBlockingFactor(), getOutputBlockingFactor(), blockStep()); for(auto& block : blocks) { size_t blockIndex = block - *blocks.begin(); layer[blockIndex] = (*this)[block]; layer.at_bias(blockIndex) = at_bias(block); } return layer; }
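As a concrete instance of the index arithmetic above, with made-up factor values (not taken from any real layer configuration):

#include <cstddef>
#include <iostream>

int main() {
  // Hypothetical values standing in for getOutputBlockingFactor() and blocks().
  std::size_t outputBlockingFactor = 4;
  std::size_t blockCount = 3;
  std::cout << (10 / outputBlockingFactor) % blockCount << "\n"; // output neuron 10 -> block 2
  std::cout << (13 / outputBlockingFactor) % blockCount << "\n"; // output neuron 13 -> block 0
  return 0;
}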
// Create a list of entries from a block. If LimitTo is provided, only results in that set // will appear void GetBlocksOut(Block *Source, BlockSet& Entries, BlockSet *LimitTo=NULL) { for (BlockBranchMap::iterator iter = Source->BranchesOut.begin(); iter != Source->BranchesOut.end(); iter++) { if (!LimitTo || LimitTo->find(iter->first) != LimitTo->end()) { Entries.insert(iter->first); } } }
static bool allPreviousDefinitionsDominateMove(instruction_iterator move, block_iterator block, BlockSet& visited) { if(!visited.insert(block->id()).second) return true; assert(move->d.size() == 1); auto destination = *move->d.front().pointer; // If the value is defined by a phi with multiple sources, then the // previous definition does not dominate the move for(auto phi = block->phis().begin(); phi != block->phis().end(); ++phi) { if(phi->d == destination) { return false; } } // Check all predecessors with the value live out for(auto predecessor = block->predecessors().begin(); predecessor != block->predecessors().end(); ++predecessor) { if((*predecessor)->aliveOut().count(destination) == 0) continue; if(!allPreviousDefinitionsDominateMove(move, *predecessor, visited)) { return false; } } return true; }
ControlFlowGraph::BlockPointerVector ControlFlowGraph::reverse_topological_sequence() { typedef std::set<iterator, BlockSetCompare> BlockSet; typedef std::queue<iterator> Queue; report("Creating reverse topological order traversal"); BlockSet visited; BlockPointerVector sequence; Queue queue; queue.push(get_exit_block()); while (sequence.size() != size()) { if(queue.empty()) { for (pointer_iterator block = sequence.begin(); block != sequence.end(); ++block) { for (pointer_iterator pred = (*block)->predecessors.begin(); pred != (*block)->predecessors.end(); ++pred) { if (visited.count(*pred) == 0) { queue.push(*pred); break; } } if(!queue.empty()) { break; } } if(queue.empty()) break; // The remaining blocks are unreachable } iterator current = queue.front(); queue.pop(); if(!visited.insert(current).second) continue; sequence.push_back(current); report(" Adding block " << current->label()); for (pointer_iterator block = current->predecessors.begin(); block != current->predecessors.end(); ++block) { bool noDependencies = true; for (pointer_iterator successor = (*block)->successors.begin(); successor != (*block)->successors.end(); ++successor) { if (visited.count(*successor) == 0) { noDependencies = false; break; } } if(noDependencies) { queue.push(*block); } } } return sequence; }
ControlFlowGraph::BlockPointerVector ControlFlowGraph::pre_order_sequence() { typedef std::unordered_set<iterator> BlockSet; typedef std::stack<iterator> Stack; BlockSet visited; BlockPointerVector sequence; Stack stack; if (!empty()) { stack.push(get_entry_block()); visited.insert(get_entry_block()); } while (!stack.empty()) { iterator current = stack.top(); stack.pop(); sequence.push_back(current); // favor the fallthrough iterator fallthrough = end(); if (current->has_fallthrough_edge()) { edge_iterator fallthroughEdge = sequence.back()->get_fallthrough_edge(); if (visited.insert(fallthroughEdge->tail).second) { fallthrough = fallthroughEdge->tail; } } for (pointer_iterator block = current->successors.begin(); block != current->successors.end(); ++block) { if (visited.insert(*block).second) { stack.push(*block); } } if (fallthrough != end()) { stack.push(fallthrough); } } return sequence; }
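These traversals all rely on the same visited-set bookkeeping; a stripped-down sketch of the pre-order walk over a plain adjacency list (the Graph alias and integer node ids are illustrative, not ControlFlowGraph types, and the fallthrough-edge preference is omitted):

#include <stack>
#include <unordered_set>
#include <vector>

// Nodes are 0..N-1; graph[n] lists the successors of n.
using Graph = std::vector<std::vector<int>>;

// Iterative pre order: emit a node when it is popped, then push its
// not-yet-visited successors, mirroring pre_order_sequence above.
std::vector<int> preOrder(const Graph& graph, int entry) {
  std::unordered_set<int> visited{entry};
  std::stack<int> stack;
  std::vector<int> sequence;
  stack.push(entry);
  while (!stack.empty()) {
    int current = stack.top();
    stack.pop();
    sequence.push_back(current);
    for (int successor : graph[current]) {
      if (visited.insert(successor).second) {
        stack.push(successor);
      }
    }
  }
  return sequence;
}

int main() {
  Graph g = {{1, 2}, {3}, {3}, {}};   // diamond: 0 -> {1,2} -> 3
  auto order = preOrder(g, 0);        // yields 0, 2, 3, 1
  return static_cast<int>(order.size());
}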
ControlFlowGraph::BlockPointerVector ControlFlowGraph::post_order_sequence() { typedef std::unordered_set<iterator> BlockSet; typedef std::stack<iterator> Stack; report("Creating post order traversal"); BlockSet visited; BlockPointerVector sequence; Stack stack; if (!empty()) { for (pointer_iterator block = get_entry_block()->successors.begin(); block != get_entry_block()->successors.end(); ++block) { if (visited.insert(*block).second) { stack.push(*block); } } } while (!stack.empty()) { iterator current = stack.top(); bool one = false; for (pointer_iterator block = current->successors.begin(); block != current->successors.end(); ++block) { if (visited.insert(*block).second) { stack.push(*block); one = true; } } if(!one) { stack.pop(); sequence.push_back(current); report(" Adding block " << current->label()); } } report(" Adding block " << get_entry_block()->label()); sequence.push_back(get_entry_block()); return sequence; }
void FindLive(Block *Root) { BlockList ToInvestigate; ToInvestigate.push_back(Root); while (ToInvestigate.size() > 0) { Block *Curr = ToInvestigate.front(); ToInvestigate.pop_front(); if (Live.find(Curr) != Live.end()) continue; Live.insert(Curr); for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { ToInvestigate.push_back(iter->first); } } }
Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) { PrintDebug("creating simple block with block #%d\n", Inner->Id); SimpleShape *Simple = new SimpleShape; Notice(Simple); Simple->Inner = Inner; Inner->Parent = Simple; if (Blocks.size() > 1) { Blocks.erase(Inner); GetBlocksOut(Inner, NextEntries, &Blocks); BlockSet JustInner; JustInner.insert(Inner); for (BlockSet::iterator iter = NextEntries.begin(); iter != NextEntries.end(); iter++) { Solipsize(*iter, Branch::Direct, Simple, JustInner); } } return Simple; }
Shape *MakeMultiple(BlockSet &Blocks, BlockSet& Entries, BlockBlockSetMap& IndependentGroups, Shape *Prev, BlockSet &NextEntries) { PrintDebug("creating multiple block with %d inner groups\n", IndependentGroups.size()); bool Fused = !!(Shape::IsSimple(Prev)); MultipleShape *Multiple = new MultipleShape(); Notice(Multiple); BlockSet CurrEntries; for (BlockBlockSetMap::iterator iter = IndependentGroups.begin(); iter != IndependentGroups.end(); iter++) { Block *CurrEntry = iter->first; BlockSet &CurrBlocks = iter->second; PrintDebug(" multiple group with entry %d:\n", CurrEntry->Id); DebugDump(CurrBlocks, " "); // Create inner block CurrEntries.clear(); CurrEntries.insert(CurrEntry); for (BlockSet::iterator iter = CurrBlocks.begin(); iter != CurrBlocks.end(); iter++) { Block *CurrInner = *iter; // Remove the block from the remaining blocks Blocks.erase(CurrInner); // Find new next entries and fix branches to them for (BlockBranchMap::iterator iter = CurrInner->BranchesOut.begin(); iter != CurrInner->BranchesOut.end();) { Block *CurrTarget = iter->first; BlockBranchMap::iterator Next = iter; Next++; if (CurrBlocks.find(CurrTarget) == CurrBlocks.end()) { NextEntries.insert(CurrTarget); Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks); } iter = Next; // increment carefully because Solipsize can remove us } } Multiple->InnerMap[CurrEntry] = Process(CurrBlocks, CurrEntries, NULL); // If we are not fused, then our entries will actually be checked if (!Fused) { CurrEntry->IsCheckedMultipleEntry = true; } } DebugDump(Blocks, " remaining blocks after multiple:"); // Add entries not handled as next entries, they are deferred for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) { Block *Entry = *iter; if (IndependentGroups.find(Entry) == IndependentGroups.end()) { NextEntries.insert(Entry); } } return Multiple; }
void StackAllocationPromoter::pruneAllocStackUsage() { DEBUG(llvm::dbgs() << "*** Pruning : " << *ASI); BlockSet Blocks; // Insert all of the blocks that ASI is live in. for (auto UI = ASI->use_begin(), E = ASI->use_end(); UI != E; ++UI) Blocks.insert(UI->getUser()->getParent()); // Clear AllocStack state. LastStoreInBlock.clear(); for (auto Block : Blocks) { StoreInst *SI = promoteAllocationInBlock(Block); LastStoreInBlock[Block] = SI; } DEBUG(llvm::dbgs() << "*** Finished pruning : " << *ASI); }
static void propagateMoveSourceToUsersInBlock(instruction_iterator move, instruction_iterator position, block_iterator block, BlockSet& visited) { // early exit for visited blocks if(!visited.insert(block->id()).second) return; assert(move->d.size() == 1); assert(move->s.size() == 1); auto destination = *move->d.front().pointer; auto moveSource = *move->s.front().pointer; // We can skip PHIs because the use of a PHI would make the removal illegal // replace uses in the block for(; position != block->instructions().end(); ++position) { for(auto source = position->s.begin(); source != position->s.end(); ++source) { if(*source->pointer == destination) { *source->pointer = moveSource; } } } // replace in successors for(auto successor = block->successors().begin(); successor != block->successors().end(); ++successor) { if((*successor)->aliveIn().count(destination) == 0) continue; propagateMoveSourceToUsersInBlock(move, (*successor)->instructions().begin(), *successor, visited); } }
// If a block has multiple entries but no exits, and it is small enough, it is useful to split it. // A common example is a C++ function where everything ends up at a final exit block and does some // RAII cleanup. Without splitting, we will be forced to introduce labelled loops to allow // reaching the final block void SplitDeadEnds() { int TotalCodeSize = 0; for (BlockSet::iterator iter = Live.begin(); iter != Live.end(); iter++) { Block *Curr = *iter; TotalCodeSize += strlen(Curr->Code); } for (BlockSet::iterator iter = Live.begin(); iter != Live.end(); iter++) { Block *Original = *iter; if (Original->BranchesIn.size() <= 1 || Original->BranchesOut.size() > 0) continue; if (strlen(Original->Code)*(Original->BranchesIn.size()-1) > TotalCodeSize/5) continue; // if splitting increases raw code size by a significant amount, abort // Split the node (for simplicity, we replace all the blocks, even though we could have reused the original) for (BlockBranchMap::iterator iter = Original->BranchesIn.begin(); iter != Original->BranchesIn.end(); iter++) { Block *Prior = iter->first; Block *Split = new Block(Original->Code); Split->BranchesIn[Prior] = new Branch(NULL); Prior->BranchesOut[Split] = new Branch(Prior->BranchesOut[Original]->Condition, Prior->BranchesOut[Original]->Code); Prior->BranchesOut.erase(Original); Parent->AddBlock(Split); Live.insert(Split); } } }
static BlockSet getBlocksWithBackwardsBranches(CycleAnalysis* cycleAnalysis) { auto edges = cycleAnalysis->getAllBackEdges(); report(" Getting blocks with backwards branches"); BlockSet backwardsBranchBlocks; for(auto& edge : edges) { if(edge->type != ir::Edge::Branch) continue; auto block = edge->head; if(getBranch(block) == nullptr) continue; backwardsBranchBlocks.insert(block); report(" " << block->label()); } return backwardsBranchBlocks; }
void ConstantPropagationPass::runOnKernel(ir::IRKernel& k) { report("Running constant propagation on kernel " << k.name); Analysis* dfgAnalysis = getAnalysis("DataflowGraphAnalysis"); assert(dfgAnalysis != 0); analysis::DataflowGraph& dfg = *static_cast<analysis::DataflowGraph*>(dfgAnalysis); dfg.convertToSSAType(analysis::DataflowGraph::Minimal); assert(dfg.ssa() == analysis::DataflowGraph::Minimal); BlockSet blocks; report(" Starting by scanning all basic blocks"); for(iterator block = dfg.begin(); block != dfg.end(); ++block) { report(" Queueing up BB_" << block->id()); blocks.insert(block); } while(!blocks.empty()) { iterator block = *blocks.begin(); blocks.erase(blocks.begin()); eliminateRedundantInstructions(dfg, blocks, block); } report("Finished running constant propagation on kernel " << k.name); reportE(REPORT_PTX, k); }
ControlFlowGraph::ConstBlockPointerVector ControlFlowGraph::executable_sequence() const { typedef std::unordered_set<const_iterator> BlockSet; ConstBlockPointerVector sequence; BlockSet unscheduled; for(const_iterator i = begin(); i != end(); ++i) { unscheduled.insert(i); } report("Getting executable sequence."); sequence.push_back(get_entry_block()); unscheduled.erase(get_entry_block()); report(" added " << get_entry_block()->label()); while (!unscheduled.empty()) { if (sequence.back()->has_fallthrough_edge()) { const_edge_iterator fallthroughEdge = sequence.back()->get_fallthrough_edge(); sequence.push_back(fallthroughEdge->tail); unscheduled.erase(fallthroughEdge->tail); } else { // find a new block, favor branch targets over random blocks const_iterator next = *unscheduled.begin(); for(const_edge_pointer_iterator edge = sequence.back()->out_edges.begin(); edge != sequence.back()->out_edges.end(); ++edge) { if(unscheduled.count((*edge)->tail) != 0) { next = (*edge)->tail; } } // rewind through fallthrough edges to find the beginning of the // next chain of fall throughs report(" restarting at " << next->label()); bool rewinding = true; while (rewinding) { rewinding = false; for (const_edge_pointer_iterator edge = next->in_edges.begin(); edge != next->in_edges.end(); ++edge) { if ((*edge)->type == Edge::FallThrough) { assertM(unscheduled.count((*edge)->head) != 0, (*edge)->head->label() << " has multiple fallthrough branches."); next = (*edge)->head; report(" rewinding to " << next->label()); rewinding = true; break; } } } sequence.push_back(next); unscheduled.erase(next); } report(" added " << sequence.back()->label()); } return sequence; }
Shape *MakeLoop(BlockSet &Blocks, BlockSet& Entries, BlockSet &NextEntries) { // Find the inner blocks in this loop. Proceed backwards from the entries until // you reach a seen block, collecting as you go. BlockSet InnerBlocks; BlockSet Queue = Entries; while (Queue.size() > 0) { Block *Curr = *(Queue.begin()); Queue.erase(Queue.begin()); if (!contains(InnerBlocks, Curr)) { // This element is new, mark it as inner and remove from outer InnerBlocks.insert(Curr); Blocks.erase(Curr); // Add the elements prior to it for (BlockSet::iterator iter = Curr->BranchesIn.begin(); iter != Curr->BranchesIn.end(); iter++) { Queue.insert(*iter); } #if 0 // Add elements it leads to, if they are dead ends. There is no reason not to hoist dead ends // into loops, as it can avoid multiple entries after the loop for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { Block *Target = iter->first; if (Target->BranchesIn.size() <= 1 && Target->BranchesOut.size() == 0) { Queue.insert(Target); } } #endif } } assert(InnerBlocks.size() > 0); for (BlockSet::iterator iter = InnerBlocks.begin(); iter != InnerBlocks.end(); iter++) { Block *Curr = *iter; for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { Block *Possible = iter->first; if (!contains(InnerBlocks, Possible)) { NextEntries.insert(Possible); } } } #if 0 // We can avoid multiple next entries by hoisting them into the loop. if (NextEntries.size() > 1) { BlockBlockSetMap IndependentGroups; FindIndependentGroups(NextEntries, IndependentGroups, &InnerBlocks); while (IndependentGroups.size() > 0 && NextEntries.size() > 1) { Block *Min = NULL; int MinSize = 0; for (BlockBlockSetMap::iterator iter = IndependentGroups.begin(); iter != IndependentGroups.end(); iter++) { Block *Entry = iter->first; BlockSet &Blocks = iter->second; if (!Min || Blocks.size() < MinSize) { // TODO: code size, not # of blocks Min = Entry; MinSize = Blocks.size(); } } // check how many new entries this would cause BlockSet &Hoisted = IndependentGroups[Min]; bool abort = false; for (BlockSet::iterator iter = Hoisted.begin(); iter != Hoisted.end() && !abort; iter++) { Block *Curr = *iter; for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { Block *Target = iter->first; if (Hoisted.find(Target) == Hoisted.end() && NextEntries.find(Target) == NextEntries.end()) { // abort this hoisting abort = true; break; } } } if (abort) { IndependentGroups.erase(Min); continue; } // hoist this entry PrintDebug("hoisting %d into loop\n", Min->Id); NextEntries.erase(Min); for (BlockSet::iterator iter = Hoisted.begin(); iter != Hoisted.end(); iter++) { Block *Curr = *iter; InnerBlocks.insert(Curr); Blocks.erase(Curr); } IndependentGroups.erase(Min); } } #endif PrintDebug("creating loop block:\n"); DebugDump(InnerBlocks, " inner blocks:"); DebugDump(Entries, " inner entries:"); DebugDump(Blocks, " outer blocks:"); DebugDump(NextEntries, " outer entries:"); LoopShape *Loop = new LoopShape(); Notice(Loop); // Solipsize the loop, replacing with break/continue and marking branches as Processed (will not affect later calculations) // A. Branches to the loop entries become a continue to this shape for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) { Solipsize(*iter, Branch::Continue, Loop, InnerBlocks); } // B. 
Branches to outside the loop (a next entry) become breaks on this shape for (BlockSet::iterator iter = NextEntries.begin(); iter != NextEntries.end(); iter++) { Solipsize(*iter, Branch::Break, Loop, InnerBlocks); } // Finish up Shape *Inner = Process(InnerBlocks, Entries, NULL); Loop->Inner = Inner; return Loop; }
void ReversePostOrderTraversal::analyze(Function& function) { typedef util::LargeSet<BasicBlock*> BlockSet; typedef std::stack<BasicBlock*> BlockStack; order.clear(); BlockSet visited; BlockStack stack; auto cfgAnalysis = getAnalysis("ControlFlowGraph"); auto cfg = static_cast<ControlFlowGraph*>(cfgAnalysis); report("Creating reverse post order traversal over function '" + function.name() + "'"); // reverse post order is reversed topological order stack.push(&*function.entry_block()); while(order.size() != function.size()) { if(stack.empty()) { for(auto block : order) { auto successors = cfg->getSuccessors(*block); for(auto successor : successors) { if(visited.insert(successor).second) { stack.push(successor); break; } } if(!stack.empty()) break; } } assertM(!stack.empty(), (function.size() - order.size()) << " blocks are not connected."); while(!stack.empty()) { BasicBlock* top = stack.top(); stack.pop(); auto successors = cfg->getSuccessors(*top); for(auto successor : successors) { assert(successor != nullptr); auto predecessors = cfg->getPredecessors(*successor); bool allPredecessorsVisited = true; for(auto predecessor : predecessors) { if(visited.count(predecessor) == 0) { allPredecessorsVisited = false; break; } } if(!allPredecessorsVisited) continue; if(visited.insert(successor).second) { stack.push(successor); } } order.push_back(top); report(" " << top->name()); } } // reverse the order std::reverse(order.begin(), order.end()); }
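The comment above notes that reverse post order is reversed topological order; a compact recursive sketch of that textbook construction on a plain adjacency list (illustrative only, it is not the pass's iterative, unreachable-block-aware implementation):

#include <algorithm>
#include <functional>
#include <unordered_set>
#include <vector>

using Graph = std::vector<std::vector<int>>;

// Finish-time post order, then reverse: every node appears before all of the
// nodes it can reach (ignoring back edges).
std::vector<int> reversePostOrder(const Graph& graph, int entry) {
  std::unordered_set<int> visited;
  std::vector<int> order;
  std::function<void(int)> visit = [&](int node) {
    if (!visited.insert(node).second) return;
    for (int successor : graph[node]) visit(successor);
    order.push_back(node); // emitted after all successors
  };
  visit(entry);
  std::reverse(order.begin(), order.end());
  return order;
}

int main() {
  Graph g = {{1, 2}, {3}, {3}, {}};  // diamond: 0 -> {1,2} -> 3
  auto rpo = reversePostOrder(g, 0); // yields 0, 2, 1, 3 (entry first, join block last)
  return static_cast<int>(rpo.size());
}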
static void updateUses(iterator block, ir::Instruction::RegisterType registerId, const ir::PTXOperand& value, BlockSet& visited) { typedef analysis::DataflowGraph::RegisterPointerVector RegisterPointerVector; if(!visited.insert(block).second) return; // phi uses bool replacedPhi = false; bool anyPhis = false; ir::Instruction::RegisterType newRegisterId = 0; for(auto phi = block->phis().begin(); phi != block->phis().end(); ++phi) { if(phi->s.size() != 1) { for(auto source = phi->s.begin(); source != phi->s.end(); ++source) { if(source->id == registerId) { anyPhis = true; report(" could not remove " << phi->toString()); break; } } continue; } for(auto source = phi->s.begin(); source != phi->s.end(); ++source) { if(source->id == registerId) { newRegisterId = phi->d.id; block->phis().erase(phi); auto livein = block->aliveIn().find(registerId); assert(livein != block->aliveIn().end()); block->aliveIn().erase(livein); report(" removed " << phi->toString()); replacedPhi = true; break; } } if(replacedPhi) { break; } } if(replacedPhi) { BlockSet visited; updateUses(block, newRegisterId, value, visited); } // local uses for(auto instruction = block->instructions().begin(); instruction != block->instructions().end(); ++instruction) { auto ptx = static_cast<ir::PTXInstruction*>(instruction->i); RegisterPointerVector newSources; for(auto source = instruction->s.begin(); source != instruction->s.end(); ++source) { if(*source->pointer == registerId) { report(" updated use by '" << ptx->toString() << "', of r" << registerId); replaceOperand(*ptx, registerId, value); } else { newSources.push_back(*source); } } instruction->s = std::move(newSources); } if(!anyPhis) { auto livein = block->aliveIn().find(registerId); if(livein != block->aliveIn().end()) { block->aliveIn().erase(livein); report(" removed from live-in set of block " << block->id()); } } auto liveout = block->aliveOut().find(registerId); if(liveout == block->aliveOut().end()) return; // uses by successors bool anyUsesBySuccessors = false; for(auto successor = block->successors().begin(); successor != block->successors().end(); ++successor) { auto livein = (*successor)->aliveIn().find(registerId); if(livein == (*successor)->aliveIn().end()) continue; updateUses(*successor, registerId, value, visited); livein = (*successor)->aliveIn().find(registerId); if(livein == (*successor)->aliveIn().end()) continue; anyUsesBySuccessors = true; } if(!anyUsesBySuccessors) { report(" removed from live-out set of BB_" << block->id()); block->aliveOut().erase(liveout); } }
void Relooper::Calculate(Block *Entry) { // Scan and optimize the input struct PreOptimizer : public RelooperRecursor { PreOptimizer(Relooper *Parent) : RelooperRecursor(Parent) {} BlockSet Live; void FindLive(Block *Root) { BlockList ToInvestigate; ToInvestigate.push_back(Root); while (ToInvestigate.size() > 0) { Block *Curr = ToInvestigate.front(); ToInvestigate.pop_front(); if (Live.find(Curr) != Live.end()) continue; Live.insert(Curr); for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { ToInvestigate.push_back(iter->first); } } } // If a block has multiple entries but no exits, and it is small enough, it is useful to split it. // A common example is a C++ function where everything ends up at a final exit block and does some // RAII cleanup. Without splitting, we will be forced to introduce labelled loops to allow // reaching the final block void SplitDeadEnds() { int TotalCodeSize = 0; for (BlockSet::iterator iter = Live.begin(); iter != Live.end(); iter++) { Block *Curr = *iter; TotalCodeSize += strlen(Curr->Code); } for (BlockSet::iterator iter = Live.begin(); iter != Live.end(); iter++) { Block *Original = *iter; if (Original->BranchesIn.size() <= 1 || Original->BranchesOut.size() > 0) continue; if (strlen(Original->Code)*(Original->BranchesIn.size()-1) > TotalCodeSize/5) continue; // if splitting increases raw code size by a significant amount, abort // Split the node (for simplicity, we replace all the blocks, even though we could have reused the original) for (BlockBranchMap::iterator iter = Original->BranchesIn.begin(); iter != Original->BranchesIn.end(); iter++) { Block *Prior = iter->first; Block *Split = new Block(Original->Code); Split->BranchesIn[Prior] = new Branch(NULL); Prior->BranchesOut[Split] = new Branch(Prior->BranchesOut[Original]->Condition, Prior->BranchesOut[Original]->Code); Prior->BranchesOut.erase(Original); Parent->AddBlock(Split); Live.insert(Split); } } } }; PreOptimizer Pre(this); Pre.FindLive(Entry); // Add incoming branches from live blocks, ignoring dead code for (int i = 0; i < Blocks.size(); i++) { Block *Curr = Blocks[i]; if (Pre.Live.find(Curr) == Pre.Live.end()) continue; for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { iter->first->BranchesIn[Curr] = new Branch(NULL); } } Pre.SplitDeadEnds(); // Recursively process the graph struct Analyzer : public RelooperRecursor { Analyzer(Relooper *Parent) : RelooperRecursor(Parent) {} // Add a shape to the list of shapes in this Relooper calculation void Notice(Shape *New) { Parent->Shapes.push_back(New); } // Create a list of entries from a block. 
If LimitTo is provided, only results in that set // will appear void GetBlocksOut(Block *Source, BlockSet& Entries, BlockSet *LimitTo=NULL) { for (BlockBranchMap::iterator iter = Source->BranchesOut.begin(); iter != Source->BranchesOut.end(); iter++) { if (!LimitTo || LimitTo->find(iter->first) != LimitTo->end()) { Entries.insert(iter->first); } } } // Converts/processes all branchings to a specific target void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor, BlockSet &From) { PrintDebug("Solipsizing branches into %d\n", Target->Id); DebugDump(From, " relevant to solipsize: "); for (BlockBranchMap::iterator iter = Target->BranchesIn.begin(); iter != Target->BranchesIn.end();) { Block *Prior = iter->first; if (From.find(Prior) == From.end()) { iter++; continue; } Branch *TargetIn = iter->second; Branch *PriorOut = Prior->BranchesOut[Target]; PriorOut->Ancestor = Ancestor; // Do we need this info PriorOut->Type = Type; // on TargetIn too? if (MultipleShape *Multiple = Shape::IsMultiple(Ancestor)) { Multiple->NeedLoop++; // We are breaking out of this Multiple, so need a loop } iter++; // carefully increment iter before erasing Target->BranchesIn.erase(Prior); Target->ProcessedBranchesIn[Prior] = TargetIn; Prior->BranchesOut.erase(Target); Prior->ProcessedBranchesOut[Target] = PriorOut; PrintDebug(" eliminated branch from %d\n", Prior->Id); } } Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) { PrintDebug("creating simple block with block #%d\n", Inner->Id); SimpleShape *Simple = new SimpleShape; Notice(Simple); Simple->Inner = Inner; Inner->Parent = Simple; if (Blocks.size() > 1) { Blocks.erase(Inner); GetBlocksOut(Inner, NextEntries, &Blocks); BlockSet JustInner; JustInner.insert(Inner); for (BlockSet::iterator iter = NextEntries.begin(); iter != NextEntries.end(); iter++) { Solipsize(*iter, Branch::Direct, Simple, JustInner); } } return Simple; } Shape *MakeLoop(BlockSet &Blocks, BlockSet& Entries, BlockSet &NextEntries) { // Find the inner blocks in this loop. Proceed backwards from the entries until // you reach a seen block, collecting as you go. BlockSet InnerBlocks; BlockSet Queue = Entries; while (Queue.size() > 0) { Block *Curr = *(Queue.begin()); Queue.erase(Queue.begin()); if (InnerBlocks.find(Curr) == InnerBlocks.end()) { // This element is new, mark it as inner and remove from outer InnerBlocks.insert(Curr); Blocks.erase(Curr); // Add the elements prior to it for (BlockBranchMap::iterator iter = Curr->BranchesIn.begin(); iter != Curr->BranchesIn.end(); iter++) { Queue.insert(iter->first); } } } assert(InnerBlocks.size() > 0); for (BlockSet::iterator iter = InnerBlocks.begin(); iter != InnerBlocks.end(); iter++) { Block *Curr = *iter; for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { Block *Possible = iter->first; if (InnerBlocks.find(Possible) == InnerBlocks.end() && NextEntries.find(Possible) == NextEntries.end()) { NextEntries.insert(Possible); } } } PrintDebug("creating loop block:\n"); DebugDump(InnerBlocks, " inner blocks:"); DebugDump(Entries, " inner entries:"); DebugDump(Blocks, " outer blocks:"); DebugDump(NextEntries, " outer entries:"); // TODO: Optionally hoist additional blocks into the loop LoopShape *Loop = new LoopShape(); Notice(Loop); // Solipsize the loop, replacing with break/continue and marking branches as Processed (will not affect later calculations) // A. 
Branches to the loop entries become a continue to this shape for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) { Solipsize(*iter, Branch::Continue, Loop, InnerBlocks); } // B. Branches to outside the loop (a next entry) become breaks on this shape for (BlockSet::iterator iter = NextEntries.begin(); iter != NextEntries.end(); iter++) { Solipsize(*iter, Branch::Break, Loop, InnerBlocks); } // Finish up Shape *Inner = Process(InnerBlocks, Entries, NULL); Loop->Inner = Inner; return Loop; } // For each entry, find the independent group reachable by it. The independent group is // the entry itself, plus all the blocks it can reach that cannot be directly reached by another entry. Note that we // ignore directly reaching the entry itself by another entry. void FindIndependentGroups(BlockSet &Blocks, BlockSet &Entries, BlockBlockSetMap& IndependentGroups) { typedef std::map<Block*, Block*> BlockBlockMap; struct HelperClass { BlockBlockSetMap& IndependentGroups; BlockBlockMap Ownership; // For each block, which entry it belongs to. We have reached it from there. HelperClass(BlockBlockSetMap& IndependentGroupsInit) : IndependentGroups(IndependentGroupsInit) {} void InvalidateWithChildren(Block *New) { // TODO: rename New BlockList ToInvalidate; // Being in the list means you need to be invalidated ToInvalidate.push_back(New); while (ToInvalidate.size() > 0) { Block *Invalidatee = ToInvalidate.front(); ToInvalidate.pop_front(); Block *Owner = Ownership[Invalidatee]; if (IndependentGroups.find(Owner) != IndependentGroups.end()) { // Owner may have been invalidated, do not add to IndependentGroups! IndependentGroups[Owner].erase(Invalidatee); } if (Ownership[Invalidatee]) { // may have been seen before and invalidated already Ownership[Invalidatee] = NULL; for (BlockBranchMap::iterator iter = Invalidatee->BranchesOut.begin(); iter != Invalidatee->BranchesOut.end(); iter++) { Block *Target = iter->first; BlockBlockMap::iterator Known = Ownership.find(Target); if (Known != Ownership.end()) { Block *TargetOwner = Known->second; if (TargetOwner) { ToInvalidate.push_back(Target); } } } } } } }; HelperClass Helper(IndependentGroups); // We flow out from each of the entries, simultaneously. // When we reach a new block, we add it as belonging to the one we got to it from. // If we reach a new block that is already marked as belonging to someone, it is reachable by // two entries and is not valid for any of them. Remove it and all it can reach that have been // visited. BlockList Queue; // Being in the queue means we just added this item, and we need to add its children for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) { Block *Entry = *iter; Helper.Ownership[Entry] = Entry; IndependentGroups[Entry].insert(Entry); Queue.push_back(Entry); } while (Queue.size() > 0) { Block *Curr = Queue.front(); Queue.pop_front(); Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership map if we are in the queue if (!Owner) continue; // we have been invalidated meanwhile after being reached from two entries // Add all children for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { Block *New = iter->first; BlockBlockMap::iterator Known = Helper.Ownership.find(New); if (Known == Helper.Ownership.end()) { // New node. 
Add it, and put it in the queue Helper.Ownership[New] = Owner; IndependentGroups[Owner].insert(New); Queue.push_back(New); continue; } Block *NewOwner = Known->second; if (!NewOwner) continue; // We reached an invalidated node if (NewOwner != Owner) { // Invalidate this and all reachable that we have seen - we reached this from two locations Helper.InvalidateWithChildren(New); } // otherwise, we have the same owner, so do nothing } } // Having processed all the interesting blocks, we remain with just one potential issue: // If a->b, and a was invalidated, but then b was later reached by someone else, we must // invalidate b. To check for this, we go over all elements in the independent groups, // if an element has a parent which does *not* have the same owner, we must remove it // and all its children. for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) { BlockSet &CurrGroup = IndependentGroups[*iter]; BlockList ToInvalidate; for (BlockSet::iterator iter = CurrGroup.begin(); iter != CurrGroup.end(); iter++) { Block *Child = *iter; for (BlockBranchMap::iterator iter = Child->BranchesIn.begin(); iter != Child->BranchesIn.end(); iter++) { Block *Parent = iter->first; if (Helper.Ownership[Parent] != Helper.Ownership[Child]) { ToInvalidate.push_back(Child); } } } while (ToInvalidate.size() > 0) { Block *Invalidatee = ToInvalidate.front(); ToInvalidate.pop_front(); Helper.InvalidateWithChildren(Invalidatee); } } // Remove empty groups for (BlockSet::iterator iter = Entries.begin(); iter != Entries.end(); iter++) { if (IndependentGroups[*iter].size() == 0) { IndependentGroups.erase(*iter); } } #if DEBUG PrintDebug("Investigated independent groups:\n"); for (BlockBlockSetMap::iterator iter = IndependentGroups.begin(); iter != IndependentGroups.end(); iter++) { DebugDump(iter->second, " group: "); } #endif } Shape *MakeMultiple(BlockSet &Blocks, BlockSet& Entries, BlockBlockSetMap& IndependentGroups, Shape *Prev, BlockSet &NextEntries) { PrintDebug("creating multiple block with %d inner groups\n", IndependentGroups.size()); bool Fused = !!(Shape::IsSimple(Prev)); MultipleShape *Multiple = new MultipleShape(); Notice(Multiple); BlockSet CurrEntries; for (BlockBlockSetMap::iterator iter = IndependentGroups.begin(); iter != IndependentGroups.end(); iter++) { Block *CurrEntry = iter->first; BlockSet &CurrBlocks = iter->second; PrintDebug(" multiple group with entry %d:\n", CurrEntry->Id); DebugDump(CurrBlocks, " "); // Create inner block CurrEntries.clear(); CurrEntries.insert(CurrEntry); for (BlockSet::iterator iter = CurrBlocks.begin(); iter != CurrBlocks.end(); iter++) { Block *CurrInner = *iter; // Remove the block from the remaining blocks Blocks.erase(CurrInner); // Find new next entries and fix branches to them for (BlockBranchMap::iterator iter = CurrInner->BranchesOut.begin(); iter != CurrInner->BranchesOut.end();) { Block *CurrTarget = iter->first; BlockBranchMap::iterator Next = iter; Next++; if (CurrBlocks.find(CurrTarget) == CurrBlocks.end()) { NextEntries.insert(CurrTarget); Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks); } iter = Next; // increment carefully because Solipsize can remove us } } Multiple->InnerMap[CurrEntry] = Process(CurrBlocks, CurrEntries, NULL); // If we are not fused, then our entries will actually be checked if (!Fused) { CurrEntry->IsCheckedMultipleEntry = true; } } DebugDump(Blocks, " remaining blocks after multiple:"); // Add entries not handled as next entries, they are deferred for (BlockSet::iterator iter = 
Entries.begin(); iter != Entries.end(); iter++) { Block *Entry = *iter; if (IndependentGroups.find(Entry) == IndependentGroups.end()) { NextEntries.insert(Entry); } } return Multiple; } // Main function. // Process a set of blocks with specified entries, returns a shape // The Make* functions receive a NextEntries. If they fill it with data, those are the entries for the // ->Next block on them, and the blocks are what remains in Blocks (which Make* modify). In this way // we avoid recursing on Next (imagine a long chain of Simples, if we recursed we could blow the stack). Shape *Process(BlockSet &Blocks, BlockSet& InitialEntries, Shape *Prev) { PrintDebug("Process() called\n"); BlockSet *Entries = &InitialEntries; BlockSet TempEntries[2]; int CurrTempIndex = 0; BlockSet *NextEntries; Shape *Ret = NULL; #define Make(call) \ Shape *Temp = call; \ if (Prev) Prev->Next = Temp; \ if (!Ret) Ret = Temp; \ if (!NextEntries->size()) { PrintDebug("Process() returning\n"); return Ret; } \ Prev = Temp; \ Entries = NextEntries; \ continue; while (1) { PrintDebug("Process() running\n"); DebugDump(Blocks, " blocks : "); DebugDump(*Entries, " entries: "); CurrTempIndex = 1-CurrTempIndex; NextEntries = &TempEntries[CurrTempIndex]; NextEntries->clear(); if (Entries->size() == 0) return Ret; if (Entries->size() == 1) { Block *Curr = *(Entries->begin()); if (Curr->BranchesIn.size() == 0) { // One entry, no looping ==> Simple Make(MakeSimple(Blocks, Curr, *NextEntries)); } // One entry, looping ==> Loop Make(MakeLoop(Blocks, *Entries, *NextEntries)); } // More than one entry, try to eliminate through a Multiple groups of // independent blocks from an entry/ies. It is important to remove through // multiples as opposed to looping since the former is more performant. BlockBlockSetMap IndependentGroups; FindIndependentGroups(Blocks, *Entries, IndependentGroups); PrintDebug("Independent groups: %d\n", IndependentGroups.size()); if (IndependentGroups.size() > 0) { // We can handle a group in a multiple if its entry cannot be reached by another group. // Note that it might be reachable by itself - a loop. But that is fine, we will create // a loop inside the multiple block (which is the performant order to do it). for (BlockBlockSetMap::iterator iter = IndependentGroups.begin(); iter != IndependentGroups.end();) { Block *Entry = iter->first; BlockSet &Group = iter->second; BlockBlockSetMap::iterator curr = iter++; // iterate carefully, we may delete for (BlockBranchMap::iterator iterBranch = Entry->BranchesIn.begin(); iterBranch != Entry->BranchesIn.end(); iterBranch++) { Block *Origin = iterBranch->first; if (Group.find(Origin) == Group.end()) { // Reached from outside the group, so we cannot handle this PrintDebug("Cannot handle group with entry %d because of incoming branch from %d\n", Entry->Id, Origin->Id); IndependentGroups.erase(curr); break; } } } // As an optimization, if we have 2 independent groups, and one is a small dead end, we can handle only that dead end. // The other then becomes a Next - without nesting in the code and recursion in the analysis. // TODO: if the larger is the only dead end, handle that too // TODO: handle >2 groups // TODO: handle not just dead ends, but also that do not branch to the NextEntries. However, must be careful // there since we create a Next, and that Next can prevent eliminating a break (since we no longer // naturally reach the same place), which may necessitate a one-time loop, which makes the unnesting // pointless. 
if (IndependentGroups.size() == 2) { // Find the smaller one BlockBlockSetMap::iterator iter = IndependentGroups.begin(); Block *SmallEntry = iter->first; int SmallSize = iter->second.size(); iter++; Block *LargeEntry = iter->first; int LargeSize = iter->second.size(); if (SmallSize != LargeSize) { // ignore the case where they are identical - keep things symmetrical there if (SmallSize > LargeSize) { Block *Temp = SmallEntry; SmallEntry = LargeEntry; LargeEntry = Temp; // Note: we did not flip the Sizes too, they are now invalid. TODO: use the smaller size as a limit? } // Check if dead end bool DeadEnd = true; BlockSet &SmallGroup = IndependentGroups[SmallEntry]; for (BlockSet::iterator iter = SmallGroup.begin(); iter != SmallGroup.end(); iter++) { Block *Curr = *iter; for (BlockBranchMap::iterator iter = Curr->BranchesOut.begin(); iter != Curr->BranchesOut.end(); iter++) { Block *Target = iter->first; if (SmallGroup.find(Target) == SmallGroup.end()) { DeadEnd = false; break; } } if (!DeadEnd) break; } if (DeadEnd) { PrintDebug("Removing nesting by not handling large group because small group is dead end\n"); IndependentGroups.erase(LargeEntry); } } } PrintDebug("Handleable independent groups: %d\n", IndependentGroups.size()); if (IndependentGroups.size() > 0) { // Some groups removable ==> Multiple Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev, *NextEntries)); } } // No independent groups, must be loopable ==> Loop Make(MakeLoop(Blocks, *Entries, *NextEntries)); } } }; // Main BlockSet AllBlocks; for (int i = 0; i < Blocks.size(); i++) { AllBlocks.insert(Blocks[i]); #if DEBUG PrintDebug("Adding block %d (%s)\n", Blocks[i]->Id, Blocks[i]->Code); for (BlockBranchMap::iterator iter = Blocks[i]->BranchesOut.begin(); iter != Blocks[i]->BranchesOut.end(); iter++) { PrintDebug(" with branch out to %d\n", iter->first->Id); } #endif } BlockSet Entries; Entries.insert(Entry); Root = Analyzer(this).Process(AllBlocks, Entries, NULL); // Post optimizations struct PostOptimizer { Relooper *Parent; void *Closure; PostOptimizer(Relooper *ParentInit) : Parent(ParentInit), Closure(NULL) {} #define RECURSE_MULTIPLE_MANUAL(func, manual) \ for (BlockShapeMap::iterator iter = manual->InnerMap.begin(); iter != manual->InnerMap.end(); iter++) { \ func(iter->second); \ } #define RECURSE_MULTIPLE(func) RECURSE_MULTIPLE_MANUAL(func, Multiple); #define RECURSE_LOOP(func) \ func(Loop->Inner); #define SHAPE_SWITCH(var, simple, multiple, loop) \ if (SimpleShape *Simple = Shape::IsSimple(var)) { \ simple; \ } else if (MultipleShape *Multiple = Shape::IsMultiple(var)) { \ multiple; \ } else if (LoopShape *Loop = Shape::IsLoop(var)) { \ loop; \ } #define SHAPE_SWITCH_AUTO(var, simple, multiple, loop, func) \ if (SimpleShape *Simple = Shape::IsSimple(var)) { \ simple; \ func(Simple->Next); \ } else if (MultipleShape *Multiple = Shape::IsMultiple(var)) { \ multiple; \ RECURSE_MULTIPLE(func) \ func(Multiple->Next); \ } else if (LoopShape *Loop = Shape::IsLoop(var)) { \ loop; \ RECURSE_LOOP(func); \ func(Loop->Next); \ } // Remove unneeded breaks and continues. // A flow operation is trivially unneeded if the shape we naturally get to by normal code // execution is the same as the flow forces us to. void RemoveUnneededFlows(Shape *Root, Shape *Natural=NULL) { Shape *Next = Root; while (Next) { Root = Next; Next = NULL; SHAPE_SWITCH(Root, { // If there is a next block, we already know at Simple creation time to make direct branches, // and we can do nothing more. 
If there is no next however, then Natural is where we will // go to by doing nothing, so we can potentially optimize some branches to direct. if (Simple->Next) { Next = Simple->Next; } else { for (BlockBranchMap::iterator iter = Simple->Inner->ProcessedBranchesOut.begin(); iter != Simple->Inner->ProcessedBranchesOut.end(); iter++) { Block *Target = iter->first; Branch *Details = iter->second; if (Details->Type != Branch::Direct && Target->Parent == Natural) { Details->Type = Branch::Direct; if (MultipleShape *Multiple = Shape::IsMultiple(Details->Ancestor)) { Multiple->NeedLoop--; } } } } }, { for (BlockShapeMap::iterator iter = Multiple->InnerMap.begin(); iter != Multiple->InnerMap.end(); iter++) { RemoveUnneededFlows(iter->second, Multiple->Next); } Next = Multiple->Next; }, {
void StackAllocationPromoter::promoteAllocationToPhi() { DEBUG(llvm::dbgs() << "*** Placing Phis for : " << *ASI); // A list of blocks that will require new Phi values. BlockSet PhiBlocks; // The "piggy-bank" data-structure that we use for processing the dom-tree // bottom-up. NodePriorityQueue PQ; // Collect all of the stores into the AllocStack. We know that at this point // we have at most one store per block. for (auto UI = ASI->use_begin(), E = ASI->use_end(); UI != E; ++UI) { SILInstruction *II = UI->getUser(); // We need to place Phis for this block. if (isa<StoreInst>(II)) { // If the block is in the dom tree (dominated by the entry block). if (DomTreeNode *Node = DT->getNode(II->getParent())) PQ.push(std::make_pair(Node, DomTreeLevels[Node])); } } DEBUG(llvm::dbgs() << "*** Found: " << PQ.size() << " Defs\n"); // A list of nodes for which we already calculated the dominator frontier. llvm::SmallPtrSet<DomTreeNode *, 32> Visited; SmallVector<DomTreeNode *, 32> Worklist; // Scan all of the definitions in the function bottom-up using the priority // queue. while (!PQ.empty()) { DomTreeNodePair RootPair = PQ.top(); PQ.pop(); DomTreeNode *Root = RootPair.first; unsigned RootLevel = RootPair.second; // Walk all dom tree children of Root, inspecting their successors. Only // J-edges, whose target level is at most Root's level are added to the // dominance frontier. Worklist.clear(); Worklist.push_back(Root); while (!Worklist.empty()) { DomTreeNode *Node = Worklist.pop_back_val(); SILBasicBlock *BB = Node->getBlock(); // For all successors of the node: for (auto &Succ : BB->getSuccessors()) { DomTreeNode *SuccNode = DT->getNode(Succ); // Skip D-edges (edges that are dom-tree edges). if (SuccNode->getIDom() == Node) continue; // Ignore J-edges that point to nodes that are not smaller or equal // to the root level. unsigned SuccLevel = DomTreeLevels[SuccNode]; if (SuccLevel > RootLevel) continue; // Ignore visited nodes. if (!Visited.insert(SuccNode).second) continue; // If the new PHInode is not dominated by the allocation then it's dead. if (!DT->dominates(ASI->getParent(), SuccNode->getBlock())) continue; // If the new PHInode is properly dominated by the deallocation then it // is obviously a dead PHInode, so we don't need to insert it. if (DSI && DT->properlyDominates(DSI->getParent(), SuccNode->getBlock())) continue; // The successor node is a new PHINode. If this is a new PHI node // then it may require additional definitions, so add it to the PQ. if (PhiBlocks.insert(Succ).second) PQ.push(std::make_pair(SuccNode, SuccLevel)); } // Add the children in the dom-tree to the worklist. for (auto CI = Node->begin(), CE = Node->end(); CI != CE; ++CI) if (!Visited.count(*CI)) Worklist.push_back(*CI); } } DEBUG(llvm::dbgs() << "*** Found: " << PhiBlocks.size() << " new PHIs\n"); NumPhiPlaced += PhiBlocks.size(); // At this point we calculated the locations of all of the new Phi values. // Next, add the Phi values and promote all of the loads and stores into the // new locations. // Replace the dummy values with new block arguments. addBlockArguments(PhiBlocks); // Hook up the Phi nodes, loads, and debug_value_addr with incoming values. fixBranchesAndUses(PhiBlocks); DEBUG(llvm::dbgs() << "*** Finished placing Phis ***\n"); }
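The priority queue above keys each definition by its dominator-tree level; a minimal sketch of how such a level map can be computed by a breadth-first walk from the tree's root (the DomNode struct is a toy stand-in, not SIL's DomTreeNode):

#include <deque>
#include <unordered_map>
#include <vector>

// Toy dominator-tree node: just children pointers.
struct DomNode {
  std::vector<DomNode*> children;
};

// Breadth-first walk from the root assigns each node its depth in the
// dominator tree, which is the kind of value a DomTreeLevels map stores.
std::unordered_map<DomNode*, unsigned> computeDomTreeLevels(DomNode* root) {
  std::unordered_map<DomNode*, unsigned> levels;
  std::deque<DomNode*> worklist{root};
  levels[root] = 0;
  while (!worklist.empty()) {
    DomNode* node = worklist.front();
    worklist.pop_front();
    for (DomNode* child : node->children) {
      levels[child] = levels[node] + 1;
      worklist.push_back(child);
    }
  }
  return levels;
}

int main() {
  DomNode leaf, mid, root;
  mid.children = {&leaf};
  root.children = {&mid};
  auto levels = computeDomTreeLevels(&root); // root=0, mid=1, leaf=2
  return static_cast<int>(levels[&leaf]);    // returns 2
}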
static void eliminateDeadInstructions(analysis::DataflowGraph& dfg, BlockSet& blocks, iterator block) { typedef analysis::DataflowGraph::Block Block; typedef analysis::DataflowGraph::RegisterSet RegisterSet; typedef std::vector<unsigned int> KillList; typedef std::vector<PhiInstructionVector::iterator> PhiKillList; typedef std::vector<RegisterSet::iterator> AliveKillList; report(" Eliminating dead instructions from BB_" << block->id()); report(" Removing dead alive out values"); AliveKillList aliveOutKillList; for(RegisterSet::iterator aliveOut = block->aliveOut().begin(); aliveOut != block->aliveOut().end(); ++aliveOut) { if(canRemoveAliveOut(dfg, block, *aliveOut)) { report(" removed " << aliveOut->id); aliveOutKillList.push_back(aliveOut); } } for(AliveKillList::iterator killed = aliveOutKillList.begin(); killed != aliveOutKillList.end(); ++killed) { block->aliveOut().erase(*killed); } KillList killList; report(" Removing dead instructions"); unsigned int index = 0; for(InstructionVector::iterator instruction = block->instructions().begin(); instruction != block->instructions().end(); ++instruction) { if(canRemoveInstruction(block, instruction)) { report(" removed '" << instruction->i->toString() << "'"); killList.push_back(index); // schedule the block for more work report(" scheduled this block again"); blocks.insert(block); } else { ++index; } } for(KillList::iterator killed = killList.begin(); killed != killList.end(); ++killed) { dfg.erase(block, *killed); } PhiKillList phiKillList; report(" Removing dead phi instructions"); for(PhiInstructionVector::iterator phi = block->phis().begin(); phi != block->phis().end(); ++phi) { if(canRemovePhi(block, *phi)) { report(" removed " << phi->d.id); phiKillList.push_back(phi); } } report(" Removing dead alive in values"); AliveKillList aliveInKillList; for(RegisterSet::iterator aliveIn = block->aliveIn().begin(); aliveIn != block->aliveIn().end(); ++aliveIn) { if(canRemoveAliveIn(block, *aliveIn)) { report(" removed " << aliveIn->id); aliveInKillList.push_back(aliveIn); // schedule the predecessors for more work for(BlockPointerSet::iterator predecessor = block->predecessors().begin(); predecessor != block->predecessors().end(); ++predecessor) { report(" scheduled predecessor BB_" << (*predecessor)->id()); blocks.insert(*predecessor); } } } for(AliveKillList::iterator killed = aliveInKillList.begin(); killed != aliveInKillList.end(); ++killed) { block->aliveIn().erase(*killed); } }