// Builds SSA form for the method held by 'irm':
// recomputes dominators, constructs the dominance frontier over the flow graph,
// runs SSABuilder::convertSSA and then marks the IR manager as being in SSA form.
void SSAPass::_run(IRManager& irm) {
    // The dominator tree fetched below is the one computed by this call.
    OptPass::computeDominators(irm);

    DominatorTree* domTree = irm.getDominatorTree();
    ControlFlowGraph& cfg = irm.getFlowGraph();
    DomFrontier domFrontier(irm.getNestedMemoryManager(), *domTree, &cfg);

    OpndManager& opnds = irm.getOpndManager();
    InstFactory& insts = irm.getInstFactory();
    SSABuilder builder(opnds, insts, domFrontier, &cfg, irm.getOptimizerFlags());
    builder.convertSSA(irm.getMethodDesc());

    // Publish the new state on the IR manager.
    irm.setInSsa(true);
    irm.setSsaUpdated();
}
// Unrolls one loop described by 'info'.
//
// The unroll algorithm does the following.
// before:
//   loopOrig {
//       bodyA
//       check(idxOpnd,limitOpnd)
//       bodyB
//   }
// after:
//   unrolledIncOpnd   = unrollCount * idx->increment
//   unrolledLimitOpnd = limitOpnd-unrolledIncOpnd;
//   bodyA
//   loopUnrolled {
//       check(idxOpnd,unrolledLimitOpnd)
//       bodyB
//       bodyA
//       bodyB
//       ...
//       bodyA
//   }
//   loopEpilogue {
//       check(idxOpnd,limitOpnd)
//       bodyB
//       bodyA
//   }
//
// where:
//   bodyA - all nodes of the same loop accessible from checkNode via incoming edges
//   bodyB - all nodes except bodyA and checkNode
//
// Parameters:
//   mm    - memory manager used for the temporary bit vectors, def-use info and rename tables
//   irm   - IR manager providing the flow graph, instruction factory and operand manager
//   info  - loop description: header, exit branch instruction, limit operand, increment, unroll count
//   flags - unroll configuration; not read inside this function
//
// Preconditions (asserted): info->unrollCount >= 1 and the loop header has exactly two in-edges.
static void doUnroll(MemoryManager& mm, IRManager& irm, const LoopUnrollInfo* info, const UnrollFlags& flags) {
    ControlFlowGraph& cfg = irm.getFlowGraph();
    LoopTree* lt = cfg.getLoopTree();
    InstFactory& instFactory = irm.getInstFactory();
    OpndManager& opndManager = irm.getOpndManager();
    Type* opType = info->getLimitOpnd()->getType();

    //STEP 0: cache all data needed
    assert(info->unrollCount >= 1);
    Node* origHeader = info->header;
    assert(origHeader->getInDegree() == 2); //loop is normalized

    OptPass::computeLoops(irm);//recompute loop info if needed
    LoopNode* loopNode = lt->getLoopNode(origHeader, false);

    // Of the header's two in-edges, the entry edge is the one that is not a back edge.
    Edge* entryEdge = origHeader->getInEdges().front();
    if (lt->isBackEdge(entryEdge)) {
        entryEdge = origHeader->getInEdges().back();
    }
    Node* origCheckNode = info->branchInst->getNode();
    // branchTargetIsExit selects which outgoing edge of the check leaves the loop.
    Edge* origLoopExitEdge = info->branchTargetIsExit ? origCheckNode->getTrueEdge() : origCheckNode->getFalseEdge();

    U_32 maxNodeId = cfg.getMaxNodeId()+1; //+1 for a split check node
    // Bit per node id: set for every node that belongs to the loop.
    StlBitVector nodesInLoop(mm, maxNodeId);
    {
        const Nodes& loopNodes = loopNode->getNodesInLoop();
        for (Nodes::const_iterator it = loopNodes.begin(), end = loopNodes.end(); it!=end; ++it) {
            Node* node = *it;
            nodesInLoop.setBit(node->getId());
        }
    }

    //STEP 1: calculate bodyA nodes
    BitSet aFlags(mm, maxNodeId);
    calculateReachableNodesInLoop(loopNode, origHeader, origCheckNode, aFlags);
    // Copy the BitSet result into an StlBitVector, the type passed to duplicateRegion below.
    StlBitVector bodyANodes(mm, maxNodeId);
    for (U_32 i=0;i<maxNodeId;i++) bodyANodes.setBit(i, aFlags.getBit(i));

    //STEP 2: make checkNode a separate node, prepare loop region
    bodyANodes.setBit(origCheckNode->getId(), true);
    // Split at the instruction preceding the branch; the node returned by the split is used as checkNode.
    Node* checkNode = cfg.splitNodeAtInstruction(info->branchInst->prev(), true, false, instFactory.makeLabel());
    nodesInLoop.setBit(checkNode->getId(), true);
    // The original check node keeps its identity; STEP 3 asserts that it now has a single
    // unconditional out-edge leading to checkNode.
    Node* preCheckNode = origCheckNode;
    bodyANodes.setBit(preCheckNode->getId(), true);

    //STEP 3: rotate original loop
    // before: {bodyA1, check , bodyB}
    // after:  bodyA2 {check, bodyB, bodyA1}
    Edge* bodyA2ToCheckEdge = NULL;
    Opnd* limitOpndInBodyA2 = NULL;
    {
        //WARN: info->limitOpnd and info->indexOpnd can be replaced after code duplication if promoted to vars
        Opnd* limitOpndBefore = info->getLimitOpnd();

        assert(preCheckNode->getOutDegree()==1 && preCheckNode->getUnconditionalEdgeTarget() == checkNode);

        DefUseBuilder defUses(mm);
        defUses.initialize(cfg);
        OpndRenameTable opndRenameTable(mm, maxNodeId); //todo: maxNodeId is overkill estimate here
        NodeRenameTable nodeRenameTable(mm, maxNodeId);

        // Duplicate bodyA starting at the header and route the loop entry edge into the copy (bodyA2).
        Node* bodyA2 = FlowGraph::duplicateRegion(irm, origHeader, bodyANodes, defUses, nodeRenameTable, opndRenameTable);
        cfg.replaceEdgeTarget(entryEdge, bodyA2, true);

        // while duplicating a region new nodes could be created and 'nodesInRegion' bitvector param is updated.
        // BodyA is part of the loop -> if new nodes were created in the loop we must track them.
        nodesInLoop.resize(bodyANodes.size());
        for (U_32 i=0;i<bodyANodes.size();i++) nodesInLoop.setBit(i, bodyANodes.getBit(i) || nodesInLoop.getBit(i));

        // Remember the edge from the copied pre-check node into checkNode; STEP 5 splices a block on it.
        Node* bodyA2PreCheckNode = nodeRenameTable.getMapping(preCheckNode);
        assert(bodyA2PreCheckNode->getOutDegree()==1 && bodyA2PreCheckNode->getUnconditionalEdgeTarget() == checkNode);
        bodyA2ToCheckEdge = bodyA2PreCheckNode->getUnconditionalEdge();

        // If the node defining the limit operand was duplicated, use the renamed operand from the copy;
        // otherwise the original operand is used unchanged.
        limitOpndInBodyA2 = limitOpndBefore;
        if (nodeRenameTable.getMapping(limitOpndBefore->getInst()->getNode())!=NULL) {
            limitOpndInBodyA2 = opndRenameTable.getMapping(limitOpndBefore);
        }
        assert(limitOpndInBodyA2!=NULL);
    }

    //STEP 4: prepare epilogue loop: {check, bodyB, bodyA}
    Node* epilogueLoopHead = NULL;
    {
        DefUseBuilder defUses(mm);
        defUses.initialize(cfg);
        OpndRenameTable opndRenameTable(mm, maxNodeId); //todo: maxNodeId is overkill estimate here
        NodeRenameTable nodeRenameTable(mm, maxNodeId);
        // Duplicate the whole (rotated) loop starting at checkNode; the original loop's exit edge
        // is redirected into the copy.
        epilogueLoopHead = FlowGraph::duplicateRegion(irm, checkNode, nodesInLoop, defUses, nodeRenameTable, opndRenameTable);
        cfg.replaceEdgeTarget(origLoopExitEdge, epilogueLoopHead, true);
    }

    //STEP 5: prepare unrolledLimitOpnd and replace it in original loop's check
    {
        // New block on the bodyA2 -> check edge that computes:
        //   unrolledIncOpnd   = increment * unrollCount
        //   unrolledLimitOpnd = limitOpndInBodyA2 - unrolledIncOpnd
        Node* unrolledPreheader = cfg.spliceBlockOnEdge(bodyA2ToCheckEdge, instFactory.makeLabel());
        Opnd* unrolledIncOpnd = opndManager.createSsaTmpOpnd(opType);
        unrolledPreheader->appendInst(instFactory.makeLdConst(unrolledIncOpnd, info->increment * info->unrollCount));
        Opnd* unrolledLimitOpnd = opndManager.createSsaTmpOpnd(opType);
        Modifier mod = Modifier(SignedOp)|Modifier(Strict_No)|Modifier(Overflow_None)|Modifier(Exception_Never);
        unrolledPreheader->appendInst(instFactory.makeSub(mod, unrolledLimitOpnd, limitOpndInBodyA2, unrolledIncOpnd));
        // The original branch now compares against the reduced limit.
        info->branchInst->setSrc(info->branchLimitOpndPos, unrolledLimitOpnd);
    }

    // Def-use info is built once here and shared by every duplication in STEP 6.
    DefUseBuilder defUses(mm);
    defUses.initialize(cfg);

    //STEP 6: unroll original loop and remove all checks in duplicated bodies
    {
        Edge* backedge = preCheckNode->getUnconditionalEdge();
        // One extra copy of the loop body per iteration; unrollCount == 1 adds no copies.
        for (int i=1;i<info->unrollCount;i++) {
            OpndRenameTable opndRenameTable(mm, maxNodeId);
            NodeRenameTable nodeRenameTable(mm, maxNodeId);

            // Chain the new copy after the current tail ...
            Node* unrolledRegionHeader = FlowGraph::duplicateRegion(irm, checkNode, nodesInLoop, defUses, nodeRenameTable, opndRenameTable);
            cfg.replaceEdgeTarget(backedge, unrolledRegionHeader, true);

            // ... and make the copy's tail the new back edge into checkNode.
            Node* newTail = nodeRenameTable.getMapping(preCheckNode);
            assert(newTail->getOutDegree()==1 );
            backedge = newTail->getUnconditionalEdge();
            cfg.replaceEdgeTarget(backedge, checkNode, true);

            //remove check from duplicated code
            Node* duplicateCheckNode = nodeRenameTable.getMapping(checkNode);
            assert(duplicateCheckNode->getOutDegree()==2);
            Edge* exitEdge = info->branchTargetIsExit ? duplicateCheckNode->getTrueEdge() : duplicateCheckNode->getFalseEdge();
            duplicateCheckNode->getLastInst()->unlink();
            cfg.removeEdge(exitEdge);
        }
    }

    //STEP 7: make old loop colder
    if (cfg.hasEdgeProfile()) {
        // Scale the epilogue loop's exit edge probability by 5.
        // NOTE(review): the product is not clamped here - confirm that probabilities above 1
        // cannot occur or are renormalized elsewhere.
        Edge* epilogueExit = info->branchTargetIsExit ? epilogueLoopHead->getTrueEdge() : epilogueLoopHead->getFalseEdge();
        epilogueExit->setEdgeProb(epilogueExit->getEdgeProb() * 5);
    }
}
// Loop unrolling pass entry point.
//
// Collects unroll candidates with findLoopsToUnroll, filters them, takes the CFG out of
// SSA form and unrolls every remaining candidate with doUnroll.
//
// A candidate is removed from the list when:
//   - its 'doUnroll' flag is not set;
//   - its check node is not a junction point (the node sets computed from the header and
//     from the check node intersect);
//   - its exit branch is not a GT/GTE comparison (signed or unsigned);
//   - the loop has more nodes than flags.largeLoopSize;
//   - an edge profile exists and the header hotness is below the threshold of the loop's
//     size class, or the unroll count configured for that size class is < 1;
//   - another candidate shares the same header (multiple exits): with a profile the
//     candidate whose check node has the strictly larger exec count is kept (on a tie the
//     later one is kept), without a profile the first one is kept.
void LoopUnrollPass::_run(IRManager& irm) {
    const UnrollFlags& flags = ((LoopUnrollAction*)getAction())->getFlags();

    OptPass::computeDominatorsAndLoops(irm);
    ControlFlowGraph& cfg = irm.getFlowGraph();
    LoopTree* lt = cfg.getLoopTree();
    if (!lt->hasLoops()) {
        return;
    }

    MemoryManager mm("loopUnrollMM");
    UnrollInfos loopsToUnroll(mm);
    findLoopsToUnroll(mm, irm, loopsToUnroll, flags);
    if (loopsToUnroll.empty()) {
        if (Log::isEnabled()) Log::out() << "No candidates found to unroll"<<std::endl;
        return;
    }
    if (Log::isEnabled()) {
        Log::out()<<"Loops to unroll before filtering:"<<std::endl;
        for (UnrollInfos::const_iterator it = loopsToUnroll.begin(), end = loopsToUnroll.end();it!=end; ++it) {
            const LoopUnrollInfo* info = *it;
            info->print(Log::out());
            Log::out()<<std::endl;
        }
    }

    bool hasProfile = cfg.hasEdgeProfile();

    //filter out that can't be unrolled, calculate BodyA and BodyB
    //rejected entries are overwritten with NULL and erased in one go after both filter loops
    BitSet bodyANodes(mm, cfg.getMaxNodeId()), bodyBNodes(mm, cfg.getMaxNodeId());
    for (UnrollInfos::iterator it = loopsToUnroll.begin(), end = loopsToUnroll.end();it!=end; ++it) {
        LoopUnrollInfo* info = *it;
        if (info == NULL) {
            continue;
        }
        if (!info->doUnroll) {
            *it=NULL;
            continue;
        }
        Node* header=info->header;
        LoopNode* loopHeader = lt->getLoopNode(header, false);
        assert(loopHeader->getHeader() == header);

        Node* checkNode = info->branchInst->getNode();
        bodyANodes.clear();
        bodyBNodes.clear();
        calculateReachableNodesInLoop(loopHeader, loopHeader->getHeader(), checkNode, bodyANodes);
        calculateReachableNodesInLoop(loopHeader, checkNode, NULL, bodyBNodes);
        //the check node is a junction point only if the two node sets are disjoint
        bodyANodes.intersectWith(bodyBNodes);
        bool checkNodeIsJunctionPoint = bodyANodes.isEmpty();
        if (!checkNodeIsJunctionPoint) {
            if (Log::isEnabled()) {
                Log::out()<<"Check node is not a junction point -> removing from the list: branch inst id=I"<<info->branchInst->getId()<<std::endl;
            }
            *it=NULL;
            continue;
        }

        //check if branch semantic is OK
        ComparisonModifier cmpMod = info->branchInst->getModifier().getComparisonModifier();
        if (cmpMod!=Cmp_GT && cmpMod!=Cmp_GTE && cmpMod!=Cmp_GT_Un && cmpMod!=Cmp_GTE_Un) {
            if (Log::isEnabled()) {
                Log::out()<<"Branch is not a range comparison -> removing from the list: branch inst id=I"<<info->branchInst->getId()<<std::endl;
            }
            *it=NULL;
            continue;
        }

        //check config settings
        bool failed = false;
        int nodesInLoop = (int)loopHeader->getNodesInLoop().size();
        const char* reason = "unknown";
        if (nodesInLoop > flags.largeLoopSize) {
            reason = "loop is too large";
            failed = true;
        } else if (hasProfile) {
            //Header hotness in percent of the method entry count.
            //A zero entry count would make the quotient inf/NaN, and converting that (or any
            //out-of-range quotient) to int is undefined behavior. Such a loop is treated as
            //cold instead, and the comparison is done in double. For non-negative ratios this
            //yields the same result as comparing the truncated integer value.
            const double entryExecCount = cfg.getEntryNode()->getExecCount();
            const double headHotness = entryExecCount > 0
                ? header->getExecCount()*100.0 / entryExecCount
                : 0.0;
            int minHeaderHotness= nodesInLoop <= flags.smallLoopSize ? flags.smallLoopHotness :
                nodesInLoop <= flags.mediumLoopSize ? flags.mediumLoopHotness : flags.largeLoopHotness;
            info->unrollCount = nodesInLoop <= flags.smallLoopSize ? flags.smallLoopUnrollCount :
                nodesInLoop <= flags.mediumLoopSize? flags.mediumLoopUnrollCount: flags.largeLoopUnrollCount;
            failed = headHotness < minHeaderHotness || info->unrollCount < 1;
            if (failed) {
                reason = "loop is too cold";
            }
        }
        if (failed) {
            if (Log::isEnabled()) {
                Log::out()<<"Loop does not match unroll configuration ("<<reason<<") -> removing from the list: branch inst id=I"<<info->branchInst->getId()<<std::endl;
            }
            *it=NULL;
        }
    }

    //filter out loops with multiple exits
    for (UnrollInfos::iterator it1 = loopsToUnroll.begin(), end = loopsToUnroll.end();it1!=end; ++it1) {
        const LoopUnrollInfo* info1 = *it1;
        if (info1== NULL) {
            continue;
        }
        Node* header=info1->header;
        for (UnrollInfos::iterator it2 = it1+1; it2!=end; ++it2) {
            const LoopUnrollInfo* info2 = *it2;
            if (info2!=NULL && header==info2->header) {
                if (Log::isEnabled()) {
                    Log::out() << "Found multiple exits:";
                    FlowGraph::printLabel(Log::out(), header);Log::out()<<std::endl;
                }
                if (hasProfile) {
                    //keep the exit whose check node is executed more often
                    Node* check1 = info1->branchInst->getNode();
                    Node* check2 = info2->branchInst->getNode();
                    if (check1->getExecCount() > check2->getExecCount()) {
                        *it2 = NULL;
                    } else {
                        *it1 = NULL;
                    }
                } else {
                    // random selection
                    *it2=NULL;
                }
            }
        }
    }

    //drop all entries that were nulled out by the filters above
    loopsToUnroll.erase(std::remove(loopsToUnroll.begin(), loopsToUnroll.end(), (LoopUnrollInfo*)NULL), loopsToUnroll.end());
    if (loopsToUnroll.empty()) {
        if (Log::isEnabled()) Log::out() << "--------No candidates to unroll left after filtering"<<std::endl;
        return;
    }

    //dessa CFG before unrolling -> need to duplicate regions and we can do it on dessa form only today
    {
        SSABuilder::deconvertSSA(&cfg, irm.getOpndManager());
        irm.setInSsa(false);
    }

    if (Log::isEnabled()) {
        Log::out()<<"--------Loops to unroll after filtering : n="<<loopsToUnroll.size()<<std::endl;
        for (UnrollInfos::const_iterator it = loopsToUnroll.begin(), end = loopsToUnroll.end();it!=end; ++it) {
            const LoopUnrollInfo* info = *it;
            info->print(Log::out());
            Log::out()<<std::endl;
        }
    }

    for (UnrollInfos::const_iterator it = loopsToUnroll.begin(), end = loopsToUnroll.end();it!=end; ++it) {
        const LoopUnrollInfo* info = *it;
        doUnroll(mm, irm, info, flags);
    }
}
// Runs SSABuilder::splitSsaWebs over the flow graph of 'irm', using its operand manager.
void SplitSSAPass::_run(IRManager& irm) {
    ControlFlowGraph& cfg = irm.getFlowGraph();
    OpndManager& opnds = irm.getOpndManager();
    SSABuilder::splitSsaWebs(&cfg, opnds);
}
// Takes the flow graph of 'irm' out of SSA form via SSABuilder::deconvertSSA
// and clears the in-SSA flag on the IR manager.
void DeSSAPass::_run(IRManager& irm) {
    ControlFlowGraph& cfg = irm.getFlowGraph();
    OpndManager& opnds = irm.getOpndManager();
    SSABuilder::deconvertSSA(&cfg, opnds);

    irm.setInSsa(false);
}