// Converts a space-separated list of flag names into a bit mask.
//
// s           - string holding the flag names, separated by ' '.
// iFlagsCount - number of entries in aStrings.
// aStrings    - known flag names; a token equal to aStrings[k] sets bit (1<<k).
//
// Returns the combined mask, or -1 as soon as a token matches none of the
// known names.
int FlagsFromString( const good::string& s, int iFlagsCount, const good::string aStrings[] )
{
    // NOTE(review): the meaning of the second split() argument is not visible here
    // (presumably it trims / drops empty tokens) -- confirm against good::string.
    StringVector aTokens( s.split<good::vector>(' ', true) );
    const int iTokenCount = (int)aTokens.size();

    int iMask = 0;
    for ( int iToken = 0; iToken < iTokenCount; ++iToken )
    {
        // Scan the table for the first name equal to the current token.
        int iFlag = 0;
        while ( iFlag < iFlagsCount && !( aTokens[iToken] == aStrings[iFlag] ) )
            ++iFlag;

        // Ran off the end of the table: the token is not a known flag.
        if ( iFlag == iFlagsCount )
            return -1;

        // The bound check is only false when iFlagsCount is negative; in that
        // case the token is skipped without setting any bit.
        if ( iFlag < iFlagsCount )
        {
            FLAG_SET( 1<<iFlag, iMask );
        }
    }
    return iMask;
}
// Unrolls the loop described by 'info' directly in irm's control flow graph.
//
// mm    - memory manager handed to every bit vector, rename table and def-use
//         builder created in this function.
// irm   - IR manager; supplies the flow graph, instruction factory and operand manager.
// info  - description of the loop: header node, branch instruction of the exit check,
//         which branch edge leaves the loop, limit operand, increment and unroll count.
// flags - not referenced anywhere in this function body.
//
// The function returns nothing; its whole effect is the rewritten flow graph.
static void doUnroll(MemoryManager& mm, IRManager& irm, const LoopUnrollInfo* info, const UnrollFlags& flags) {
    //unroll algorithm does the following
    //before:
    // loopOrig {
    //    bodyA
    //    check(idxOpnd,limitOpnd)
    //    bodyB
    // }
    //after:
    // unrolledIncOpnd = unrollCount * idx->increment
    // unrolledLimitOpnd = limitOpnd-unrolledIncOpnd;
    // bodyA
    // loopUnrolled {
    //     check(idxOpnd,unrolledLimitOpnd)
    //     bodyB
    //     bodyA
    //     bodyB
    //     ...
    //     bodyA
    // }
    // loopEpilogue {
    //     check(idxOpnd,limitOpnd)
    //     bodyB
    //     bodyA
    // }
    //
    //where:
    // bodyA - all nodes of the same loop accessible from checkNode via incoming edges
    // bodyB - all nodes except bodyA and checkNode

    ControlFlowGraph& cfg = irm.getFlowGraph();
    LoopTree* lt = cfg.getLoopTree();
    InstFactory& instFactory = irm.getInstFactory();
    OpndManager& opndManager = irm.getOpndManager();
    // Type of the limit operand; reused for the temporaries created in STEP 5.
    Type* opType = info->getLimitOpnd()->getType();

    // printf("UNROLL\n");

    //STEP 0: cache all data needed
    assert(info->unrollCount >= 1);
    Node* origHeader = info->header;
    assert(origHeader->getInDegree() == 2); //loop is normalized

    OptPass::computeLoops(irm);//recompute loop info if needed
    LoopNode* loopNode = lt->getLoopNode(origHeader, false);

    // The header has exactly two in-edges (asserted above): take the one that is
    // not the back edge as the loop entry edge.
    Edge* entryEdge = origHeader->getInEdges().front();
    if (lt->isBackEdge(entryEdge)) {
        entryEdge = origHeader->getInEdges().back();
    }
    Node* origCheckNode = info->branchInst->getNode();
    // branchTargetIsExit selects which of the two branch edges leaves the loop.
    Edge* origLoopExitEdge = info->branchTargetIsExit ? origCheckNode->getTrueEdge() : origCheckNode->getFalseEdge();

    U_32 maxNodeId = cfg.getMaxNodeId()+1; //+1 for a split check node
    // Bit per node id: set for every node that belongs to the loop.
    StlBitVector nodesInLoop(mm, maxNodeId);
    {
        const Nodes& loopNodes = loopNode->getNodesInLoop();
        for (Nodes::const_iterator it = loopNodes.begin(), end = loopNodes.end(); it!=end; ++it) {
            Node* node = *it;
            nodesInLoop.setBit(node->getId());
        }
    }

    //STEP 1: calculate bodyA nodes
    BitSet aFlags(mm, maxNodeId);
    calculateReachableNodesInLoop(loopNode, origHeader, origCheckNode, aFlags);
    // Copy the BitSet result bit by bit into the StlBitVector form that
    // duplicateRegion() is called with below.
    StlBitVector bodyANodes(mm, maxNodeId);
    for (U_32 i=0;i<maxNodeId;i++) bodyANodes.setBit(i, aFlags.getBit(i));

    //STEP 2: make checkNode a separate node, prepare loop region
    bodyANodes.setBit(origCheckNode->getId(), true);
    // NOTE(review): presumably this splits right after the instruction preceding the
    // branch, so checkNode receives the branch and origCheckNode keeps the code before
    // it -- the asserts in STEP 3 and STEP 6 rely on that shape; confirm against
    // ControlFlowGraph::splitNodeAtInstruction.
    Node* checkNode = cfg.splitNodeAtInstruction(info->branchInst->prev(), true, false, instFactory.makeLabel());
    nodesInLoop.setBit(checkNode->getId(), true);
    // From here on the original check node is treated as the tail of bodyA.
    Node* preCheckNode = origCheckNode;
    bodyANodes.setBit(preCheckNode->getId(), true);

    //STEP 3: rotate original loop
    // before: {bodyA1, check , bodyB}
    // after:  bodyA2 {check, bodyB, bodyA1}
    Edge* bodyA2ToCheckEdge = NULL;
    Opnd* limitOpndInBodyA2 = NULL;
    {
        //WARN: info->limitOpnd and info->indexOpnd can be replaced after code duplication if promoted to vars
        Opnd* limitOpndBefore = info->getLimitOpnd();

        assert(preCheckNode->getOutDegree()==1 && preCheckNode->getUnconditionalEdgeTarget() == checkNode);
        DefUseBuilder defUses(mm);
        defUses.initialize(cfg);
        OpndRenameTable opndRenameTable(mm, maxNodeId); //todo: maxNodeId is overkill estimate here
        NodeRenameTable nodeRenameTable(mm, maxNodeId);
        // Clone bodyA and make the loop entry edge lead into the clone (bodyA2).
        Node* bodyA2 = FlowGraph::duplicateRegion(irm, origHeader, bodyANodes, defUses, nodeRenameTable, opndRenameTable);
        cfg.replaceEdgeTarget(entryEdge, bodyA2, true);

        // while duplicating a region new nodes could be created and 'nodesInRegion' bitvector param is updated.
        // BodyA is part of the loop -> if new nodes were created in the loop we must track them.
        nodesInLoop.resize(bodyANodes.size());
        for (U_32 i=0;i<bodyANodes.size();i++) nodesInLoop.setBit(i, bodyANodes.getBit(i) || nodesInLoop.getBit(i));

        Node* bodyA2PreCheckNode = nodeRenameTable.getMapping(preCheckNode);
        assert(bodyA2PreCheckNode->getOutDegree()==1 && bodyA2PreCheckNode->getUnconditionalEdgeTarget() == checkNode);
        // Remember the edge bodyA2 -> check; STEP 5 splices the new preheader onto it.
        bodyA2ToCheckEdge = bodyA2PreCheckNode->getUnconditionalEdge();
        // If the node defining the limit operand was itself duplicated (it has a
        // mapping), use the renamed operand; otherwise keep the original operand.
        limitOpndInBodyA2 = limitOpndBefore;
        if (nodeRenameTable.getMapping(limitOpndBefore->getInst()->getNode())!=NULL) {
            limitOpndInBodyA2 = opndRenameTable.getMapping(limitOpndBefore);
        }
        assert(limitOpndInBodyA2!=NULL);
    }

    //STEP 4: prepare epilogue loop: {check, bodyB, bodyA}
    Node* epilogueLoopHead = NULL;
    {
        DefUseBuilder defUses(mm);
        defUses.initialize(cfg);
        OpndRenameTable opndRenameTable(mm, maxNodeId); //todo: maxNodeId is overkill estimate here
        NodeRenameTable nodeRenameTable(mm, maxNodeId);
        // Clone the whole (rotated) loop and make the original loop's exit edge
        // lead into the clone.
        epilogueLoopHead = FlowGraph::duplicateRegion(irm, checkNode, nodesInLoop, defUses, nodeRenameTable, opndRenameTable);
        cfg.replaceEdgeTarget(origLoopExitEdge, epilogueLoopHead, true);
    }

    //STEP 5: prepare unrolledLimitOpnd and replace it in original loop's check
    {
        // New block on the bodyA2 -> check edge computing
        //   unrolledLimitOpnd = limitOpndInBodyA2 - (increment * unrollCount)
        Node* unrolledPreheader = cfg.spliceBlockOnEdge(bodyA2ToCheckEdge, instFactory.makeLabel());
        Opnd* unrolledIncOpnd = opndManager.createSsaTmpOpnd(opType);
        unrolledPreheader->appendInst(instFactory.makeLdConst(unrolledIncOpnd, info->increment * info->unrollCount));
        Opnd* unrolledLimitOpnd = opndManager.createSsaTmpOpnd(opType);
        Modifier mod = Modifier(SignedOp)|Modifier(Strict_No)|Modifier(Overflow_None)|Modifier(Exception_Never);
        unrolledPreheader->appendInst(instFactory.makeSub(mod, unrolledLimitOpnd, limitOpndInBodyA2, unrolledIncOpnd));
        // The original branch now compares against the reduced limit.
        info->branchInst->setSrc(info->branchLimitOpndPos, unrolledLimitOpnd);
    }

    // One def-use builder is initialized here and shared by every duplication in STEP 6.
    DefUseBuilder defUses(mm);
    defUses.initialize(cfg);
    //STEP 6: unroll original loop and remove all checks in duplicated bodies
    {
        Edge* backedge = preCheckNode->getUnconditionalEdge();
        // Starts at 1: with unrollCount == 1 no copy is made.
        for (int i=1;i<info->unrollCount;i++) {
            OpndRenameTable opndRenameTable(mm, maxNodeId);
            NodeRenameTable nodeRenameTable(mm, maxNodeId);

            // Append one more copy of the loop region: the current back edge is sent
            // to the copy's header, and the copy's tail edge becomes the new back edge
            // targeting checkNode.
            Node* unrolledRegionHeader = FlowGraph::duplicateRegion(irm, checkNode, nodesInLoop, defUses, nodeRenameTable, opndRenameTable);
            cfg.replaceEdgeTarget(backedge, unrolledRegionHeader, true);

            Node* newTail = nodeRenameTable.getMapping(preCheckNode);
            assert(newTail->getOutDegree()==1 );
            backedge = newTail->getUnconditionalEdge();
            cfg.replaceEdgeTarget(backedge, checkNode, true);

            //remove check from duplicated code
            Node* duplicateCheckNode = nodeRenameTable.getMapping(checkNode);
            assert(duplicateCheckNode->getOutDegree()==2);
            Edge* exitEdge = info->branchTargetIsExit ? duplicateCheckNode->getTrueEdge() : duplicateCheckNode->getFalseEdge();
            // Drop the last instruction of the copied check node and its exit edge,
            // leaving only the non-exit out-edge.
            duplicateCheckNode->getLastInst()->unlink();
            cfg.removeEdge(exitEdge);
        }
    }

    //STEP 7: make old loop colder
    if (cfg.hasEdgeProfile()) {
        Edge* epilogueExit = info->branchTargetIsExit ? epilogueLoopHead->getTrueEdge() : epilogueLoopHead->getFalseEdge();
        // NOTE(review): the product is not clamped here -- confirm that setEdgeProb
        // tolerates (or normalizes) values above 1.0.
        epilogueExit->setEdgeProb(epilogueExit->getEdgeProb() * 5);
    }
}