Esempio n. 1
0
// Unrolls one loop, described by 'info', inside the flow graph owned by 'irm'.
//
// Parameters:
//   mm    - memory manager backing the temporary bit vectors, rename tables and
//           def-use builders created here
//   irm   - IR manager; its flow graph, instruction factory and operand manager
//           are used to modify the method's IR in place
//   info  - description of the loop: header node, branch (check) instruction,
//           limit operand and its position in the branch, increment, unroll count,
//           and whether the branch target is the loop exit
//   flags - unroll configuration; not read anywhere in this function
//
// The function returns nothing; its whole effect is the modification of the CFG.
static void doUnroll(MemoryManager& mm, IRManager& irm, const LoopUnrollInfo* info, const UnrollFlags& flags) {
    //unroll algorithm does the following
    //before:
    // loopOrig {
    //    bodyA
    //    check(idxOpnd,limitOpnd)
    //    bodyB
    // }
    //after:
    // unrolledIncOpnd = unrollCount * idx->increment
    // unrolledLimitOpnd = limitOpnd-unrolledIncOpnd;
    // bodyA 
    // loopUnrolled {
    //     check(idxOpnd,unrolledLimitOpnd)
    //     bodyB
    //     bodyA
    //     bodyB
    //     ...
    //     bodyA
    // }
    // loopEpilogue {
    //    check(idxOpnd,limitOpnd)
    //    bodyB
    //    bodyA
    // }
    //
    //where:
    // bodyA - all nodes of the same loop accessible from checkNode via incoming edges
    // bodyB - all nodes except bodyA and checkNode

    ControlFlowGraph& cfg = irm.getFlowGraph();
    LoopTree* lt = cfg.getLoopTree();
    InstFactory& instFactory = irm.getInstFactory();
    OpndManager& opndManager = irm.getOpndManager();
    // the new operands created in STEP 5 use the same type as the loop limit operand
    Type* opType = info->getLimitOpnd()->getType();
    
 //   printf("UNROLL\n");

    //STEP 0: cache all data needed
    assert(info->unrollCount >= 1);
    Node* origHeader = info->header;
    assert(origHeader->getInDegree() == 2); //loop is normalized

    OptPass::computeLoops(irm);//recompute loop info if needed
    LoopNode* loopNode = lt->getLoopNode(origHeader, false); 
    
    // the header has exactly two in-edges (asserted above): pick the one that is not the back edge
    Edge* entryEdge = origHeader->getInEdges().front();
    if (lt->isBackEdge(entryEdge)) {
        entryEdge = origHeader->getInEdges().back();
    }
    // node that currently holds the loop's check branch, and the edge of that node which leaves the loop
    Node* origCheckNode = info->branchInst->getNode();
    Edge* origLoopExitEdge = info->branchTargetIsExit ? origCheckNode->getTrueEdge() : origCheckNode->getFalseEdge();
    
    U_32 maxNodeId = cfg.getMaxNodeId()+1; //+1 for a split check node
    // bit per node id: set for every node that belongs to the loop
    StlBitVector nodesInLoop(mm, maxNodeId);
    {
        const Nodes& loopNodes = loopNode->getNodesInLoop();
        for (Nodes::const_iterator it = loopNodes.begin(), end = loopNodes.end(); it!=end; ++it) {
            Node* node = *it;
            nodesInLoop.setBit(node->getId());
        }
    }
    
    
    //STEP 1: calculate bodyA nodes
    BitSet aFlags(mm, maxNodeId);
    calculateReachableNodesInLoop(loopNode, origHeader, origCheckNode, aFlags);
    // copy the BitSet result bit by bit into the StlBitVector that is handed to duplicateRegion below
    StlBitVector bodyANodes(mm, maxNodeId);
    for (U_32 i=0;i<maxNodeId;i++) bodyANodes.setBit(i, aFlags.getBit(i));
    
    //STEP 2: make checkNode a separate node, prepare loop region
    bodyANodes.setBit(origCheckNode->getId(), true);
    // split at the instruction preceding the branch; the returned node is used as the check node from here on.
    // NOTE(review): the meaning of the two boolean arguments is not visible here - confirm against splitNodeAtInstruction
    Node* checkNode = cfg.splitNodeAtInstruction(info->branchInst->prev(), true, false, instFactory.makeLabel());
    nodesInLoop.setBit(checkNode->getId(), true);
    // what remains of the original check node is now the last node of bodyA
    // (same node as origCheckNode, so the setBit below repeats the one at the start of this step)
    Node* preCheckNode = origCheckNode;
    bodyANodes.setBit(preCheckNode->getId(), true);
    
    //STEP 3: rotate original loop
    // before: {bodyA1, check , bodyB}
    // after:  bodyA2 {check, bodyB, bodyA1}
    Edge* bodyA2ToCheckEdge = NULL;
    Opnd* limitOpndInBodyA2 = NULL;
    {
        //WARN: info->limitOpnd and info->indexOpnd can be replaced after code duplication if promoted to vars
        Opnd* limitOpndBefore = info->getLimitOpnd();

        assert(preCheckNode->getOutDegree()==1 && preCheckNode->getUnconditionalEdgeTarget() == checkNode);
        DefUseBuilder defUses(mm);
        defUses.initialize(cfg);
        OpndRenameTable opndRenameTable(mm, maxNodeId); //todo: maxNodeId is overkill estimate here
        NodeRenameTable nodeRenameTable(mm, maxNodeId);
        // duplicate bodyA starting at the loop header and route the loop entry edge into the copy (bodyA2)
        Node* bodyA2 = FlowGraph::duplicateRegion(irm, origHeader, bodyANodes, defUses, nodeRenameTable, opndRenameTable);
        cfg.replaceEdgeTarget(entryEdge, bodyA2, true);
        
        // while duplicating a region new nodes could be created and 'nodesInRegion' bitvector param is updated. 
        // BodyA is part of the loop -> if new nodes were created in the loop we must track them.
        nodesInLoop.resize(bodyANodes.size());
        for (U_32 i=0;i<bodyANodes.size();i++) nodesInLoop.setBit(i, bodyANodes.getBit(i) || nodesInLoop.getBit(i));

        // the copy of preCheckNode is the tail of bodyA2; its single out-edge still targets the original check node
        Node* bodyA2PreCheckNode = nodeRenameTable.getMapping(preCheckNode);
        assert(bodyA2PreCheckNode->getOutDegree()==1 && bodyA2PreCheckNode->getUnconditionalEdgeTarget() == checkNode);
        bodyA2ToCheckEdge = bodyA2PreCheckNode->getUnconditionalEdge();
        // if the node defining the limit operand was itself duplicated, use the renamed operand from the copy
        limitOpndInBodyA2 = limitOpndBefore;
        if (nodeRenameTable.getMapping(limitOpndBefore->getInst()->getNode())!=NULL) {
            limitOpndInBodyA2 = opndRenameTable.getMapping(limitOpndBefore);
        }
        assert(limitOpndInBodyA2!=NULL);
    }

    //STEP 4: prepare epilogue loop: {check, bodyB, bodyA}
    Node* epilogueLoopHead = NULL;
    {
        // fresh def-use info and rename tables: the CFG was changed by STEP 3
        DefUseBuilder defUses(mm);
        defUses.initialize(cfg);
        OpndRenameTable opndRenameTable(mm, maxNodeId); //todo: maxNodeId is overkill estimate here
        NodeRenameTable nodeRenameTable(mm, maxNodeId);
        // copy the whole loop starting at the check node; the original loop's exit edge now enters this copy
        epilogueLoopHead = FlowGraph::duplicateRegion(irm, checkNode, nodesInLoop, defUses, nodeRenameTable, opndRenameTable);
        cfg.replaceEdgeTarget(origLoopExitEdge, epilogueLoopHead, true);
    }

    //STEP 5: prepare unrolledLimitOpnd and replace it in original loop's check
    {
        // new block on the edge bodyA2 -> check; it computes
        //   unrolledIncOpnd   = increment * unrollCount
        //   unrolledLimitOpnd = limit - unrolledIncOpnd
        Node* unrolledPreheader = cfg.spliceBlockOnEdge(bodyA2ToCheckEdge, instFactory.makeLabel());
        Opnd* unrolledIncOpnd = opndManager.createSsaTmpOpnd(opType);
        unrolledPreheader->appendInst(instFactory.makeLdConst(unrolledIncOpnd, info->increment * info->unrollCount));
        Opnd* unrolledLimitOpnd = opndManager.createSsaTmpOpnd(opType);
        Modifier mod = Modifier(SignedOp)|Modifier(Strict_No)|Modifier(Overflow_None)|Modifier(Exception_Never);
        unrolledPreheader->appendInst(instFactory.makeSub(mod, unrolledLimitOpnd, limitOpndInBodyA2, unrolledIncOpnd));
        // the original loop's check now compares against the reduced limit
        info->branchInst->setSrc(info->branchLimitOpndPos, unrolledLimitOpnd);
    }

    // def-use info is built once here and the same builder is passed to every duplication in STEP 6
    DefUseBuilder defUses(mm);
    defUses.initialize(cfg);
    //STEP 6: unroll original loop and remove all checks in duplicated bodies
    {
        // edge from the current tail of the unrolled body back to the check node
        Edge* backedge = preCheckNode->getUnconditionalEdge();
        // the original body is the first copy, so unrollCount-1 additional copies are appended
        for (int i=1;i<info->unrollCount;i++) {
            OpndRenameTable opndRenameTable(mm, maxNodeId);
            NodeRenameTable nodeRenameTable(mm, maxNodeId);

            // chain the new copy after the current tail ...
            Node* unrolledRegionHeader = FlowGraph::duplicateRegion(irm, checkNode, nodesInLoop, defUses, nodeRenameTable, opndRenameTable);
            cfg.replaceEdgeTarget(backedge, unrolledRegionHeader, true); 

            // ... and make the copy's tail the new source of the back edge to the original check node
            Node* newTail = nodeRenameTable.getMapping(preCheckNode);
            assert(newTail->getOutDegree()==1 );
            backedge = newTail->getUnconditionalEdge();
            cfg.replaceEdgeTarget(backedge, checkNode, true);
            
            //remove check from duplicated code
            // (unlink the last instruction of the duplicated check node and drop its loop-exit edge)
            Node* duplicateCheckNode = nodeRenameTable.getMapping(checkNode);
            assert(duplicateCheckNode->getOutDegree()==2);
            Edge* exitEdge = info->branchTargetIsExit ? duplicateCheckNode->getTrueEdge() : duplicateCheckNode->getFalseEdge();
            duplicateCheckNode->getLastInst()->unlink();
            cfg.removeEdge(exitEdge);
        }
    }
    
    //STEP 7: make old loop colder
    // done only when an edge profile exists: the probability of the epilogue loop's exit edge is multiplied by 5.
    // NOTE(review): the scaled value is not clamped or renormalized here - confirm this is handled elsewhere
    if (cfg.hasEdgeProfile()) {
        Edge* epilogueExit = info->branchTargetIsExit ? epilogueLoopHead->getTrueEdge() : epilogueLoopHead->getFalseEdge();
        epilogueExit->setEdgeProb(epilogueExit->getEdgeProb() * 5);
    }
}   
Esempio n. 2
0
// Driver of the loop unrolling pass.
//
// Collects unroll candidates with findLoopsToUnroll(), rejects the ones that
// cannot or should not be unrolled, converts the flow graph out of SSA form and
// then calls doUnroll() for every candidate that is left.
// Rejected candidates are marked by storing NULL into their slot of the list;
// all NULL slots are erased in one go after both filtering passes.
//
//   irm - IR manager of the method being optimized
void LoopUnrollPass::_run(IRManager& irm) {
    const UnrollFlags& flags = ((LoopUnrollAction*)getAction())->getFlags();

    OptPass::computeDominatorsAndLoops(irm);
    ControlFlowGraph& flowGraph = irm.getFlowGraph();
    LoopTree* loopTree = flowGraph.getLoopTree();
    if (!loopTree->hasLoops()) {
        return;
    }

    MemoryManager passMM("loopUnrollMM");
    UnrollInfos candidates(passMM);
    findLoopsToUnroll(passMM, irm, candidates, flags);
    if (candidates.empty()) {
        if (Log::isEnabled()) {
            Log::out() << "No candidates found to unroll"<<std::endl;
        }
        return;
    }
    if (Log::isEnabled()) {
        Log::out()<<"Loops to unroll before filtering:"<<std::endl;
        UnrollInfos::const_iterator cur = candidates.begin();
        UnrollInfos::const_iterator last = candidates.end();
        for (; cur != last; ++cur) {
            (*cur)->print(Log::out());
            Log::out()<<std::endl;
        }
    }

    const bool profiled = flowGraph.hasEdgeProfile();

    // Pass 1: per-candidate checks. The two bit sets are scratch space that is
    // cleared and refilled for every candidate.
    BitSet reachToCheck(passMM, flowGraph.getMaxNodeId());
    BitSet reachFromCheck(passMM, flowGraph.getMaxNodeId());
    for (UnrollInfos::iterator slot = candidates.begin(), stop = candidates.end(); slot != stop; ++slot) {
        LoopUnrollInfo* cand = *slot;
        if (cand == NULL) {
            continue;
        }
        if (!cand->doUnroll) {
            *slot = NULL;
            continue;
        }

        Node* loopHead = cand->header;
        LoopNode* loop = loopTree->getLoopNode(loopHead, false);
        assert(loop->getHeader() == loopHead);

        // The check node must separate the loop in two disjoint parts: the two
        // node sets computed below may not have any node in common.
        Node* branchNode = cand->branchInst->getNode();
        reachToCheck.clear();
        reachFromCheck.clear();
        calculateReachableNodesInLoop(loop, loop->getHeader(), branchNode, reachToCheck);
        calculateReachableNodesInLoop(loop, branchNode, NULL, reachFromCheck);
        reachToCheck.intersectWith(reachFromCheck);
        if (!reachToCheck.isEmpty()) {
            if (Log::isEnabled()) {
                Log::out()<<"Check node is not a junction point -> removing from the list: branch inst id=I"<<cand->branchInst->getId()<<std::endl;
            }
            *slot = NULL;
            continue;
        }

        // Only greater-than style comparisons (signed or unsigned) are accepted.
        const ComparisonModifier cmp = cand->branchInst->getModifier().getComparisonModifier();
        const bool rangeCompare = cmp == Cmp_GT || cmp == Cmp_GTE || cmp == Cmp_GT_Un || cmp == Cmp_GTE_Un;
        if (!rangeCompare) {
            if (Log::isEnabled()) {
                Log::out()<<"Branch is not a range comparison -> removing from the list: branch inst id=I"<<cand->branchInst->getId()<<std::endl;
            }
            *slot = NULL;
            continue;
        }

        // Configuration limits: loop size always, hotness only with a profile.
        const int loopSize = static_cast<int>(loop->getNodesInLoop().size());
        const char* reason = "unknown";
        bool rejected = false;
        if (loopSize > flags.largeLoopSize) {
            rejected = true;
            reason = "loop is too large";
        } else if (profiled) {
            // Header hotness as a percentage of the method entry count.
            // NOTE(review): assumes the entry node's exec count is non-zero whenever
            // an edge profile is present - confirm
            int headHotness = static_cast<int>(loopHead->getExecCount()*100.0  / flowGraph.getEntryNode()->getExecCount());
            // Size class (small / medium / large) selects both the hotness
            // threshold and the unroll count stored into the candidate.
            int requiredHotness;
            if (loopSize <= flags.smallLoopSize) {
                requiredHotness = flags.smallLoopHotness;
                cand->unrollCount = flags.smallLoopUnrollCount;
            } else if (loopSize <= flags.mediumLoopSize) {
                requiredHotness = flags.mediumLoopHotness;
                cand->unrollCount = flags.mediumLoopUnrollCount;
            } else {
                requiredHotness = flags.largeLoopHotness;
                cand->unrollCount = flags.largeLoopUnrollCount;
            }
            if (headHotness < requiredHotness || cand->unrollCount < 1) {
                rejected = true;
                reason = "loop is too cold";
            }
        }
        if (rejected) {
            if (Log::isEnabled()) {
                Log::out()<<"Loop does not match unroll configuration ("<<reason<<") -> removing from the list: branch inst id=I"<<cand->branchInst->getId()<<std::endl;
            }
            *slot = NULL;
        }
    }

    // Pass 2: two candidates with the same header mean the loop has several
    // exits; only one of each such pair is kept.
    for (UnrollInfos::iterator first = candidates.begin(), stop = candidates.end(); first != stop; ++first) {
        const LoopUnrollInfo* firstInfo = *first;
        if (firstInfo == NULL) {
            continue;
        }
        Node* sharedHead = firstInfo->header;
        for (UnrollInfos::iterator second = first + 1; second != stop; ++second) {
            const LoopUnrollInfo* secondInfo = *second;
            if (secondInfo == NULL || sharedHead != secondInfo->header) {
                continue;
            }
            if (Log::isEnabled()) {
                Log::out() << "Found multiple exits:";
                FlowGraph::printLabel(Log::out(), sharedHead);
                Log::out()<<std::endl;
            }
            // Without a profile the later candidate is dropped. With a profile
            // the later one is dropped only if the earlier check node has the
            // strictly larger exec count; otherwise the earlier one is dropped.
            bool dropSecond = true;
            if (profiled) {
                Node* firstCheck = firstInfo->branchInst->getNode();
                Node* secondCheck = secondInfo->branchInst->getNode();
                dropSecond = firstCheck->getExecCount() > secondCheck->getExecCount();
            }
            if (dropSecond) {
                *second = NULL;
            } else {
                *first = NULL;
            }
        }
    }

    // Compact the list: drop every slot that was set to NULL above.
    LoopUnrollInfo* const rejectedSlot = NULL;
    candidates.erase(std::remove(candidates.begin(), candidates.end(), rejectedSlot), candidates.end());
    if (candidates.empty()) {
        if (Log::isEnabled()) {
            Log::out() << "--------No candidates to unroll left after filtering"<<std::endl;
        }
        return;
    }

    //dessa CFG before unrolling -> need to duplicate regions and we can do it on dessa form only today
    SSABuilder::deconvertSSA(&flowGraph, irm.getOpndManager());
    irm.setInSsa(false);

    if (Log::isEnabled()) {
        Log::out()<<"--------Loops to unroll after filtering : n="<<candidates.size()<<std::endl;
        UnrollInfos::const_iterator cur = candidates.begin();
        UnrollInfos::const_iterator last = candidates.end();
        for (; cur != last; ++cur) {
            (*cur)->print(Log::out());
            Log::out()<<std::endl;
        }
    }

    UnrollInfos::const_iterator cur = candidates.begin();
    UnrollInfos::const_iterator last = candidates.end();
    for (; cur != last; ++cur) {
        const LoopUnrollInfo* accepted = *cur;
        doUnroll(passMM, irm, accepted, flags);
    }
}