void findLoopsToUnroll(MemoryManager& tmpMM, IRManager& irm, UnrollInfos& result, const UnrollFlags& flags) { ControlFlowGraph& fg = irm.getFlowGraph(); LoopTree* lt = fg.getLoopTree(); //find all loop exits Edges loopExits(tmpMM); const Nodes& nodes = fg.getNodes(); for (Nodes::const_iterator it = nodes.begin(), end = nodes.end(); it!=end; ++it) { Node* node = *it; LoopNode* loopNode = lt->getLoopNode(node, false); if (loopNode == NULL) { continue; //node not in a loop } if (!flags.unrollParentLoops && loopNode->getChild()!=NULL) { continue; //skip parent loops } const Edges& edges = node->getOutEdges(); for (Edges::const_iterator ite = edges.begin(), ende = edges.end(); ite!=ende; ++ite) { Edge* edge = *ite; if (lt->isLoopExit(edge)) { loopExits.push_back(edge); } } } //filter out all edges except branches for (Edges::iterator ite = loopExits.begin(), ende = loopExits.end(); ite!=ende; ++ite) { Edge* edge = *ite; if (edge->isDispatchEdge() || edge->isUnconditionalEdge() || edge->isCatchEdge()) { *ite = NULL; continue; } Inst* lastInst = (Inst*)edge->getSourceNode()->getLastInst(); if (lastInst->isSwitch()) { *ite = NULL; continue; } assert(lastInst->isBranch()); assert(edge->isFalseEdge() || edge->isTrueEdge()); } loopExits.erase(std::remove(loopExits.begin(), loopExits.end(), (Edge*)NULL), loopExits.end()); // analyze every loop exit and prepare unroll info for (Edges::const_iterator ite = loopExits.begin(), ende = loopExits.end(); ite!=ende; ++ite) { Edge* edge = *ite; Node* sourceNode = edge->getSourceNode(); Inst* lastInst = (Inst*)sourceNode->getLastInst(); assert(lastInst->isBranch()); LoopUnrollInfo* info = prepareUnrollInfo(tmpMM, lt, lastInst->asBranchInst()); if (info == NULL) { continue; } if (Log::isEnabled()) { info->print(Log::out()); Log::out()<<std::endl; } result.push_back(info); } }
//------------------------------find_unswitching_candidate----------------------------- // Find candidate "if" for unswitching IfNode* PhaseIdealLoop::find_unswitching_candidate(const IdealLoopTree *loop) const { // Find first invariant test that doesn't exit the loop LoopNode *head = loop->_head->as_Loop(); IfNode* unswitch_iff = NULL; Node* n = head->in(LoopNode::LoopBackControl); while (n != head) { Node* n_dom = idom(n); if (n->is_Region()) { if (n_dom->is_If()) { IfNode* iff = n_dom->as_If(); if (iff->in(1)->is_Bool()) { BoolNode* bol = iff->in(1)->as_Bool(); if (bol->in(1)->is_Cmp()) { // If condition is invariant and not a loop exit, // then found reason to unswitch. if (loop->is_invariant(bol) && !loop->is_loop_exit(iff)) { unswitch_iff = iff; } } } } } n = n_dom; } return unswitch_iff; }
// Clone 'loop' and place both copies behind a new If that tests the constant 1.
// The original loop is entered through the If's true projection (fast path),
// the clone through its false projection (slow / reserve path). The new If is
// recorded in 'lk' via set_iff().
//
// Returns the head of the cloned (slow) loop.
LoopNode* PhaseIdealLoop::create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk) {
  Node_List old_new;
  LoopNode* head = loop->_head->as_Loop();
  Node* entry = head->in(LoopNode::EntryControl);
  _igvn.rehash_node_delayed(entry);
  IdealLoopTree* outer_loop = loop->_parent;

  ConINode* const_1 = _igvn.intcon(1);
  set_ctrl(const_1, C->root());
  IfNode* iff = new IfNode(entry, const_1, PROB_MAX, COUNT_UNKNOWN);
  register_node(iff, outer_loop, entry, dom_depth(entry));
  ProjNode* iffast = new IfTrueNode(iff);
  register_node(iffast, outer_loop, iff, dom_depth(iff));
  ProjNode* ifslow = new IfFalseNode(iff);
  register_node(ifslow, outer_loop, iff, dom_depth(iff));

  // Clone the loop body. The clone becomes the slow loop (it is wired to
  // 'ifslow' below). Until the entry controls are replaced, the original
  // pre-header will (illegally) have 3 control users
  // (old & new loops & new if).
  clone_loop(loop, old_new, dom_depth(head), iff);
  assert(old_new[head->_idx]->is_Loop(), "" );

  LoopNode* slow_head = old_new[head->_idx]->as_Loop();

#ifndef PRODUCT
  if (TraceLoopOpts) {
    tty->print_cr("PhaseIdealLoop::create_reserve_version_of_loop:");
    tty->print("\t iff = %d, ", iff->_idx); iff->dump();
    tty->print("\t iffast = %d, ", iffast->_idx); iffast->dump();
    tty->print("\t ifslow = %d, ", ifslow->_idx); ifslow->dump();
    tty->print("\t before replace_input_of: head = %d, ", head->_idx); head->dump();
    tty->print("\t before replace_input_of: slow_head = %d, ", slow_head->_idx); slow_head->dump();
  }
#endif

  // Fast (true) control
  _igvn.replace_input_of(head, LoopNode::EntryControl, iffast);
  // Slow (false) control
  _igvn.replace_input_of(slow_head, LoopNode::EntryControl, ifslow);

  recompute_dom_depth();

  lk->set_iff(iff);

#ifndef PRODUCT
  if (TraceLoopOpts ) {
    tty->print("\t after replace_input_of: head = %d, ", head->_idx); head->dump();
    tty->print("\t after replace_input_of: slow_head = %d, ", slow_head->_idx); slow_head->dump();
  }
#endif

  // slow_head is already a LoopNode*; no further cast is needed.
  return slow_head;
}
//------------------------------policy_unswitching----------------------------- // Return TRUE or FALSE if the loop should be unswitched // (ie. clone loop with an invariant test that does not exit the loop) bool IdealLoopTree::policy_unswitching( PhaseIdealLoop *phase ) const { if( !LoopUnswitching ) { return false; } uint nodes_left = MaxNodeLimit - phase->C->unique(); if (2 * _body.size() > nodes_left) { return false; // Too speculative if running low on nodes. } LoopNode* head = _head->as_Loop(); if (head->unswitch_count() + 1 > head->unswitch_max()) { return false; } return phase->find_unswitching_candidate(this) != NULL; }
void Block::dump_head( const Block_Array *bbs ) const { // Print the basic block dump_bidx(this); C2OUT->print(": #\t"); // Print the incoming CFG edges and the outgoing CFG edges for( uint i=0; i<_num_succs; i++ ) { non_connector_successor(i)->dump_bidx(_succs[i]); C2OUT->print(" "); } C2OUT->print("<- "); if( head()->is_block_start() ) { for (uint i=1; i<num_preds(); i++) { Node *s = pred(i); if (bbs) { Block *p = (*bbs)[s->_idx]; p->dump_pred(bbs, p); } else { while (!s->is_block_start()) s = s->in(0); C2OUT->print("N%d ",s->_idx); } } } else C2OUT->print("BLOCK HEAD IS JUNK "); // Print loop, if any const Block *bhead = this; // Head of self-loop Node *bh = bhead->head(); if( bbs && bh->is_Loop() && !head()->is_Root() ) { LoopNode *loop = bh->as_Loop(); const Block *bx = (*bbs)[loop->in(LoopNode::LoopBackControl)->_idx]; while (bx->is_connector()) { bx = (*bbs)[bx->pred(1)->_idx]; } C2OUT->print("\tLoop: B%d-B%d ",bhead->_pre_order,bx->_pre_order); // Dump any loop-specific bits, especially for CountedLoops. loop->dump_spec(C2OUT); } C2OUT->print(" Freq: %g",_freq); if( Verbose ) { C2OUT->print(" IDom: %d/#%d",_idom?_idom->_pre_order:0,_dom_depth); C2OUT->print(" RegPressure: %d",_reg_pressure); C2OUT->print(" IHRP Index: %d",_ihrp_index); C2OUT->print(" FRegPressure: %d",_freg_pressure); C2OUT->print(" FHRP Index: %d",_fhrp_index); } C2OUT->cr(); }
//-------------------------create_slow_version_of_loop------------------------ // Create a slow version of the loop by cloning the loop // and inserting an if to select fast-slow versions. // Return control projection of the entry to the fast version. ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop, Node_List &old_new, int opcode) { LoopNode* head = loop->_head->as_Loop(); bool counted_loop = head->is_CountedLoop(); Node* entry = head->in(LoopNode::EntryControl); _igvn.rehash_node_delayed(entry); IdealLoopTree* outer_loop = loop->_parent; Node *cont = _igvn.intcon(1); set_ctrl(cont, C->root()); Node* opq = new Opaque1Node(C, cont); register_node(opq, outer_loop, entry, dom_depth(entry)); Node *bol = new Conv2BNode(opq); register_node(bol, outer_loop, entry, dom_depth(entry)); IfNode* iff = (opcode == Op_RangeCheck) ? new RangeCheckNode(entry, bol, PROB_MAX, COUNT_UNKNOWN) : new IfNode(entry, bol, PROB_MAX, COUNT_UNKNOWN); register_node(iff, outer_loop, entry, dom_depth(entry)); ProjNode* iffast = new IfTrueNode(iff); register_node(iffast, outer_loop, iff, dom_depth(iff)); ProjNode* ifslow = new IfFalseNode(iff); register_node(ifslow, outer_loop, iff, dom_depth(iff)); // Clone the loop body. The clone becomes the fast loop. The // original pre-header will (illegally) have 3 control users // (old & new loops & new if). clone_loop(loop, old_new, dom_depth(head), iff); assert(old_new[head->_idx]->is_Loop(), "" ); // Fast (true) control Node* iffast_pred = clone_loop_predicates(entry, iffast, !counted_loop); _igvn.replace_input_of(head, LoopNode::EntryControl, iffast_pred); set_idom(head, iffast_pred, dom_depth(head)); // Slow (false) control Node* ifslow_pred = clone_loop_predicates(entry, ifslow, !counted_loop); LoopNode* slow_head = old_new[head->_idx]->as_Loop(); _igvn.replace_input_of(slow_head, LoopNode::EntryControl, ifslow_pred); set_idom(slow_head, ifslow_pred, dom_depth(slow_head)); recompute_dom_depth(); return iffast; }
//-------------------------create_slow_version_of_loop------------------------ // Create a slow version of the loop by cloning the loop // and inserting an if to select fast-slow versions. // Return control projection of the entry to the fast version. ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop, Node_List &old_new) { LoopNode* head = loop->_head->as_Loop(); Node* entry = head->in(LoopNode::EntryControl); _igvn.hash_delete(entry); _igvn._worklist.push(entry); IdealLoopTree* outer_loop = loop->_parent; Node *cont = _igvn.intcon(1); set_ctrl(cont, C->root()); Node* opq = new (C, 2) Opaque1Node(C, cont); register_node(opq, outer_loop, entry, dom_depth(entry)); Node *bol = new (C, 2) Conv2BNode(opq); register_node(bol, outer_loop, entry, dom_depth(entry)); IfNode* iff = new (C, 2) IfNode(entry, bol, PROB_MAX, COUNT_UNKNOWN); register_node(iff, outer_loop, entry, dom_depth(entry)); ProjNode* iffast = new (C, 1) IfTrueNode(iff); register_node(iffast, outer_loop, iff, dom_depth(iff)); ProjNode* ifslow = new (C, 1) IfFalseNode(iff); register_node(ifslow, outer_loop, iff, dom_depth(iff)); // Clone the loop body. The clone becomes the fast loop. The // original pre-header will (illegally) have 2 control users (old & new loops). clone_loop(loop, old_new, dom_depth(head), iff); assert(old_new[head->_idx]->is_Loop(), "" ); // Fast (true) control _igvn.hash_delete(head); head->set_req(LoopNode::EntryControl, iffast); set_idom(head, iffast, dom_depth(head)); _igvn._worklist.push(head); // Slow (false) control LoopNode* slow_head = old_new[head->_idx]->as_Loop(); _igvn.hash_delete(slow_head); slow_head->set_req(LoopNode::EntryControl, ifslow); set_idom(slow_head, ifslow, dom_depth(slow_head)); _igvn._worklist.push(slow_head); recompute_dom_depth(); return iffast; }
//------------------------------policy_unswitching----------------------------- // Return TRUE or FALSE if the loop should be unswitched // (ie. clone loop with an invariant test that does not exit the loop) bool IdealLoopTree::policy_unswitching( PhaseIdealLoop *phase ) const { if( !LoopUnswitching ) { return false; } if (!_head->is_Loop()) { return false; } // check for vectorized loops, any unswitching was already applied if (_head->is_CountedLoop() && _head->as_CountedLoop()->do_unroll_only()) { return false; } int nodes_left = phase->C->max_node_limit() - phase->C->live_nodes(); if ((int)(2 * _body.size()) > nodes_left) { return false; // Too speculative if running low on nodes. } LoopNode* head = _head->as_Loop(); if (head->unswitch_count() + 1 > head->unswitch_max()) { return false; } return phase->find_unswitching_candidate(this) != NULL; }
//------------------------------do_unswitching-----------------------------
// Clone loop with an invariant test (that does not exit) and
// insert a clone of the test that selects which version to
// execute.
//
// loop    - the loop to unswitch; find_unswitching_candidate() must yield a
//           non-NULL If for it (asserted).
// old_new - handed to create_slow_version_of_loop() and afterwards indexed by
//           original node _idx to reach the corresponding clone.
//
// After the call, the original loop has its unswitched If hard-wired towards
// the true projection of the new outer If, and the clone has its copy
// hard-wired towards the false projection.
void PhaseIdealLoop::do_unswitching (IdealLoopTree *loop, Node_List &old_new) {

  // Find first invariant test that doesn't exit the loop
  LoopNode *head = loop->_head->as_Loop();

  IfNode* unswitch_iff = find_unswitching_candidate((const IdealLoopTree *)loop);
  assert(unswitch_iff != NULL, "should be at least one");

  // Need to revert back to normal loop
  if (head->is_CountedLoop() && !head->as_CountedLoop()->is_normal_loop()) {
    head->as_CountedLoop()->set_normal_loop();
  }

  // proj_true is the true projection of the new selector If; the original
  // loop head is its only control user (asserted).
  ProjNode* proj_true = create_slow_version_of_loop(loop, old_new);

  assert(proj_true->is_IfTrue() && proj_true->unique_ctrl_out() == head, "by construction");

  // Increment unswitch count (on both the original head and its clone)
  LoopNode* head_clone = old_new[head->_idx]->as_Loop();
  int nct = head->unswitch_count() + 1;
  head->set_unswitch_count(nct);
  head_clone->set_unswitch_count(nct);

  // Add test to new "if" outside of loop: the selector takes over the
  // condition and the probability of the If being unswitched.
  IfNode* invar_iff = proj_true->in(0)->as_If();
  // NOTE(review): invar_iff_c is not referenced again in this function.
  Node* invar_iff_c = invar_iff->in(0);
  BoolNode* bol = unswitch_iff->in(1)->as_Bool();
  invar_iff->set_req(1, bol);
  invar_iff->_prob = unswitch_iff->_prob;

  ProjNode* proj_false = invar_iff->proj_out(0)->as_Proj();

  // Hoist invariant casts out of each loop to the appropriate
  // control projection.

  Node_List worklist;

  for (DUIterator_Fast imax, i = unswitch_iff->fast_outs(imax); i < imax; i++) {
    ProjNode* proj= unswitch_iff->fast_out(i)->as_Proj();
    // Copy to a worklist for easier manipulation
    for (DUIterator_Fast jmax, j = proj->fast_outs(jmax); j < jmax; j++) {
      Node* use = proj->fast_out(j);
      if (use->Opcode() == Op_CheckCastPP && loop->is_invariant(use->in(1))) {
        worklist.push(use);
      }
    }
    // Projection of the outer selector with the same _con as 'proj'.
    ProjNode* invar_proj = invar_iff->proj_out(proj->_con)->as_Proj();
    while (worklist.size() > 0) {
      Node* use = worklist.pop();
      // The hoisted copy is controlled by the outer projection; the cast
      // inside the loop is re-pointed so that its input 1 is the copy.
      Node* nuse = use->clone();
      nuse->set_req(0, invar_proj);
      _igvn.hash_delete(use);
      use->set_req(1, nuse);
      _igvn._worklist.push(use);
      register_new_node(nuse, invar_proj);
      // Same for the clone
      Node* use_clone = old_new[use->_idx];
      _igvn.hash_delete(use_clone);
      use_clone->set_req(1, nuse);
      _igvn._worklist.push(use_clone);
    }
  }

  // Hardwire the control paths in the loops into if(true) and if(false)
  _igvn.hash_delete(unswitch_iff);
  short_circuit_if(unswitch_iff, proj_true);
  _igvn._worklist.push(unswitch_iff);

  IfNode* unswitch_iff_clone = old_new[unswitch_iff->_idx]->as_If();
  _igvn.hash_delete(unswitch_iff_clone);
  short_circuit_if(unswitch_iff_clone, proj_false);
  _igvn._worklist.push(unswitch_iff_clone);

  // Reoptimize loops: the original body is recorded through the loop tree,
  // the cloned body nodes are pushed on the IGVN worklist one by one.
  loop->record_for_igvn();
  for(int i = loop->_body.size() - 1; i >= 0 ; i--) {
    Node *n = loop->_body[i];
    Node *n_clone = old_new[n->_idx];
    _igvn._worklist.push(n_clone);
  }

#ifndef PRODUCT
  if (TraceLoopUnswitching) {
    tty->print_cr("Loop unswitching orig: %d @ %d new: %d @ %d", head->_idx, unswitch_iff->_idx, old_new[head->_idx]->_idx, unswitch_iff_clone->_idx);
  }
#endif

  C->set_major_progress();
}
// Convert a run of sibling XML nodes into AST statements.
//
// Starting at 'node', siblings are visited while they are elements. "setq"
// elements become assignments, "if" elements become conditionals and "loop"
// elements become LoopNodes (condition taken from the <cond> child, body
// built recursively from the children of <body>). Elements with any other
// name are skipped.
//
// Returns the statements in document order. A NULL 'node' yields an empty
// list. A malformed loop (unknown or missing type, missing <cond> or <body>)
// is reported on std::cerr and terminates the program with exit(1).
list<Node*> AST::process_node(tinyxml2::XMLNode* node) { /* {{{ */
    list<Node*> statements;

    while (node != NULL && node->ToElement() != NULL) {
        tinyxml2::XMLElement* element = node->ToElement();
        std::string node_type(node->Value());

        // Assignments
        if (node_type == "setq") {
            AssignmentNode* an = do_assignment(element);
            statements.push_back(an);
#ifdef DEBUG
            std::cout << an->to_string() << std::endl;
#endif
        }
        else if (node_type == "if") {
            ConditionalNode* cond = do_conditional(element);
            statements.push_back(cond);
#ifdef DEBUG
            std::cout << cond->to_string() << std::endl;
#endif
        }
        else if (node_type == "loop") {
            LoopNode* loop = NULL;
            if (element->Attribute("type", "for"))
                loop = new LoopNode(LoopNode::LOOP_FOR);
            else if (element->Attribute("type", "while"))
                loop = new LoopNode(LoopNode::LOOP_WHILE);
            else if (element->Attribute("type", "do-while"))
                loop = new LoopNode(LoopNode::LOOP_DO_WHILE);
            // Invalid loop type
            else {
                // Attribute() returns NULL when the attribute is absent, and
                // building a std::string from NULL is undefined behavior.
                const char* loop_type = element->Attribute("type");
                std::cerr << "Invalid loop type: "
                          << (loop_type != NULL ? loop_type : "(missing)") << std::endl;
                exit(1);
            }

            // Get condition
            tinyxml2::XMLElement* cond = node->FirstChildElement("cond");
            if (cond == NULL) {
                std::cerr << "Loop has no <cond> element" << std::endl;
                exit(1);
            }

            // Condition is an operation (e.g. +, -, <)
            if (cond->FirstChildElement("o") != NULL) {
                loop->set_condition(do_operator(cond->FirstChildElement("o")));
            }
            // Condition is a variable
            else if (cond->FirstChildElement("v") != NULL) {
                // Set it to variable
                std::string var_name = cond->FirstChild()->Value();
                loop->set_condition(new ValueNode(var_name, reg_number(var_name, VAR_REG)));
            }
            // Condition is a constant
            else if (cond->FirstChildElement("c") != NULL) {
                // Set it to constant (has to be int)
                std::string val(cond->FirstChild()->Value());
                // Start from a defined value: sscanf leaves 'i' untouched
                // when the text is not a number.
                int i = 0;
                // Parse int
                if (sscanf(val.c_str(), "%d", &i) != 1) {
                    std::cerr << "Loop condition constant is not an integer: " << val
                              << " (using 0)" << std::endl;
                }
                loop->set_condition(new ValueNode(i));
            }
            // NOTE(review): when <cond> has none of <o>, <v>, <c> the loop is
            // left without a condition, as before — confirm this is intended.

            // Get body (and process)
            tinyxml2::XMLElement* body = node->FirstChildElement("body");
            if (body == NULL) {
                std::cerr << "Loop has no <body> element" << std::endl;
                exit(1);
            }
            loop->set_body(process_node(body->FirstChild()));

#ifdef DEBUG
            std::cout << loop->to_string() << std::endl;
#endif

            // Add loop to statements
            statements.push_back(loop);
        }

        node = node->NextSibling();
    }

    return statements;
} /* }}} */
// Unrolls one loop described by 'info' in place in the flow graph of 'irm'.
//
//   mm    - memory manager used for all temporary bit vectors, def-use
//           builders and rename tables created here
//   irm   - IR manager owning the flow graph, the instruction factory and the
//           operand manager that are modified/used
//   info  - candidate description: loop header, exit branch, limit operand,
//           increment and unroll count (unrollCount >= 1 is asserted)
//   flags - NOTE(review): not referenced in this function
static void doUnroll(MemoryManager& mm, IRManager& irm, const LoopUnrollInfo* info, const UnrollFlags& flags) {
    //unroll algorithm does the following
    //before:
    // loopOrig {
    //    bodyA
    //    check(idxOpnd,limitOpnd)
    //    bodyB
    // }
    //after:
    // unrolledIncOpnd = unrollCount * idx->increment
    // unrolledLimitOpnd = limitOpnd-unrolledIncOpnd;
    // bodyA
    // loopUnrolled {
    //     check(idxOpnd,unrolledLimitOpnd)
    //     bodyB
    //     bodyA
    //     bodyB
    //     ...
    //     bodyA
    // }
    // loopEpilogue {
    //     check(idxOpnd,limitOpnd)
    //     bodyB
    //     bodyA
    // }
    //
    //where:
    // bodyA - all nodes of the same loop accessible from checkNode via incoming edges
    // bodyB - all nodes except bodyA and checkNode

    ControlFlowGraph& cfg = irm.getFlowGraph();
    LoopTree* lt = cfg.getLoopTree();
    InstFactory& instFactory = irm.getInstFactory();
    OpndManager& opndManager = irm.getOpndManager();
    Type* opType = info->getLimitOpnd()->getType();

    //STEP 0: cache all data needed
    assert(info->unrollCount >= 1);
    Node* origHeader = info->header;
    assert(origHeader->getInDegree() == 2); //loop is normalized

    OptPass::computeLoops(irm);//recompute loop info if needed
    LoopNode* loopNode = lt->getLoopNode(origHeader, false);

    // Of the header's two in-edges, the entry edge is the one that is not the back edge.
    Edge* entryEdge = origHeader->getInEdges().front();
    if (lt->isBackEdge(entryEdge)) {
        entryEdge = origHeader->getInEdges().back();
    }
    Node* origCheckNode = info->branchInst->getNode();
    Edge* origLoopExitEdge = info->branchTargetIsExit ? origCheckNode->getTrueEdge() : origCheckNode->getFalseEdge();

    U_32 maxNodeId = cfg.getMaxNodeId()+1; //+1 for a split check node
    StlBitVector nodesInLoop(mm, maxNodeId);
    {
        const Nodes& loopNodes = loopNode->getNodesInLoop();
        for (Nodes::const_iterator it = loopNodes.begin(), end = loopNodes.end(); it!=end; ++it) {
            Node* node = *it;
            nodesInLoop.setBit(node->getId());
        }
    }

    //STEP 1: calculate bodyA nodes
    BitSet aFlags(mm, maxNodeId);
    calculateReachableNodesInLoop(loopNode, origHeader, origCheckNode, aFlags);
    // Copy the result bit by bit into the bit-vector type used by duplicateRegion below.
    StlBitVector bodyANodes(mm, maxNodeId);
    for (U_32 i=0;i<maxNodeId;i++) bodyANodes.setBit(i, aFlags.getBit(i));

    //STEP 2: make checkNode a separate node, prepare loop region
    bodyANodes.setBit(origCheckNode->getId(), true);
    Node* checkNode = cfg.splitNodeAtInstruction(info->branchInst->prev(), true, false, instFactory.makeLabel());
    nodesInLoop.setBit(checkNode->getId(), true);
    // After the split the original node is what precedes the check (see the assert in STEP 3).
    Node* preCheckNode = origCheckNode;
    bodyANodes.setBit(preCheckNode->getId(), true);

    //STEP 3: rotate original loop
    // before: {bodyA1, check , bodyB}
    // after:  bodyA2 {check, bodyB, bodyA1}
    Edge* bodyA2ToCheckEdge = NULL;
    Opnd* limitOpndInBodyA2 = NULL;
    {
        //WARN: info->limitOpnd and info->indexOpnd can be replaced after code duplication if promoted to vars
        Opnd* limitOpndBefore = info->getLimitOpnd();

        assert(preCheckNode->getOutDegree()==1 && preCheckNode->getUnconditionalEdgeTarget() == checkNode);
        DefUseBuilder defUses(mm);
        defUses.initialize(cfg);
        OpndRenameTable opndRenameTable(mm, maxNodeId); //todo: maxNodeId is overkill estimate here
        NodeRenameTable nodeRenameTable(mm, maxNodeId);
        Node* bodyA2 = FlowGraph::duplicateRegion(irm, origHeader, bodyANodes, defUses, nodeRenameTable, opndRenameTable);
        cfg.replaceEdgeTarget(entryEdge, bodyA2, true);

        // while duplicating a region new nodes could be created and 'nodesInRegion' bitvector param is updated.
        // BodyA is part of the loop -> if new nodes were created in the loop we must track them.
        nodesInLoop.resize(bodyANodes.size());
        for (U_32 i=0;i<bodyANodes.size();i++) nodesInLoop.setBit(i, bodyANodes.getBit(i) || nodesInLoop.getBit(i));

        Node* bodyA2PreCheckNode = nodeRenameTable.getMapping(preCheckNode);
        assert(bodyA2PreCheckNode->getOutDegree()==1 && bodyA2PreCheckNode->getUnconditionalEdgeTarget() == checkNode);
        bodyA2ToCheckEdge = bodyA2PreCheckNode->getUnconditionalEdge();
        // If the node defining the limit operand was duplicated, use the renamed operand in bodyA2.
        limitOpndInBodyA2 = limitOpndBefore;
        if (nodeRenameTable.getMapping(limitOpndBefore->getInst()->getNode())!=NULL) {
            limitOpndInBodyA2 = opndRenameTable.getMapping(limitOpndBefore);
        }
        assert(limitOpndInBodyA2!=NULL);
    }

    //STEP 4: prepare epilogue loop: {check, bodyB, bodyA}
    Node* epilogueLoopHead = NULL;
    {
        DefUseBuilder defUses(mm);
        defUses.initialize(cfg);
        OpndRenameTable opndRenameTable(mm, maxNodeId); //todo: maxNodeId is overkill estimate here
        NodeRenameTable nodeRenameTable(mm, maxNodeId);
        epilogueLoopHead = FlowGraph::duplicateRegion(irm, checkNode, nodesInLoop, defUses, nodeRenameTable, opndRenameTable);
        // The exit of the (to be unrolled) loop now enters the epilogue copy.
        cfg.replaceEdgeTarget(origLoopExitEdge, epilogueLoopHead, true);
    }

    //STEP 5: prepare unrolledLimitOpnd and replace it in original loop's check
    {
        Node* unrolledPreheader = cfg.spliceBlockOnEdge(bodyA2ToCheckEdge, instFactory.makeLabel());
        Opnd* unrolledIncOpnd = opndManager.createSsaTmpOpnd(opType);
        unrolledPreheader->appendInst(instFactory.makeLdConst(unrolledIncOpnd, info->increment * info->unrollCount));
        Opnd* unrolledLimitOpnd = opndManager.createSsaTmpOpnd(opType);
        Modifier mod = Modifier(SignedOp)|Modifier(Strict_No)|Modifier(Overflow_None)|Modifier(Exception_Never);
        unrolledPreheader->appendInst(instFactory.makeSub(mod, unrolledLimitOpnd, limitOpndInBodyA2, unrolledIncOpnd));
        info->branchInst->setSrc(info->branchLimitOpndPos, unrolledLimitOpnd);
    }

    DefUseBuilder defUses(mm);
    defUses.initialize(cfg);
    //STEP 6: unroll original loop and remove all checks in duplicated bodies
    {
        Edge* backedge = preCheckNode->getUnconditionalEdge();
        for (int i=1;i<info->unrollCount;i++) {
            OpndRenameTable opndRenameTable(mm, maxNodeId);
            NodeRenameTable nodeRenameTable(mm, maxNodeId);

            // Chain a fresh copy of the loop body between the current tail and checkNode.
            Node* unrolledRegionHeader = FlowGraph::duplicateRegion(irm, checkNode, nodesInLoop, defUses, nodeRenameTable, opndRenameTable);
            cfg.replaceEdgeTarget(backedge, unrolledRegionHeader, true);

            Node* newTail = nodeRenameTable.getMapping(preCheckNode);
            assert(newTail->getOutDegree()==1 );
            backedge = newTail->getUnconditionalEdge();
            cfg.replaceEdgeTarget(backedge, checkNode, true);

            //remove check from duplicated code
            Node* duplicateCheckNode = nodeRenameTable.getMapping(checkNode);
            assert(duplicateCheckNode->getOutDegree()==2);
            Edge* exitEdge = info->branchTargetIsExit ? duplicateCheckNode->getTrueEdge() : duplicateCheckNode->getFalseEdge();
            duplicateCheckNode->getLastInst()->unlink();
            cfg.removeEdge(exitEdge);
        }
    }

    //STEP 7: make old loop colder
    if (cfg.hasEdgeProfile()) {
        Edge* epilogueExit = info->branchTargetIsExit ? epilogueLoopHead->getTrueEdge() : epilogueLoopHead->getFalseEdge();
        // NOTE(review): the exit probability is multiplied by 5 without an upper bound — confirm this cannot exceed the valid range.
        epilogueExit->setEdgeProb(epilogueExit->getEdgeProb() * 5);
    }
}
// Analyzes one loop-exit branch and describes how its loop could be unrolled.
//
//   mm         - memory manager used for the definition stack and the returned info
//   lt         - loop tree used to look up the loop that contains the branch
//   branchInst - conditional branch that leaves the loop
//
// Returns NULL when the branch has a single source operand or when either
// operand is not an integer or is reported as Int8 by its type. Otherwise
// returns a LoopUnrollInfo whose doUnroll flag is set only if one operand is
// a loop counter and the other one is classified as DOL or LDConst by
// processOpnd(); branchLimitOpndPos then holds the position of the
// non-counter (limit) operand and increment the counter's increment.
static LoopUnrollInfo* prepareUnrollInfo(MemoryManager& mm, LoopTree* lt, BranchInst* branchInst) {
    if (Log::isEnabled()) {
        Log::out()<<"==Checking loop exit:"; branchInst->print(Log::out()); Log::out()<<std::endl;
    }

    //traverse loop and track all modifications
    Node* node = branchInst->getNode();
    LoopNode* loopHeader = lt->getLoopNode(node, false);

    Opnd* opnd1 = branchInst->getSrc(0);
    Opnd* opnd2 = branchInst->getNumSrcOperands()==1?NULL:branchInst->getSrc(1);

    if (opnd2==NULL) {
        assert(branchInst->getComparisonModifier() == Cmp_Zero || branchInst->getComparisonModifier() == Cmp_NonZero);
        assert(opnd1->getType()->isObject());
        if (Log::isEnabled()) {
            Log::out()<<"----Unsupported comparison modifier."<<std::endl;
        }
        return NULL;
    }
    if (!opnd1->getType()->isInteger() || opnd1->getType()->isInt8() ||
        !opnd2->getType()->isInteger() || opnd2->getType()->isInt8())
    {
        if (Log::isEnabled()) {
            Log::out()<<"----Unsupported opnd types."<<std::endl;
        }
        return NULL; //IMPROVE: longs and floating types are not supported
    }

    InstStack defStack(mm);

    // Every other log statement in this function is guarded; do the same for
    // the two progress messages so nothing is formatted when logging is off.
    if (Log::isEnabled()) {
        Log::out()<<"----Analyzing opnd1 id="<<opnd1->getId()<<std::endl;
    }
    OpndLoopInfo opndInfo1 = processOpnd(loopHeader, lt, defStack, opnd1);
    assert(defStack.empty());

    if (Log::isEnabled()) {
        Log::out()<<"----Analyzing opnd2 id="<<opnd2->getId()<<std::endl;
    }
    OpndLoopInfo opndInfo2 = processOpnd(loopHeader, lt, defStack, opnd2);
    assert(defStack.empty());

    if(Log::isEnabled()) {
        Log::out()<<"----Result: opndId1="<<opnd1->getId()<<" type=";opndInfo1.print(Log::out());
        Log::out()<<", opndId2="<<opnd2->getId()<<" type=";opndInfo2.print(Log::out());Log::out()<<std::endl;
    }

    //default values -> this item will not be unrolled unless all constraints are OK
    LoopUnrollInfo* info = new (mm) LoopUnrollInfo();
    info->header = loopHeader->getHeader();
    info->branchInst = branchInst;
    // The branch target is the exit when its label's node lies outside the loop.
    info->branchTargetIsExit = !loopHeader->inLoop(branchInst->getTargetLabel()->getNode());
    info->doUnroll = false;

    if (opndInfo1.isCounter() && (opndInfo2.isDOL() || opndInfo2.isLDConst())) {
        // opnd1 counts, opnd2 (source position 1) is the limit
        info->doUnroll = true;
        info->branchLimitOpndPos=1;
        info->increment = opndInfo1.getIncrement();
    } else if (opndInfo2.isCounter() && (opndInfo1.isDOL() || opndInfo1.isLDConst())) {
        // opnd2 counts, opnd1 (source position 0) is the limit
        info->doUnroll = true;
        info->branchLimitOpndPos=0;
        info->increment = opndInfo2.getIncrement();
    }
    return info;
}
// Entry point of the loop unrolling pass.
//
// The pass recomputes dominators and loops, collects candidate exit branches
// with findLoopsToUnroll(), drops candidates that fail any of the checks
// below, keeps at most one candidate per loop header, converts the flow graph
// out of SSA form and finally calls doUnroll() for every surviving candidate.
void LoopUnrollPass::_run(IRManager& irm) {
    const UnrollFlags& flags = ((LoopUnrollAction*)getAction())->getFlags();

    OptPass::computeDominatorsAndLoops(irm);
    ControlFlowGraph& flowGraph = irm.getFlowGraph();
    LoopTree* loopTree = flowGraph.getLoopTree();
    if (!loopTree->hasLoops()) {
        return;
    }

    MemoryManager passMM("loopUnrollMM");
    UnrollInfos candidates(passMM);
    findLoopsToUnroll(passMM, irm, candidates, flags);
    if (candidates.empty()) {
        if (Log::isEnabled()) {
            Log::out() << "No candidates found to unroll"<<std::endl;
        }
        return;
    }
    if (Log::isEnabled()) {
        Log::out()<<"Loops to unroll before filtering:"<<std::endl;
        for (UnrollInfos::const_iterator dumpIt = candidates.begin(), dumpEnd = candidates.end(); dumpIt != dumpEnd; ++dumpIt) {
            const LoopUnrollInfo* shown = *dumpIt;
            shown->print(Log::out());
            Log::out()<<std::endl;
        }
    }

    const bool hasProfile = flowGraph.hasEdgeProfile();

    // Pass 1: reject candidates that cannot be unrolled. Rejected slots are
    // overwritten with NULL and compacted away after pass 2.
    BitSet reachableBeforeCheck(passMM, flowGraph.getMaxNodeId()), reachableAfterCheck(passMM, flowGraph.getMaxNodeId());
    for (UnrollInfos::iterator slot = candidates.begin(), slotEnd = candidates.end(); slot != slotEnd; ++slot) {
        LoopUnrollInfo* info = *slot;
        if (info == NULL) {
            continue;
        }
        if (!info->doUnroll) {
            *slot = NULL;
            continue;
        }

        Node* header = info->header;
        LoopNode* loop = loopTree->getLoopNode(header, false);
        assert(loop->getHeader() == header);

        // The check node must separate the loop body into two disjoint parts
        // (bodyA before the check, bodyB after it).
        Node* checkNode = info->branchInst->getNode();
        reachableBeforeCheck.clear();
        reachableAfterCheck.clear();
        calculateReachableNodesInLoop(loop, loop->getHeader(), checkNode, reachableBeforeCheck);
        calculateReachableNodesInLoop(loop, checkNode, NULL, reachableAfterCheck);
        reachableBeforeCheck.intersectWith(reachableAfterCheck);
        if (!reachableBeforeCheck.isEmpty()) {
            if (Log::isEnabled()) {
                Log::out()<<"Check node is not a junction point -> removing from the list: branch inst id=I"<<info->branchInst->getId()<<std::endl;
            }
            *slot = NULL;
            continue;
        }

        // Only greater / greater-or-equal comparisons (signed or unsigned) are accepted.
        const ComparisonModifier cmp = info->branchInst->getModifier().getComparisonModifier();
        const bool isRangeCompare = cmp == Cmp_GT || cmp == Cmp_GTE || cmp == Cmp_GT_Un || cmp == Cmp_GTE_Un;
        if (!isRangeCompare) {
            if (Log::isEnabled()) {
                Log::out()<<"Branch is not a range comparison -> removing from the list: branch inst id=I"<<info->branchInst->getId()<<std::endl;
            }
            *slot = NULL;
            continue;
        }

        // Configuration limits: loop size always, hotness and unroll count
        // only when an edge profile is available.
        const int loopSize = (int)loop->getNodesInLoop().size();
        const char* reason = "unknown";
        bool rejected = false;
        if (loopSize > flags.largeLoopSize) {
            reason = "loop is too large";
            rejected = true;
        } else if (hasProfile) {
            // NOTE(review): assumes the entry node's exec count is non-zero whenever a profile exists — confirm.
            const int headHotness = (int)(header->getExecCount()*100.0 / flowGraph.getEntryNode()->getExecCount());
            int requiredHotness;
            if (loopSize <= flags.smallLoopSize) {
                requiredHotness = flags.smallLoopHotness;
                info->unrollCount = flags.smallLoopUnrollCount;
            } else if (loopSize <= flags.mediumLoopSize) {
                requiredHotness = flags.mediumLoopHotness;
                info->unrollCount = flags.mediumLoopUnrollCount;
            } else {
                requiredHotness = flags.largeLoopHotness;
                info->unrollCount = flags.largeLoopUnrollCount;
            }
            rejected = headHotness < requiredHotness || info->unrollCount < 1;
            if (rejected) {
                reason = "loop is too cold";
            }
        }
        if (rejected) {
            if (Log::isEnabled()) {
                Log::out()<<"Loop does not match unroll configuration ("<<reason<<") -> removing from the list: branch inst id=I"<<info->branchInst->getId()<<std::endl;
            }
            *slot = NULL;
        }
    }

    // Pass 2: a loop with several candidate exits keeps only one of them.
    for (UnrollInfos::iterator outer = candidates.begin(), outerEnd = candidates.end(); outer != outerEnd; ++outer) {
        const LoopUnrollInfo* first = *outer;
        if (first == NULL) {
            continue;
        }
        Node* sharedHeader = first->header;
        for (UnrollInfos::iterator inner = outer + 1; inner != outerEnd; ++inner) {
            const LoopUnrollInfo* second = *inner;
            if (second == NULL || sharedHeader != second->header) {
                continue;
            }
            if (Log::isEnabled()) {
                Log::out() << "Found multiple exits:"; FlowGraph::printLabel(Log::out(), sharedHeader);Log::out()<<std::endl;
            }
            if (!hasProfile) {
                // random selection
                *inner = NULL;
                continue;
            }
            // With a profile, the exit whose check node executes more often wins;
            // on a tie the later one is kept.
            Node* firstCheck = first->branchInst->getNode();
            Node* secondCheck = second->branchInst->getNode();
            if (firstCheck->getExecCount() > secondCheck->getExecCount()) {
                *inner = NULL;
            } else {
                *outer = NULL;
            }
        }
    }

    candidates.erase(std::remove(candidates.begin(), candidates.end(), (LoopUnrollInfo*)NULL), candidates.end());
    if (candidates.empty()) {
        if (Log::isEnabled()) {
            Log::out() << "--------No candidates to unroll left after filtering"<<std::endl;
        }
        return;
    }

    //dessa CFG before unrolling -> need to duplicate regions and we can do it on dessa form only today
    SSABuilder::deconvertSSA(&flowGraph, irm.getOpndManager());
    irm.setInSsa(false);

    if (Log::isEnabled()) {
        Log::out()<<"--------Loops to unroll after filtering : n="<<candidates.size()<<std::endl;
        for (UnrollInfos::const_iterator dumpIt = candidates.begin(), dumpEnd = candidates.end(); dumpIt != dumpEnd; ++dumpIt) {
            const LoopUnrollInfo* shown = *dumpIt;
            shown->print(Log::out());
            Log::out()<<std::endl;
        }
    }

    for (UnrollInfos::const_iterator workIt = candidates.begin(), workEnd = candidates.end(); workIt != workEnd; ++workIt) {
        const LoopUnrollInfo* chosen = *workIt;
        doUnroll(passMM, irm, chosen, flags);
    }
}
// Emit the opening line of a loop ("For <variable> In <set> {") at the current
// indentation, then increase the indentation level and end the line.
void PrintEquelleASTVisitor::visit(LoopNode& node)
{
    std::cout << indent();
    std::cout << "For " << node.loopVariable();
    std::cout << " In " << node.loopSet();
    std::cout << " {";
    ++indent_;
    endl();
}
// Print a one-line description of a loop node ("LoopNode: For <variable> In
// <set>") at the current indentation, then increase the indentation level.
void PrintASTVisitor::visit(LoopNode& node)
{
    std::cout << indent();
    std::cout << "LoopNode: For " << node.loopVariable();
    std::cout << " In " << node.loopSet();
    std::cout << "\n";
    ++indent_;
}
// Create the AST node for the start of a loop over 'loop_set' with iteration
// variable 'loop_variable', stamp it with a FileLocation built from yylineno
// and return it. The node is allocated with new.
LoopNode* handleLoopStart(const std::string& loop_variable, const std::string& loop_set)
{
    LoopNode* const loop_start = new LoopNode(loop_variable, loop_set);
    loop_start->setLocation(FileLocation(yylineno));
    return loop_start;
}