//------------------------------match------------------------------------------
// Construct projections for control, I/O, memory-fields, ..., and
// return result(s) along with their RegMask info
Node *CallNode::match( const ProjNode *proj, const Matcher *match ) {
  switch (proj->_con) {
  case TypeFunc::Control:
  case TypeFunc::I_O:
  case TypeFunc::Memory:
    return new (match->C, 1) MachProjNode(this,proj->_con,RegMask::Empty,MachProjNode::unmatched_proj);

  case TypeFunc::Parms+1:       // For LONG & DOUBLE returns
    assert(tf()->_range->field_at(TypeFunc::Parms+1) == Type::HALF, "");
    // 2nd half of doubles and longs
    return new (match->C, 1) MachProjNode(this,proj->_con, RegMask::Empty, (uint)OptoReg::Bad);

  case TypeFunc::Parms: {       // Normal returns
    uint ideal_reg = Matcher::base2reg[tf()->range()->field_at(TypeFunc::Parms)->base()];
    OptoRegPair regs = is_CallRuntime()
      ? match->c_return_value(ideal_reg,true)  // Calls into C runtime
      : match->  return_value(ideal_reg,true); // Calls into compiled Java code
    RegMask rm = RegMask(regs.lo());
    if( regs.hi() != OptoReg::Bad )
      rm.Insert( regs.hi() );
    return new (match->C, 1) MachProjNode(this,proj->_con,rm,ideal_reg);
  }

  case TypeFunc::ReturnAdr:
  case TypeFunc::FramePtr:
  default:
    ShouldNotReachHere();
  }
  return NULL;
}
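//------------------------------example: return-value mask----------------------
// Hedged, self-contained sketch (not VM source): models how match() above
// builds a RegMask from an OptoRegPair-style (lo, hi) answer.  A value that
// fits one register reports hi == BAD and contributes a single bit; a LONG or
// DOUBLE on a 32-bit target reports both halves.  ToyMask, BAD, and the
// register numbers below are invented purely for illustration.
#include <bitset>
#include <cassert>

namespace toy {
  const int BAD = -1;                    // stands in for OptoReg::Bad
  typedef std::bitset<64> ToyMask;       // stands in for RegMask

  ToyMask return_value_mask(int lo, int hi) {
    ToyMask rm;
    rm.set(lo);                          // low half is always present
    if (hi != BAD)
      rm.set(hi);                        // second half of a LONG/DOUBLE pair
    return rm;
  }
}

int toy_return_mask_demo() {
  toy::ToyMask m_int  = toy::return_value_mask(0, toy::BAD); // e.g. one GPR
  toy::ToyMask m_long = toy::return_value_mask(0, 3);        // e.g. a GPR pair
  assert(m_int.count()  == 1);
  assert(m_long.count() == 2);
  return 0;
}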
//------------------------------sched_call-------------------------------------
uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call ) {
  RegMask regs;

  // Schedule all the users of the call right now.  All the users are
  // projection Nodes, so they must be scheduled next to the call.
  // Collect all the defined registers.
  for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
    Node* n = mcall->fast_out(i);
    assert( n->Opcode()==Op_MachProj, "" );
    --ready_cnt[n->_idx];
    assert( !ready_cnt[n->_idx], "" );
    // Schedule next to call
    _nodes.map(node_cnt++, n);
    // Collect defined registers
    regs.OR(n->out_RegMask());
    // Check for scheduling the next control-definer
    if( n->bottom_type() == Type::CONTROL )
      // Warm up next pile of heuristic bits
      needed_for_next_call(n, next_call, bbs);

    // Children of projections are now all ready
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j); // Get user
      if( bbs[m->_idx] != this ) continue;
      if( m->is_Phi() ) continue;
      if( !--ready_cnt[m->_idx] )
        worklist.push(m);
    }
  }

  // Act as if the call defines the Frame Pointer.
  // Certainly the FP is alive and well after the call.
  regs.Insert(matcher.c_frame_pointer());

  // Set all registers killed and not already defined by the call.
  uint r_cnt = mcall->tf()->range()->cnt();
  int op = mcall->ideal_Opcode();
  MachProjNode *proj = new (matcher.C, 1) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
  bbs.map(proj->_idx,this);
  _nodes.insert(node_cnt++, proj);
  for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) {
    if( !regs.Member(r) ) {     // Not already defined by the call
      // Save-on-call register?
      if( (matcher._register_save_policy[r] == 'C') ||
          (matcher._register_save_policy[r] == 'A') ||
          ((matcher._register_save_policy[r] == 'E') &&
           (op == Op_CallRuntime ||
            op == Op_CallNative ||
            op == Op_CallInterpreter ||
            op == Op_CallLeaf)) ) {
        proj->_rout.Insert(r);
      }
    }
  }
  return node_cnt;
}
//------------------------------compute_separating_interferences---------------
// Factored code from copy_copy that computes extra interferences from
// lengthening a live range by double-coalescing.
uint PhaseConservativeCoalesce::compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint reg_degree, uint rm_size, uint lr1, uint lr2 ) {

  assert(!lrgs(lr1)._fat_proj, "cannot coalesce fat_proj");
  assert(!lrgs(lr2)._fat_proj, "cannot coalesce fat_proj");
  Node *prev_copy = dst_copy->in(dst_copy->is_Copy());
  Block *b2 = b;
  uint bindex2 = bindex;
  while( 1 ) {
    // Find previous instruction
    bindex2--;                  // Chain backwards 1 instruction
    while( bindex2 == 0 ) {     // At block start, find prior block
      assert( b2->num_preds() == 2, "cannot double coalesce across c-flow" );
      b2 = _phc._cfg._bbs[b2->pred(1)->_idx];
      bindex2 = b2->end_idx()-1;
    }
    // Get prior instruction
    assert(bindex2 < b2->_nodes.size(), "index out of bounds");
    Node *x = b2->_nodes[bindex2];
    if( x == prev_copy ) {      // Previous copy in copy chain?
      if( prev_copy == src_copy)// Found end of chain and all interferences
        break;                  // So break out of loop
      // Else work back one in copy chain
      prev_copy = prev_copy->in(prev_copy->is_Copy());
    } else {                    // Else collect interferences
      uint lidx = _phc.Find(x);
      // Found another def of live-range being stretched?
      if( lidx == lr1 ) return max_juint;
      if( lidx == lr2 ) return max_juint;

      // If we attempt to coalesce across a bound def
      if( lrgs(lidx).is_bound() ) {
        // Do not let the coalesced LRG expect to get the bound color
        rm.SUBTRACT( lrgs(lidx).mask() );
        // Recompute rm_size
        rm_size = rm.Size();
        //if( rm._flags ) rm_size += 1000000;
        if( reg_degree >= rm_size ) return max_juint;
      }
      if( rm.overlap(lrgs(lidx).mask()) ) {
        // Insert lidx into union LRG; returns TRUE if actually inserted
        if( _ulr.insert(lidx) ) {
          // Infinite-stack neighbors do not alter colorability, as they
          // can always color to some other color.
          if( !lrgs(lidx).mask().is_AllStack() ) {
            // If this coalesce will make any new neighbor uncolorable,
            // do not coalesce.
            if( lrgs(lidx).just_lo_degree() )
              return max_juint;
            // Bump our degree
            if( ++reg_degree >= rm_size )
              return max_juint;
          } // End of if not infinite-stack neighbor
        } // End of if actually inserted
      } // End of if live range overlaps
    } // End of else collect interferences for 1 node
  } // End of while forever, scan back for interferences
  return reg_degree;
}
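//------------------------------example: backward scan--------------------------
// Hedged, self-contained sketch (not VM source): the traversal shape used by
// compute_separating_interferences above.  Walking instruction-by-instruction
// backwards, and hopping to the lone predecessor whenever the scan reaches a
// block start, visits a straight-line region in reverse order.  ToyBlock and
// the integer instruction payloads are invented; the real code asserts
// num_preds() == 2 because slot 0 of a block's input list is not a real
// predecessor edge, so "2 preds" means exactly one incoming control path.
#include <vector>
#include <cstdio>

struct ToyBlock {
  ToyBlock* pred;                 // lone predecessor (NULL at region head)
  std::vector<int> insns;         // instructions, in schedule order
};

static void toy_scan_back(ToyBlock* b, size_t index) {
  while (true) {
    if (index == 0) {             // at block start: hop to the prior block
      if (b->pred == NULL) return;
      b = b->pred;
      index = b->insns.size();
    }
    index--;                      // chain backwards one instruction
    std::printf("%d ", b->insns[index]);  // "visit" (collect interferences)
  }
}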
//------------------------------add_call_kills-------------------------------------
// helper function that adds caller save registers to MachProjNode
static void add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe) {
  // Fill in the kill mask for the call
  for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) {
    if( !regs.Member(r) ) {     // Not already defined by the call
      // Save-on-call register?
      if ((save_policy[r] == 'C') ||
          (save_policy[r] == 'A') ||
          ((save_policy[r] == 'E') && exclude_soe)) {
        proj->_rout.Insert(r);
      }
    }
  }
}
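//------------------------------example: save-policy kill mask------------------
// Hedged, self-contained sketch (not VM source): models the loop in
// add_call_kills above.  Each register carries a one-character policy, and the
// letters match the checks above: 'C' = caller-save, 'A' = always save,
// 'E' = save-on-entry, 'N' = callee-save.  A register the call does not
// itself define, but which the policy says a callee may clobber, lands in the
// kill mask.  The 4-register machine below is invented for illustration.
#include <bitset>
#include <cstdio>

static std::bitset<4> toy_call_kills(const std::bitset<4>& defined,
                                     const char* save_policy,
                                     bool exclude_soe) {
  std::bitset<4> kills;
  for (int r = 0; r < 4; r++) {
    if (defined.test(r)) continue;       // already defined by the call
    char p = save_policy[r];
    if (p == 'C' || p == 'A' || (p == 'E' && exclude_soe))
      kills.set(r);                      // killed across the call
  }
  return kills;
}

int toy_kill_demo() {
  std::bitset<4> defined;                // the call defines register 0 only
  defined.set(0);
  // r0 caller-save, r1 callee-save, r2 save-on-entry, r3 caller-save
  std::bitset<4> k = toy_call_kills(defined, "CNEC", /*exclude_soe=*/true);
  for (int r = 0; r < 4; r++)
    if (k.test(r)) std::printf("r%d killed\n", r);  // prints r2 and r3
  return 0;
}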
//------------------------------schedule_local---------------------------------
// Topological sort within a block.  Someday become a real scheduler.
bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call) {
  // Already "sorted" are the block start Node (as the first entry), and
  // the block-ending Node and any trailing control projections.  We leave
  // these alone.  PhiNodes and ParmNodes are made to follow the block start
  // Node.  Everything else gets topo-sorted.

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print_cr("# --- schedule_local B%d, before: ---", block->_pre_order);
    for (uint i = 0; i < block->number_of_nodes(); i++) {
      tty->print("# ");
      block->get_node(i)->fast_dump();
    }
    tty->print_cr("#");
  }
#endif

  // RootNode is already sorted
  if (block->number_of_nodes() == 1) {
    return true;
  }

  // Move PhiNodes and ParmNodes from 1 to cnt up to the start
  uint node_cnt = block->end_idx();
  uint phi_cnt = 1;
  uint i;
  for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
    Node *n = block->get_node(i);
    if( n->is_Phi() ||          // Found a PhiNode or ParmNode
        (n->is_Proj() && n->in(0) == block->head()) ) {
      // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
      block->map_node(block->get_node(phi_cnt), i);
      block->map_node(n, phi_cnt++); // swap Phi/Parm up front
    } else {                    // All others
      // Count block-local inputs to 'n'
      uint cnt = n->len();      // Input count
      uint local = 0;
      for( uint j=0; j<cnt; j++ ) {
        Node *m = n->in(j);
        if( m && get_block_for_node(m) == block && !m->is_top() )
          local++;              // One more block-local input
      }
      ready_cnt.at_put(n->_idx, local); // Count em up

#ifdef ASSERT
      if( UseConcMarkSweepGC || UseG1GC ) {
        if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
          // Check the precedence edges
          for (uint prec = n->req(); prec < n->len(); prec++) {
            Node* oop_store = n->in(prec);
            if (oop_store != NULL) {
              assert(get_block_for_node(oop_store)->_dom_depth <= block->_dom_depth, "oop_store must dominate card-mark");
            }
          }
        }
      }
#endif

      // A few node types require changing a required edge to a precedence edge
      // before allocation.
      if( n->is_Mach() && n->req() > TypeFunc::Parms &&
          (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire ||
           n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) {
        // MemBarAcquire could be created without Precedent edge.
        // del_req() replaces the specified edge with the last input edge
        // and then removes the last edge.  If the specified edge > number of
        // edges the last edge will be moved outside of the input edges array
        // and the edge will be lost.  This is why this code should be
        // executed only when Precedent (== TypeFunc::Parms) edge is present.
        Node *x = n->in(TypeFunc::Parms);
        n->del_req(TypeFunc::Parms);
        n->add_prec(x);
      }
    }
  }
  for(uint i2=i; i2< block->number_of_nodes(); i2++ ) // Trailing guys get zapped count
    ready_cnt.at_put(block->get_node(i2)->_idx, 0);

  // All the prescheduled guys do not hold back internal nodes
  uint i3;
  for(i3 = 0; i3<phi_cnt; i3++ ) {  // For all pre-scheduled
    Node *n = block->get_node(i3);  // Get pre-scheduled
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j);
      if (get_block_for_node(m) == block) { // Local-block user
        int m_cnt = ready_cnt.at(m->_idx)-1;
        ready_cnt.at_put(m->_idx, m_cnt);   // Fix ready count
      }
    }
  }

  Node_List delay;
  // Make a worklist
  Node_List worklist;
  for(uint i4=i3; i4<node_cnt; i4++ ) {  // Put ready guys on worklist
    Node *m = block->get_node(i4);
    if( !ready_cnt.at(m->_idx) ) {       // Zero ready count?
      if (m->is_iteratively_computed()) {
        // Push induction variable increments last to allow other uses
        // of the phi to be scheduled first.  The select() method breaks
        // ties in scheduling by worklist order.
        delay.push(m);
      } else if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_CreateEx) {
        // Force the CreateEx to the top of the list so it's processed
        // first and ends up at the start of the block.
        worklist.insert(0, m);
      } else {
        worklist.push(m);       // Then on to worklist!
      }
    }
  }
  while (delay.size()) {
    Node* d = delay.pop();
    worklist.push(d);
  }

  // Warm up the 'next_call' heuristic bits
  needed_for_next_call(block, block->head(), next_call);

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    for (uint j=0; j< block->number_of_nodes(); j++) {
      Node *n = block->get_node(j);
      int idx = n->_idx;
      tty->print("#   ready cnt:%3d  ", ready_cnt.at(idx));
      tty->print("latency:%3d  ", get_latency_for_node(n));
      tty->print("%4d: %s\n", idx, n->Name());
    }
  }
#endif

  uint max_idx = (uint)ready_cnt.length();
  // Pull from worklist and schedule
  while( worklist.size() ) {    // Worklist is not ready

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("#   ready list:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i];  // Get Node on worklist
        tty->print(" %d", n->_idx);
      }
      tty->cr();
    }
#endif

    // Select and pop a ready guy from worklist
    Node* n = select(block, worklist, ready_cnt, next_call, phi_cnt);
    block->map_node(n, phi_cnt++); // Schedule him next

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("#    select %d: %s", n->_idx, n->Name());
      tty->print(", latency:%d", get_latency_for_node(n));
      n->dump();
      if (Verbose) {
        tty->print("#   ready list:");
        for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
          Node *n = worklist[i]; // Get Node on worklist
          tty->print(" %d", n->_idx);
        }
        tty->cr();
      }
    }
#endif

    if( n->is_MachCall() ) {
      MachCallNode *mcall = n->as_MachCall();
      phi_cnt = sched_call(block, phi_cnt, worklist, ready_cnt, mcall, next_call);
      continue;
    }

    if (n->is_Mach() && n->as_Mach()->has_call()) {
      RegMask regs;
      regs.Insert(_matcher.c_frame_pointer());
      regs.OR(n->out_RegMask());

      MachProjNode *proj = new (C) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj );
      map_node_to_block(proj, block);
      block->insert_node(proj, phi_cnt++);

      add_call_kills(proj, regs, _matcher._c_reg_save_policy, false);
    }

    // Children are now all ready
    for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
      Node* m = n->fast_out(i5); // Get user
      if (get_block_for_node(m) != block) {
        continue;
      }
      if( m->is_Phi() ) continue;
      if (m->_idx >= max_idx) { // new node, skip it
        assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
        continue;
      }
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }
  }

  if( phi_cnt != block->end_idx() ) {
    // did not schedule all.  Retry, Bailout, or Die
    if (C->subsume_loads() == true && !C->failing()) {
      // Retry with subsume_loads == false
      // If this is the first failure, the sentinel string will "stick"
      // to the Compile object, and the C2Compiler will see it and retry.
      C->record_failure(C2Compiler::retry_no_subsuming_loads());
    }
    // assert( phi_cnt == end_idx(), "did not schedule all" );
    return false;
  }

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print_cr("#");
    tty->print_cr("# after schedule_local");
    for (uint i = 0; i < block->number_of_nodes(); i++) {
      tty->print("# ");
      block->get_node(i)->fast_dump();
    }
    tty->cr();
  }
#endif

  return true;
}
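//------------------------------example: ready-count scheduling-----------------
// Hedged, self-contained sketch (not VM source): the core of schedule_local is
// Kahn-style topological sorting.  Each node carries a count of not-yet-
// scheduled block-local inputs; a node whose count reaches zero goes on the
// worklist, and scheduling it decrements the counts of its users.  The tiny
// DAG and names below are invented; the real select() applies latency and
// register-pressure heuristics rather than popping in stack order.
#include <vector>
#include <cstdio>

static std::vector<int> toy_schedule(const std::vector<std::vector<int> >& users) {
  int n = (int)users.size();
  std::vector<int> ready_cnt(n, 0), order, worklist;
  for (int d = 0; d < n; d++)
    for (size_t u = 0; u < users[d].size(); u++)
      ready_cnt[users[d][u]]++;          // count inputs per user
  for (int x = 0; x < n; x++)
    if (ready_cnt[x] == 0) worklist.push_back(x);
  while (!worklist.empty()) {
    int x = worklist.back(); worklist.pop_back();  // "select" a ready node
    order.push_back(x);                  // schedule him next
    for (size_t u = 0; u < users[x].size(); u++)
      if (--ready_cnt[users[x][u]] == 0)
        worklist.push_back(users[x][u]); // user just became ready
  }
  return order;                          // shorter than n => cycle/bailout
}

int toy_schedule_demo() {
  // Edges: 0 -> 1 -> 3 and 0 -> 2 -> 3
  std::vector<std::vector<int> > users(4);
  users[0].push_back(1); users[0].push_back(2);
  users[1].push_back(3); users[2].push_back(3);
  std::vector<int> order = toy_schedule(users);
  for (size_t i = 0; i < order.size(); i++)
    std::printf("%d ", order[i]);        // prints "0 2 1 3"
  std::printf("\n");
  return 0;
}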
//------------------------------sched_call-------------------------------------
uint PhaseCFG::sched_call(Block* block, uint node_cnt, Node_List& worklist, GrowableArray<int>& ready_cnt, MachCallNode* mcall, VectorSet& next_call) {
  RegMask regs;

  // Schedule all the users of the call right now.  All the users are
  // projection Nodes, so they must be scheduled next to the call.
  // Collect all the defined registers.
  for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
    Node* n = mcall->fast_out(i);
    assert( n->is_MachProj(), "" );
    int n_cnt = ready_cnt.at(n->_idx)-1;
    ready_cnt.at_put(n->_idx, n_cnt);
    assert( n_cnt == 0, "" );
    // Schedule next to call
    block->map_node(n, node_cnt++);
    // Collect defined registers
    regs.OR(n->out_RegMask());
    // Check for scheduling the next control-definer
    if( n->bottom_type() == Type::CONTROL )
      // Warm up next pile of heuristic bits
      needed_for_next_call(block, n, next_call);

    // Children of projections are now all ready
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j); // Get user
      if(get_block_for_node(m) != block) {
        continue;
      }
      if( m->is_Phi() ) continue;
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }
  }

  // Act as if the call defines the Frame Pointer.
  // Certainly the FP is alive and well after the call.
  regs.Insert(_matcher.c_frame_pointer());

  // Set all registers killed and not already defined by the call.
  uint r_cnt = mcall->tf()->range()->cnt();
  int op = mcall->ideal_Opcode();
  MachProjNode *proj = new (C) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
  map_node_to_block(proj, block);
  block->insert_node(proj, node_cnt++);

  // Select the right register save policy.
  const char * save_policy;
  switch (op) {
    case Op_CallRuntime:
    case Op_CallLeaf:
    case Op_CallLeafNoFP:
      // Calling C code so use C calling convention
      save_policy = _matcher._c_reg_save_policy;
      break;

    case Op_CallStaticJava:
    case Op_CallDynamicJava:
      // Calling Java code so use Java calling convention
      save_policy = _matcher._register_save_policy;
      break;

    default:
      ShouldNotReachHere();
  }

  // When using CallRuntime mark SOE registers as killed by the call
  // so values that could show up in the RegisterMap aren't live in a
  // callee saved register since the register wouldn't know where to
  // find them.  CallLeaf and CallLeafNoFP are ok because they can't
  // have debug info on them.  Strictly speaking this only needs to be
  // done for oops since idealreg2debugmask takes care of debug info
  // references but there is no way to handle oops differently than other
  // pointers as far as the kill mask goes.
  bool exclude_soe = op == Op_CallRuntime;

  // If the call is a MethodHandle invoke, we need to exclude the
  // register which is used to save the SP value over MH invokes from
  // the mask.  Otherwise this register could be used for
  // deoptimization information.
  if (op == Op_CallStaticJava) {
    MachCallStaticJavaNode* mcallstaticjava = (MachCallStaticJavaNode*) mcall;
    if (mcallstaticjava->_method_handle_invoke)
      proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask());
  }

  add_call_kills(proj, regs, save_policy, exclude_soe);

  return node_cnt;
}
//------------------------------lrg_union---------------------------------------
uint IndexSet::lrg_union(uint lr1, uint lr2,
                         const uint fail_degree,
                         const PhaseIFG *ifg,
                         const RegMask &mask ) {
  IndexSet *one = ifg->neighbors(lr1);
  IndexSet *two = ifg->neighbors(lr2);
  LRG &lrg1 = ifg->lrgs(lr1);
  LRG &lrg2 = ifg->lrgs(lr2);
#ifdef ASSERT
  assert(_max_elements == one->_max_elements, "max element mismatch");
  check_watch("union destination");
  one->check_watch("union source");
  two->check_watch("union source");
#endif

  // Compute the degree of the combined live-range.  The combined
  // live-range has the union of the original live-ranges' neighbors set as
  // well as the neighbors of all intermediate copies, minus those neighbors
  // that can not use the intersected allowed-register-set.

  // Copy the larger set.  Insert the smaller set into the larger.
  if (two->count() > one->count()) {
    IndexSet *temp = one;
    one = two;
    two = temp;
  }

  clear();

  // Used to compute degree of register-only interferences.  Infinite-stack
  // neighbors do not alter colorability, as they can always color to some
  // other color.  (A variant of the Briggs assertion)
  uint reg_degree = 0;

  uint element;
  // Load up the combined interference set with the neighbors of one
  IndexSetIterator elements(one);
  while ((element = elements.next()) != 0) {
    LRG &lrg = ifg->lrgs(element);
    if (mask.overlap(lrg.mask())) {
      insert(element);
      if( !lrg.mask().is_AllStack() ) {
        reg_degree += lrg1.compute_degree(lrg);
        if( reg_degree >= fail_degree ) return reg_degree;
      } else {
        // !!!!! Danger!  No update to reg_degree despite having a neighbor.
        // A variant of the Briggs assertion.
        // Not needed if I simplify during coalesce, ala George/Appel.
        assert( lrg.lo_degree(), "" );
      }
    }
  }
  // Add neighbors of two as well
  IndexSetIterator elements2(two);
  while ((element = elements2.next()) != 0) {
    LRG &lrg = ifg->lrgs(element);
    if (mask.overlap(lrg.mask())) {
      if (insert(element)) {
        if( !lrg.mask().is_AllStack() ) {
          reg_degree += lrg2.compute_degree(lrg);
          if( reg_degree >= fail_degree ) return reg_degree;
        } else {
          // !!!!! Danger!  No update to reg_degree despite having a neighbor.
          // A variant of the Briggs assertion.
          // Not needed if I simplify during coalesce, ala George/Appel.
          assert( lrg.lo_degree(), "" );
        }
      }
    }
  }

  return reg_degree;
}
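//------------------------------example: Briggs degree test---------------------
// Hedged, self-contained sketch (not VM source): models why lrg_union can stop
// early.  Under the Briggs criterion a combined live range is trivially
// colorable when fewer than k of its neighbors are themselves of significant
// degree; lrg_union approximates this by accumulating reg_degree only for
// register-bound (non-AllStack) neighbors whose masks overlap the combined
// mask, and failing once the count reaches fail_degree.  ToyNeighbor and its
// fields are invented for illustration.
#include <vector>

struct ToyNeighbor {
  bool overlaps_mask;   // does its allowed set intersect the combined mask?
  bool all_stack;       // can it always spill to a fresh stack slot?
  int  degree_contrib;  // how many colors it can take away from us
};

static bool toy_union_colorable(const std::vector<ToyNeighbor>& neighbors,
                                unsigned fail_degree) {
  unsigned reg_degree = 0;
  for (size_t i = 0; i < neighbors.size(); i++) {
    const ToyNeighbor& nb = neighbors[i];
    if (!nb.overlaps_mask) continue;     // cannot compete for our registers
    if (nb.all_stack) continue;          // infinite stack: never blocks a color
    reg_degree += nb.degree_contrib;
    if (reg_degree >= fail_degree)
      return false;                      // combined range may be uncolorable
  }
  return true;
}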
//------------------------------copy_copy--------------------------------------
// See if I can coalesce a series of multiple copies together.  I need the
// final dest copy and the original src copy.  They can be the same Node.
// Compute the compatible register masks.
bool PhaseConservativeCoalesce::copy_copy( Node *dst_copy, Node *src_copy, Block *b, uint bindex ) {

  if( !dst_copy->is_SpillCopy() ) return false;
  if( !src_copy->is_SpillCopy() ) return false;
  Node *src_def = src_copy->in(src_copy->is_Copy());
  uint lr1 = _phc.Find(dst_copy);
  uint lr2 = _phc.Find(src_def );

  // Same live ranges already?
  if( lr1 == lr2 ) return false;

  // Interfere?
  if( _phc._ifg->test_edge_sq( lr1, lr2 ) ) return false;

  // Not an oop->int cast; oop->oop, int->int, AND int->oop are OK.
  if( !lrgs(lr1)._is_oop && lrgs(lr2)._is_oop ) // not an oop->int cast
    return false;

  // Coalescing between an aligned live range and a mis-aligned live range?
  // No, no!  Alignment changes how we count degree.
  if( lrgs(lr1)._fat_proj != lrgs(lr2)._fat_proj )
    return false;

  // Sort; use smaller live-range number
  Node *lr1_node = dst_copy;
  Node *lr2_node = src_def;
  if( lr1 > lr2 ) {
    uint tmp = lr1; lr1 = lr2; lr2 = tmp;
    lr1_node = src_def;
    lr2_node = dst_copy;
  }

  // Check for compatibility of the 2 live ranges by
  // intersecting their allowed register sets.
  RegMask rm = lrgs(lr1).mask();
  rm.AND(lrgs(lr2).mask());
  // Number of bits free
  uint rm_size = rm.Size();

  if (UseFPUForSpilling && rm.is_AllStack() ) {
    // Don't coalesce when frequency difference is large
    Block *dst_b = _phc._cfg._bbs[dst_copy->_idx];
    Block *src_def_b = _phc._cfg._bbs[src_def->_idx];
    if (src_def_b->_freq > 10*dst_b->_freq )
      return false;
  }

  // If we can use any stack slot, then effective size is infinite
  if( rm.is_AllStack() ) rm_size += 1000000;
  // Incompatible masks, no way to coalesce
  if( rm_size == 0 ) return false;

  // Another early bail-out test is when we are double-coalescing and the
  // 2 copies are separated by some control flow.
  if( dst_copy != src_copy ) {
    Block *src_b = _phc._cfg._bbs[src_copy->_idx];
    Block *b2 = b;
    while( b2 != src_b ) {
      if( b2->num_preds() > 2 ){ // Found merge-point
        _phc._lost_opp_cflow_coalesce++;
        // extra record_bias commented out because Chris believes it is not
        // productive.  Since we can record only 1 bias, we want to choose one
        // that stands a chance of working and this one probably does not.
        //record_bias( _phc._lrgs, lr1, lr2 );
        return false;           // Too hard to find all interferences
      }
      b2 = _phc._cfg._bbs[b2->pred(1)->_idx];
    }
  }

  // Union the two interference sets together into '_ulr'
  uint reg_degree = _ulr.lrg_union( lr1, lr2, rm_size, _phc._ifg, rm );

  if( reg_degree >= rm_size ) {
    record_bias( _phc._ifg, lr1, lr2 );
    return false;
  }

  // Now I need to compute all the interferences between dst_copy and
  // src_copy.  I'm not willing to visit the entire interference graph, so
  // I limit my search to things in dst_copy's block or in a straight
  // line of previous blocks.  I give up at merge points or when I get
  // more interferences than my degree.  I can stop when I find src_copy.
  if( dst_copy != src_copy ) {
    reg_degree = compute_separating_interferences(dst_copy, src_copy, b, bindex, rm, reg_degree, rm_size, lr1, lr2 );
    if( reg_degree == max_juint ) {
      record_bias( _phc._ifg, lr1, lr2 );
      return false;
    }
  } // End of if dst_copy & src_copy are different

  // ---- THE COMBINED LRG IS COLORABLE ----

  // YEAH - Now coalesce this copy away
  assert( lrgs(lr1).num_regs() == lrgs(lr2).num_regs(), "" );

  IndexSet *n_lr1 = _phc._ifg->neighbors(lr1);
  IndexSet *n_lr2 = _phc._ifg->neighbors(lr2);

  // Update the interference graph
  update_ifg(lr1, lr2, n_lr1, n_lr2);

  _ulr.remove(lr1);

  // Uncomment the following code to trace Coalescing in great detail.
  //
  //if (false) {
  //  tty->cr();
  //  tty->print_cr("#######################################");
  //  tty->print_cr("union %d and %d", lr1, lr2);
  //  n_lr1->dump();
  //  n_lr2->dump();
  //  tty->print_cr("resulting set is");
  //  _ulr.dump();
  //}

  // Replace n_lr1 with the new combined live range.  _ulr will use
  // n_lr1's old memory on the next iteration.  n_lr2 is cleared to
  // send its internal memory to the free list.
  _ulr.swap(n_lr1);
  _ulr.clear();
  n_lr2->clear();

  lrgs(lr1).set_degree( _phc._ifg->effective_degree(lr1) );
  lrgs(lr2).set_degree( 0 );

  // Join live ranges.  Merge larger into smaller.  Union lr2 into lr1 in the
  // union-find tree
  union_helper( lr1_node, lr2_node, lr1, lr2, src_def, dst_copy, src_copy, b, bindex );

  // Combine register restrictions
  lrgs(lr1).set_mask(rm);
  lrgs(lr1).compute_set_mask_size();
  lrgs(lr1)._cost += lrgs(lr2)._cost;
  lrgs(lr1)._area += lrgs(lr2)._area;

  // While it's uncommon to successfully coalesce live ranges that started out
  // being not-lo-degree, it can happen.  In any case the combined coalesced
  // live range better Simplify nicely.
  lrgs(lr1)._was_lo = 1;

  // kinda expensive to do all the time
  //tty->print_cr("warning: slow verify happening");
  //_phc._ifg->verify( &_phc );
  return true;
}
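//------------------------------example: conservative coalesce test-------------
// Hedged, self-contained sketch (not VM source): the go/no-go test copy_copy
// applies.  rm_size plays the role of k (the number of colors the combined
// mask offers, inflated toward "infinite" when any stack slot is legal), and
// reg_degree is the significant-neighbor count from lrg_union plus any
// separating interferences.  The test is conservative: a refused coalesce
// just keeps the copy instruction, so failing never breaks the program.
static bool toy_should_coalesce(unsigned reg_degree, unsigned rm_size) {
  if (rm_size == 0) return false;        // incompatible register masks
  return reg_degree < rm_size;           // never risk making a range spill
}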
//------------------------------sched_call-------------------------------------
uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call ) {
  RegMask regs;

  // Schedule all the users of the call right now.  All the users are
  // projection Nodes, so they must be scheduled next to the call.
  // Collect all the defined registers.
  for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
    Node* n = mcall->fast_out(i);
    assert( n->Opcode()==Op_MachProj, "" );
    --ready_cnt[n->_idx];
    assert( !ready_cnt[n->_idx], "" );
    // Schedule next to call
    _nodes.map(node_cnt++, n);
    // Collect defined registers
    regs.OR(n->out_RegMask());
    // Check for scheduling the next control-definer
    if( n->bottom_type() == Type::CONTROL )
      // Warm up next pile of heuristic bits
      needed_for_next_call(n, next_call, bbs);

    // Children of projections are now all ready
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j); // Get user
      if( bbs[m->_idx] != this ) continue;
      if( m->is_Phi() ) continue;
      if( !--ready_cnt[m->_idx] )
        worklist.push(m);
    }
  }

  // Act as if the call defines the Frame Pointer.
  // Certainly the FP is alive and well after the call.
  regs.Insert(matcher.c_frame_pointer());

  // Set all registers killed and not already defined by the call.
  uint r_cnt = mcall->tf()->range()->cnt();
  int op = mcall->ideal_Opcode();
  MachProjNode *proj = new (matcher.C, 1) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
  bbs.map(proj->_idx,this);
  _nodes.insert(node_cnt++, proj);

  // Select the right register save policy.
  const char * save_policy;
  switch (op) {
    case Op_CallRuntime:
    case Op_CallLeaf:
    case Op_CallLeafNoFP:
      // Calling C code so use C calling convention
      save_policy = matcher._c_reg_save_policy;
      break;

    case Op_CallStaticJava:
    case Op_CallDynamicJava:
      // Calling Java code so use Java calling convention
      save_policy = matcher._register_save_policy;
      break;

    default:
      ShouldNotReachHere();
  }

  // When using CallRuntime mark SOE registers as killed by the call
  // so values that could show up in the RegisterMap aren't live in a
  // callee saved register since the register wouldn't know where to
  // find them.  CallLeaf and CallLeafNoFP are ok because they can't
  // have debug info on them.  Strictly speaking this only needs to be
  // done for oops since idealreg2debugmask takes care of debug info
  // references but there is no way to handle oops differently than other
  // pointers as far as the kill mask goes.
  bool exclude_soe = op == Op_CallRuntime;

  // If the call is a MethodHandle invoke, we need to exclude the
  // register which is used to save the SP value over MH invokes from
  // the mask.  Otherwise this register could be used for
  // deoptimization information.
  if (op == Op_CallStaticJava) {
    MachCallStaticJavaNode* mcallstaticjava = (MachCallStaticJavaNode*) mcall;
    if (mcallstaticjava->_method_handle_invoke)
      proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask());
  }

  // Fill in the kill mask for the call
  for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) {
    if( !regs.Member(r) ) {     // Not already defined by the call
      // Save-on-call register?
      if ((save_policy[r] == 'C') ||
          (save_policy[r] == 'A') ||
          ((save_policy[r] == 'E') && exclude_soe)) {
        proj->_rout.Insert(r);
      }
    }
  }

  return node_cnt;
}