//------------------------------compute_separating_interferences---------------
// Factored code from copy_copy that computes extra interferences from
// lengthening a live range by double-coalescing.
uint PhaseConservativeCoalesce::compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint reg_degree, uint rm_size, uint lr1, uint lr2 ) {
  assert(!lrgs(lr1)._fat_proj, "cannot coalesce fat_proj");
  assert(!lrgs(lr2)._fat_proj, "cannot coalesce fat_proj");
  Node *prev_copy = dst_copy->in(dst_copy->is_Copy());
  Block *b2 = b;
  uint bindex2 = bindex;
  while( 1 ) {
    // Find previous instruction
    bindex2--;                  // Chain backwards 1 instruction
    while( bindex2 == 0 ) {     // At block start, find prior block
      assert( b2->num_preds() == 2, "cannot double coalesce across c-flow" );
      b2 = _phc._cfg._bbs[b2->pred(1)->_idx];
      bindex2 = b2->end_idx()-1;
    }
    // Get prior instruction
    assert(bindex2 < b2->_nodes.size(), "index out of bounds");
    Node *x = b2->_nodes[bindex2];
    if( x == prev_copy ) {      // Previous copy in copy chain?
      if( prev_copy == src_copy)// Found end of chain and all interferences
        break;                  // So break out of loop
      // Else work back one in copy chain
      prev_copy = prev_copy->in(prev_copy->is_Copy());
    } else {                    // Else collect interferences
      uint lidx = _phc.Find(x);
      // Found another def of live-range being stretched?
      if( lidx == lr1 ) return max_juint;
      if( lidx == lr2 ) return max_juint;

      // If we attempt to coalesce across a bound def
      if( lrgs(lidx).is_bound() ) {
        // Do not let the coalesced LRG expect to get the bound color
        rm.SUBTRACT( lrgs(lidx).mask() );
        // Recompute rm_size
        rm_size = rm.Size();
        //if( rm._flags ) rm_size += 1000000;
        if( reg_degree >= rm_size ) return max_juint;
      }
      if( rm.overlap(lrgs(lidx).mask()) ) {
        // Insert lidx into union LRG; returns TRUE if actually inserted
        if( _ulr.insert(lidx) ) {
          // Infinite-stack neighbors do not alter colorability, as they
          // can always color to some other color.
          if( !lrgs(lidx).mask().is_AllStack() ) {
            // If this coalesce will make any new neighbor uncolorable,
            // do not coalesce.
            if( lrgs(lidx).just_lo_degree() )
              return max_juint;
            // Bump our degree
            if( ++reg_degree >= rm_size )
              return max_juint;
          } // End of if not infinite-stack neighbor
        } // End of if actually inserted
      } // End of if live range overlaps
    } // End of else collect interferences for 1 node
  } // End of while forever, scan back for interferences
  return reg_degree;
}
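
// In outline, the copy chain handled above has the shape
//
//     src_def --> src_copy --> ... --> dst_copy
//
// and the scan walks backwards from dst_copy, following each copy's
// in(is_Copy()) edge, until it reaches src_copy.  Every other node visited
// on the way is mapped through _phc.Find() and tested against the proposed
// combined live range; a return value of max_juint means "do not coalesce",
// otherwise the updated interference degree is returned.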
//------------------------------copy_copy--------------------------------------
// See if I can coalesce a series of multiple copies together.  I need the
// final dest copy and the original src copy.  They can be the same Node.
// Compute the compatible register masks.
bool PhaseConservativeCoalesce::copy_copy( Node *dst_copy, Node *src_copy, Block *b, uint bindex ) {
  if( !dst_copy->is_SpillCopy() ) return false;
  if( !src_copy->is_SpillCopy() ) return false;
  Node *src_def = src_copy->in(src_copy->is_Copy());
  uint lr1 = _phc.Find(dst_copy);
  uint lr2 = _phc.Find(src_def );

  // Same live ranges already?
  if( lr1 == lr2 ) return false;

  // Interfere?
  if( _phc._ifg->test_edge_sq( lr1, lr2 ) ) return false;

  // Not an oop->int cast; oop->oop, int->int, AND int->oop are OK.
  if( !lrgs(lr1)._is_oop && lrgs(lr2)._is_oop ) // not an oop->int cast
    return false;

  // Coalescing between an aligned live range and a mis-aligned live range?
  // No, no!  Alignment changes how we count degree.
  if( lrgs(lr1)._fat_proj != lrgs(lr2)._fat_proj )
    return false;

  // Sort; use smaller live-range number
  Node *lr1_node = dst_copy;
  Node *lr2_node = src_def;
  if( lr1 > lr2 ) {
    uint tmp = lr1; lr1 = lr2; lr2 = tmp;
    lr1_node = src_def;  lr2_node = dst_copy;
  }

  // Check for compatibility of the 2 live ranges by
  // intersecting their allowed register sets.
  RegMask rm = lrgs(lr1).mask();
  rm.AND(lrgs(lr2).mask());
  // Number of bits free
  uint rm_size = rm.Size();

  if (UseFPUForSpilling && rm.is_AllStack() ) {
    // Don't coalesce when frequency difference is large
    Block *dst_b = _phc._cfg._bbs[dst_copy->_idx];
    Block *src_def_b = _phc._cfg._bbs[src_def->_idx];
    if (src_def_b->_freq > 10*dst_b->_freq )
      return false;
  }

  // If we can use any stack slot, then effective size is infinite
  if( rm.is_AllStack() ) rm_size += 1000000;
  // Incompatible masks, no way to coalesce
  if( rm_size == 0 ) return false;

  // Another early bail-out test is when we are double-coalescing and the
  // 2 copies are separated by some control flow.
  if( dst_copy != src_copy ) {
    Block *src_b = _phc._cfg._bbs[src_copy->_idx];
    Block *b2 = b;
    while( b2 != src_b ) {
      if( b2->num_preds() > 2 ){// Found merge-point
        _phc._lost_opp_cflow_coalesce++;
        // extra record_bias commented out because Chris believes it is not
        // productive.  Since we can record only 1 bias, we want to choose one
        // that stands a chance of working and this one probably does not.
        //record_bias( _phc._lrgs, lr1, lr2 );
        return false;           // Too hard to find all interferences
      }
      b2 = _phc._cfg._bbs[b2->pred(1)->_idx];
    }
  }

  // Union the two interference sets together into '_ulr'
  uint reg_degree = _ulr.lrg_union( lr1, lr2, rm_size, _phc._ifg, rm );

  if( reg_degree >= rm_size ) {
    record_bias( _phc._ifg, lr1, lr2 );
    return false;
  }

  // Now I need to compute all the interferences between dst_copy and
  // src_copy.  I'm not willing to visit the entire interference graph, so
  // I limit my search to things in dst_copy's block or in a straight
  // line of previous blocks.  I give up at merge points or when I get
  // more interferences than my degree.  I can stop when I find src_copy.
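  // For example (illustrative, not an exhaustive description), with a
  // straight-line chain of blocks
  //
  //     B1:  src_def, src_copy
  //     B2:  ...              (single predecessor: B1)
  //     B3:  ..., dst_copy    (single predecessor: B2)
  //
  // the scan starts just above dst_copy in B3 and walks back through B2
  // into B1 until it hits src_copy; a merge point (a block with more than
  // one predecessor) would already have been rejected by the early
  // bail-out loop above.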
  if( dst_copy != src_copy ) {
    reg_degree = compute_separating_interferences(dst_copy, src_copy, b, bindex, rm, reg_degree, rm_size, lr1, lr2 );
    if( reg_degree == max_juint ) {
      record_bias( _phc._ifg, lr1, lr2 );
      return false;
    }
  } // End of if dst_copy & src_copy are different

  // ---- THE COMBINED LRG IS COLORABLE ----

  // YEAH - Now coalesce this copy away
  assert( lrgs(lr1).num_regs() == lrgs(lr2).num_regs(), "" );

  IndexSet *n_lr1 = _phc._ifg->neighbors(lr1);
  IndexSet *n_lr2 = _phc._ifg->neighbors(lr2);

  // Update the interference graph
  update_ifg(lr1, lr2, n_lr1, n_lr2);

  _ulr.remove(lr1);

  // Uncomment the following code to trace Coalescing in great detail.
  //
  //if (false) {
  //  tty->cr();
  //  tty->print_cr("#######################################");
  //  tty->print_cr("union %d and %d", lr1, lr2);
  //  n_lr1->dump();
  //  n_lr2->dump();
  //  tty->print_cr("resulting set is");
  //  _ulr.dump();
  //}

  // Replace n_lr1 with the new combined live range.  _ulr will use
  // n_lr1's old memory on the next iteration.  n_lr2 is cleared to
  // send its internal memory to the free list.
  _ulr.swap(n_lr1);
  _ulr.clear();
  n_lr2->clear();

  lrgs(lr1).set_degree( _phc._ifg->effective_degree(lr1) );
  lrgs(lr2).set_degree( 0 );

  // Join live ranges.  Merge larger into smaller.  Union lr2 into lr1 in the
  // union-find tree
  union_helper( lr1_node, lr2_node, lr1, lr2, src_def, dst_copy, src_copy, b, bindex );

  // Combine register restrictions
  lrgs(lr1).set_mask(rm);
  lrgs(lr1).compute_set_mask_size();
  lrgs(lr1)._cost += lrgs(lr2)._cost;
  lrgs(lr1)._area += lrgs(lr2)._area;

  // While it's uncommon to successfully coalesce live ranges that started out
  // being not-lo-degree, it can happen.  In any case the combined coalesced
  // live range better Simplify nicely.
  lrgs(lr1)._was_lo = 1;

  // kinda expensive to do all the time
  //tty->print_cr("warning: slow verify happening");
  //_phc._ifg->verify( &_phc );

  return true;
}