/*
 * Compute the immediate dominator of every block with the iterative
 * algorithm from Cooper, Harvey, and Kennedy's "A Simple, Fast Dominance
 * Algorithm".
 *
 * `blocks` must be sorted in reverse postorder (asserted below); its first
 * element is treated as the entry block.  The result is indexed by postorder
 * id and each slot holds the postorder id of that block's immediate
 * dominator.  The entry block's slot is -1.
 */
IdomVector findDominators(const BlockList& blocks) {
  assert(isRPOSorted(blocks));

  // -1 marks "no immediate dominator computed yet".
  IdomVector idom(blocks.size(), -1);

  // While iterating, the entry block is its own dominator so that the
  // two-finger walks below have a place to stop.
  auto firstNonEntry = blocks.begin();
  const int entryId = (*firstNonEntry)->postId();
  idom[entryId] = entryId;
  ++firstNonEntry;

  // Iterate to a fixed point.  This is the general algorithm, but it only
  // needs two passes for loop-free graphs.
  bool changed;
  do {
    changed = false;

    // Visit every block after the entry, in reverse postorder.
    for (auto cur = firstNonEntry; cur != blocks.end(); ++cur) {
      Block* const blk = *cur;
      const int blkId = blk->postId();

      // Seed the candidate with the first predecessor that has already
      // been processed.
      auto predIt = blk->preds().begin();
      int candidate = predIt->from()->postId();
      while (idom[candidate] == -1) {
        ++predIt;
        candidate = predIt->from()->postId();
      }

      // Fold every other already-processed predecessor into the candidate.
      for (auto& predEdge : blk->preds()) {
        auto predId = predEdge.from()->postId();
        if (predId == candidate || idom[predId] == -1) continue;

        // Two-finger intersection: climb the dominator tree from whichever
        // side has the smaller postorder id until both sides meet (higher
        // post-ids are earlier in flow and in the dominator tree).
        int lhs = predId;
        int rhs = candidate;
        do {
          while (lhs < rhs) lhs = idom[lhs];
          while (rhs < lhs) rhs = idom[rhs];
        } while (lhs != rhs);
        candidate = lhs;
      }

      if (idom[blkId] == candidate) continue;
      idom[blkId] = candidate;
      changed = true;
    }
  } while (changed);

  // The entry block has no immediate dominator.
  idom[entryId] = -1;
  return idom;
}
/*
 * Compute the immediate dominator of every block with the iterative
 * algorithm from Cooper, Harvey, and Kennedy's "A Simple, Fast Dominance
 * Algorithm".
 *
 * The result is indexed by Block* and holds Block* values.  An entry is
 * nullptr when the block has no dominator, which is the case for the entry
 * block and for any block not reachable from the entry block.
 *
 * `blockIds.blocks` supplies the visiting order (its first element is
 * treated as the entry block) and `blockIds.ids` supplies the postorder id
 * of each block.
 */
IdomVector findDominators(const IRUnit& unit, const BlocksWithIds& blockIds) {
  auto& rpoBlocks = blockIds.blocks;
  auto& postorderId = blockIds.ids;

  // nullptr marks "no immediate dominator computed yet".
  IdomVector idom(unit, nullptr);

  // While iterating, the entry block is its own dominator so that the
  // two-finger walks below have a place to stop.
  auto firstNonEntry = rpoBlocks.begin();
  auto entry = *firstNonEntry;
  idom[entry] = entry;
  ++firstNonEntry;

  // Iterate to a fixed point.  This is the general algorithm, but it only
  // needs two passes for loop-free graphs.
  bool changed;
  do {
    changed = false;

    // Visit every block after the entry, in list order.
    for (auto cur = firstNonEntry; cur != rpoBlocks.end(); ++cur) {
      Block* blk = *cur;

      // Seed the candidate with the first predecessor that has already
      // been processed.
      auto predIt = blk->preds().begin();
      auto predsEnd = blk->preds().end();
      auto candidate = predIt->inst()->block();
      while (!idom[candidate]) {
        ++predIt;
        candidate = predIt->inst()->block();
      }

      // Fold each remaining already-processed predecessor into the candidate.
      for (++predIt; predIt != predsEnd; ++predIt) {
        auto other = predIt->inst()->block();
        if (other != candidate && idom[other]) {
          // Two-finger intersection: climb the dominator tree from whichever
          // side has the smaller postorder id until both sides meet (higher
          // post-ids are earlier in flow and in the dominator tree).
          do {
            while (postorderId[candidate] < postorderId[other]) {
              candidate = idom[candidate];
            }
            while (postorderId[other] < postorderId[candidate]) {
              other = idom[other];
            }
          } while (candidate != other);
        }
      }

      if (idom[blk] != candidate) {
        idom[blk] = candidate;
        changed = true;
      }
    }
  } while (changed);

  // The entry block has no dominator.
  idom[entry] = nullptr;
  return idom;
}
//------------------------------create_new_if_for_predicate------------------------ // create a new if above the uct_if_pattern for the predicate to be promoted. // // before after // ---------- ---------- // ctrl ctrl // | | // | | // v v // iff new_iff // / \ / \ // / \ / \ // v v v v // uncommon_proj cont_proj if_uct if_cont // \ | | | | // \ | | | | // v v v | v // rgn loop | iff // | | / \ // | | / \ // v | v v // uncommon_trap | uncommon_proj cont_proj // \ \ | | // \ \ | | // v v v v // rgn loop // | // | // v // uncommon_trap // // // We will create a region to guard the uct call if there is no one there. // The true projecttion (if_cont) of the new_iff is returned. // This code is also used to clone predicates to clonned loops. ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry, Deoptimization::DeoptReason reason) { assert(is_uncommon_trap_if_pattern(cont_proj, reason), "must be a uct if pattern!"); IfNode* iff = cont_proj->in(0)->as_If(); ProjNode *uncommon_proj = iff->proj_out(1 - cont_proj->_con); Node *rgn = uncommon_proj->unique_ctrl_out(); assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct"); uint proj_index = 1; // region's edge corresponding to uncommon_proj if (!rgn->is_Region()) { // create a region to guard the call assert(rgn->is_Call(), "must be call uct"); CallNode* call = rgn->as_Call(); IdealLoopTree* loop = get_loop(call); rgn = new (C) RegionNode(1); rgn->add_req(uncommon_proj); register_control(rgn, loop, uncommon_proj); _igvn.hash_delete(call); call->set_req(0, rgn); // When called from beautify_loops() idom is not constructed yet. 
if (_idom != NULL) { set_idom(call, rgn, dom_depth(rgn)); } } else { // Find region's edge corresponding to uncommon_proj for (; proj_index < rgn->req(); proj_index++) if (rgn->in(proj_index) == uncommon_proj) break; assert(proj_index < rgn->req(), "sanity"); } Node* entry = iff->in(0); if (new_entry != NULL) { // Clonning the predicate to new location. entry = new_entry; } // Create new_iff IdealLoopTree* lp = get_loop(entry); IfNode *new_iff = iff->clone()->as_If(); new_iff->set_req(0, entry); register_control(new_iff, lp, entry); Node *if_cont = new (C) IfTrueNode(new_iff); Node *if_uct = new (C) IfFalseNode(new_iff); if (cont_proj->is_IfFalse()) { // Swap Node* tmp = if_uct; if_uct = if_cont; if_cont = tmp; } register_control(if_cont, lp, new_iff); register_control(if_uct, get_loop(rgn), new_iff); // if_uct to rgn _igvn.hash_delete(rgn); rgn->add_req(if_uct); // When called from beautify_loops() idom is not constructed yet. if (_idom != NULL) { Node* ridom = idom(rgn); Node* nrdom = dom_lca(ridom, new_iff); set_idom(rgn, nrdom, dom_depth(rgn)); } // If rgn has phis add new edges which has the same // value as on original uncommon_proj pass. assert(rgn->in(rgn->req() -1) == if_uct, "new edge should be last"); bool has_phi = false; for (DUIterator_Fast imax, i = rgn->fast_outs(imax); i < imax; i++) { Node* use = rgn->fast_out(i); if (use->is_Phi() && use->outcnt() > 0) { assert(use->in(0) == rgn, ""); _igvn.rehash_node_delayed(use); use->add_req(use->in(proj_index)); has_phi = true; } } assert(!has_phi || rgn->req() > 3, "no phis when region is created"); if (new_entry == NULL) { // Attach if_cont to iff _igvn.hash_delete(iff); iff->set_req(0, if_cont); if (_idom != NULL) { set_idom(iff, if_cont, dom_depth(iff)); } } return if_cont->as_Proj(); }
//------------------------------do_split_if------------------------------------ // Found an If getting its condition-code input from a Phi in the same block. // Split thru the Region. void PhaseIdealLoop::do_split_if( Node *iff ) { #ifndef PRODUCT if( PrintOpto && VerifyLoopOptimizations ) tty->print_cr("Split-if"); #endif C->set_major_progress(); Node *region = iff->in(0); Node *region_dom = idom(region); // We are going to clone this test (and the control flow with it) up through // the incoming merge point. We need to empty the current basic block. // Clone any instructions which must be in this block up through the merge // point. DUIterator i, j; bool progress = true; while (progress) { progress = false; for (i = region->outs(); region->has_out(i); i++) { Node* n = region->out(i); if( n == region ) continue; // The IF to be split is OK. if( n == iff ) continue; if( !n->is_Phi() ) { // Found pinned memory op or such if (split_up(n, region, iff)) { i = region->refresh_out_pos(i); progress = true; } continue; } assert( n->in(0) == region, "" ); // Recursively split up all users of a Phi for (j = n->outs(); n->has_out(j); j++) { Node* m = n->out(j); // If m is dead, throw it away, and declare progress if (_nodes[m->_idx] == NULL) { _igvn.remove_dead_node(m); // fall through } else if (m != iff && split_up(m, region, iff)) { // fall through } else { continue; } // Something unpredictable changed. // Tell the iterators to refresh themselves, and rerun the loop. i = region->refresh_out_pos(i); j = region->refresh_out_pos(j); progress = true; } } } // Now we have no instructions in the block containing the IF. // Split the IF. Node *new_iff = split_thru_region( iff, region ); // Replace both uses of 'new_iff' with Regions merging True/False // paths. This makes 'new_iff' go dead. 
Node *old_false, *old_true; Node *new_false, *new_true; for (DUIterator_Last j2min, j2 = iff->last_outs(j2min); j2 >= j2min; --j2) { Node *ifp = iff->last_out(j2); assert( ifp->Opcode() == Op_IfFalse || ifp->Opcode() == Op_IfTrue, "" ); ifp->set_req(0, new_iff); Node *ifpx = split_thru_region( ifp, region ); // Replace 'If' projection of a Region with a Region of // 'If' projections. ifpx->set_req(0, ifpx); // A TRUE RegionNode // Setup dominator info set_idom(ifpx, region_dom, dom_depth(region_dom) + 1); // Check for splitting loop tails if( get_loop(iff)->tail() == ifp ) get_loop(iff)->_tail = ifpx; // Replace in the graph with lazy-update mechanism new_iff->set_req(0, new_iff); // hook self so it does not go dead lazy_replace_proj( ifp, ifpx ); new_iff->set_req(0, region); // Record bits for later xforms if( ifp->Opcode() == Op_IfFalse ) { old_false = ifp; new_false = ifpx; } else { old_true = ifp; new_true = ifpx; } } _igvn.remove_dead_node(new_iff); // Lazy replace IDOM info with the region's dominator lazy_replace( iff, region_dom ); // Now make the original merge point go dead, by handling all its uses. small_cache region_cache; // Preload some control flow in region-cache region_cache.lru_insert( new_false, new_false ); region_cache.lru_insert( new_true , new_true ); // Now handle all uses of the splitting block for (DUIterator_Last kmin, k = region->last_outs(kmin); k >= kmin; --k) { Node* phi = region->last_out(k); if( !phi->in(0) ) { // Dead phi? Remove it _igvn.remove_dead_node(phi); continue; } assert( phi->in(0) == region, "" ); if( phi == region ) { // Found the self-reference phi->set_req(0, NULL); continue; // Break the self-cycle } // Expected common case: Phi hanging off of Region if( phi->is_Phi() ) { // Need a per-def cache. 
Phi represents a def, so make a cache small_cache phi_cache; // Inspect all Phi uses to make the Phi go dead for (DUIterator_Last lmin, l = phi->last_outs(lmin); l >= lmin; --l) { Node* use = phi->last_out(l); // Compute the new DEF for this USE. New DEF depends on the path // taken from the original DEF to the USE. The new DEF may be some // collection of PHI's merging values from different paths. The Phis // inserted depend only on the location of the USE. We use a // 2-element cache to handle multiple uses from the same block. handle_use( use, phi, &phi_cache, region_dom, new_false, new_true, old_false, old_true ); } // End of while phi has uses // Because handle_use might relocate region->_out, // we must refresh the iterator. k = region->last_outs(kmin); // Remove the dead Phi _igvn.remove_dead_node( phi ); } else { // Random memory op guarded by Region. Compute new DEF for USE. handle_use( phi, region, ®ion_cache, region_dom, new_false, new_true, old_false, old_true ); } } // End of while merge point has phis // Any leftover bits in the splitting block must not have depended on local // Phi inputs (these have already been split-up). Hence it's safe to hoist // these guys to the dominating point. lazy_replace( region, region_dom ); #ifndef PRODUCT if( VerifyLoopOptimizations ) verify(); #endif }
// We must be at the merge point which post-dominates 'new_false' and // 'new_true'. Figure out which edges into the RegionNode eventually lead up // to false and which to true. Put in a PhiNode to merge values; plug in // the appropriate false-arm or true-arm values. If some path leads to the // original IF, then insert a Phi recursively. Node *PhaseIdealLoop::spinup( Node *iff_dom, Node *new_false, Node *new_true, Node *use_blk, Node *def, small_cache *cache ) { if (use_blk->is_top()) // Handle dead uses return use_blk; Node *prior_n = (Node*)0xdeadbeef; Node *n = use_blk; // Get path input assert( use_blk != iff_dom, "" ); // Here's the "spinup" the dominator tree loop. Do a cache-check // along the way, in case we've come this way before. while( n != iff_dom ) { // Found post-dominating point? prior_n = n; n = idom(n); // Search higher Node *s = cache->probe( prior_n ); // Check cache if( s ) return s; // Cache hit! } Node *phi_post; if( prior_n == new_false || prior_n == new_true ) { phi_post = def->clone(); phi_post->set_req(0, prior_n ); register_new_node(phi_post, prior_n); } else { // This method handles both control uses (looking for Regions) or data // uses (looking for Phis). If looking for a control use, then we need // to insert a Region instead of a Phi; however Regions always exist // previously (the hash_find_insert below would always hit) so we can // return the existing Region. if( def->is_CFG() ) { phi_post = prior_n; // If looking for CFG, return prior } else { assert( def->is_Phi(), "" ); assert( prior_n->is_Region(), "must be a post-dominating merge point" ); // Need a Phi here phi_post = PhiNode::make_blank(prior_n, def); // Search for both true and false on all paths till find one. 
for( uint i = 1; i < phi_post->req(); i++ ) // For all paths phi_post->init_req( i, spinup( iff_dom, new_false, new_true, prior_n->in(i), def, cache ) ); Node *t = _igvn.hash_find_insert(phi_post); if( t ) { // See if we already have this one // phi_post will not be used, so kill it _igvn.remove_dead_node(phi_post); phi_post->destruct(); phi_post = t; } else { register_new_node( phi_post, prior_n ); } } } // Update cache everywhere prior_n = (Node*)0xdeadbeef; // Reset IDOM walk n = use_blk; // Get path input // Spin-up the idom tree again, basically doing path-compression. // Insert cache entries along the way, so that if we ever hit this // point in the IDOM tree again we'll stop immediately on a cache hit. while( n != iff_dom ) { // Found post-dominating point? prior_n = n; n = idom(n); // Search higher cache->lru_insert( prior_n, phi_post ); // Fill cache } // End of while not gone high enough return phi_post; }