//------------------------------sched_call-------------------------------------
uint Block::sched_call( Matcher &m, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call ) {
  RegMask regs;

  // Schedule all the users of the call right now.  All the users are
  // projection Nodes, so they must be scheduled next to the call.
  // Collect all the defined registers.
  for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
    Node* n = mcall->fast_out(i);
    assert( n->Opcode()==Op_MachProj, "" );
    --ready_cnt[n->_idx];
    assert( !ready_cnt[n->_idx], "" );
    // Schedule next to call
    _nodes.map(node_cnt++, n);
    // Collect defined registers
    regs.OR(n->out_RegMask());
    // Check for scheduling the next control-definer
    if( n->bottom_type() == Type::CONTROL )
      // Warm up next pile of heuristic bits
      needed_for_next_call(n, next_call, bbs);

    // Children of projections are now all ready
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j); // Get user
      if( bbs[m->_idx] != this ) continue;
      if( m->is_Phi() ) continue;
      if( !--ready_cnt[m->_idx] )
        worklist.push(m);
    }
  }

  // Act as if the call defines the Frame Pointer.
  // Certainly the FP is alive and well after the call.
  regs.Insert(m.c_frame_pointer());

  // Set all registers killed and not already defined by the call.
  uint r_cnt = mcall->tf()->range()->cnt();
  int op = mcall->ideal_Opcode();
  MachProjNode *proj = new (1) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
  bbs.map(proj->_idx,this);
  _nodes.insert(node_cnt++, proj);
  for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) {
    if( !regs.Member(r) ) {     // Not already defined by the call
      // Save-on-call register?
      if( (m._register_save_policy[r] == 'C') ||
          (m._register_save_policy[r] == 'A') ||
          ((m._register_save_policy[r] == 'E') &&
           (op == Op_CallRuntime ||
            op == Op_CallNative  ||
            op == Op_CallInterpreter ||
            op == Op_CallLeaf)) ) {
        proj->_rout.Insert(r);
      }
    }
  }

  return node_cnt;
}
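// --- Illustrative sketch (not part of the VM sources) -----------------------
// A minimal, self-contained example of the kill-mask idea used by the loop
// above: the call's fat projection kills every register the call does not
// itself define, filtered by a per-register save policy.  The register count,
// the policy encoding and the function name below are assumptions made purely
// for illustration; the real policy letters come from the matcher/ADL data.
#include <bitset>
#include <cstddef>

static std::bitset<32> example_call_kill_mask(const std::bitset<32> &defined_by_call,
                                              const char policy[32],
                                              bool kill_E_registers_too) {
  std::bitset<32> kills;
  for (std::size_t r = 0; r < 32; r++) {
    if (defined_by_call[r]) continue;            // already defined by the call
    if (policy[r] == 'C' || policy[r] == 'A' ||
        (policy[r] == 'E' && kill_E_registers_too)) {
      kills.set(r);                              // treated as killed across the call
    }
  }
  return kills;
}
// -----------------------------------------------------------------------------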
//------------------------------call_catch_cleanup-----------------------------
// If we inserted any instructions between a Call and its CatchNode,
// clone the instructions on all paths below the Catch.
void Block::call_catch_cleanup(Block_Array &bbs) {

  // End of region to clone
  uint end = end_idx();
  if( !_nodes[end]->is_Catch() ) return;
  // Start of region to clone
  uint beg = end;
  while( _nodes[beg-1]->Opcode() != Op_MachProj ||
        !_nodes[beg-1]->in(0)->is_Call() ) {
    beg--;
    assert(beg > 0,"Catch cleanup walking beyond block boundary");
  }
  if( beg == end ) return;

  // Clone along all Catch output paths.  Clone area between the 'beg' and
  // 'end' indices.
  for( uint i = 0; i < _num_succs; i++ ) {
    Block *sb = _succs[i];
    // Clone the entire area; ignoring the edge fixup for now.
    for( uint j = end; j > beg; j-- ) {
      Node *clone = _nodes[j-1]->clone();
      sb->_nodes.insert( 1, clone );
      bbs.map(clone->_idx,sb);
    }
  }

  // Fixup edges.  Check the def-use info per cloned Node
  for(uint i2 = beg; i2 < end; i2++ ) {
    uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block
    Node *n = _nodes[i2];        // Node that got cloned
    // Need DU safe iterator because of edge manipulation in calls.
    Node_List *out = new Node_List(Thread::current()->resource_area());
    for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) {
      out->push(n->fast_out(j1));
    }
    uint max = out->size();
    for (uint j = 0; j < max; j++) { // For all users
      Node *use = out->pop();
      Block *buse = bbs[use->_idx];
      if( use->is_Phi() ) {
        for( uint k = 1; k < use->req(); k++ )
          if( use->in(k) == n )
            catch_cleanup_one_use( use, bbs[buse->pred(k)->_idx], n, this, bbs, beg, n_clone_idx, k );
      } else {
        catch_cleanup_one_use( use, buse, n, this, bbs, beg, n_clone_idx, -1 );
      }
    } // End for all users

  } // End of for all Nodes in cloned area

  // Remove the now-dead cloned ops
  for(uint i3 = beg; i3 < end; i3++ ) {
    _nodes[beg]->disconnect_inputs(NULL);
    _nodes.remove(beg);
  }

  // If the successor blocks have a CreateEx node, move it back to the top
  for(uint i4 = 0; i4 < _num_succs; i4++ ) {
    Block *sb = _succs[i4];
    MachNode *cex = sb->_nodes[1+end-beg]->is_Mach();
    if( cex && cex->ideal_Opcode() == Op_CreateEx ) {
      sb->_nodes.remove(1+end-beg);
      sb->_nodes.insert(1,cex);
    }
  }
}
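// Worked example of the clone indexing above (numbers chosen for illustration
// only): with beg == 5 and end == 8, the cloning loop copies _nodes[7],
// _nodes[6], _nodes[5] in that order, each time inserting at position 1 of the
// successor block, so each successor 'sb' ends up with
//   sb->_nodes[1] == clone of _nodes[5]   (i2 == 5, n_clone_idx == 1)
//   sb->_nodes[2] == clone of _nodes[6]   (i2 == 6, n_clone_idx == 2)
//   sb->_nodes[3] == clone of _nodes[7]   (i2 == 7, n_clone_idx == 3)
// which is why the edge-fixup pass computes n_clone_idx as i2-beg+1.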
//------------------------------catch_cleanup_one_use--------------------------
static void catch_cleanup_one_use( Node *use, Block *use_blk, Node *def, Block *def_blk, Block_Array &bbs, int beg, int n_clone_idx, int use_idx ) {

  if( !use_blk ) return;        // Can happen if the use is a precedence edge

  // Check out 'use'.  If it is in this block, then it must be in
  // the cloned area.  Go to the clone(s) and set them to use the
  // cloned version of 'n'.
  if( use_blk == def_blk ) {
    uint use_idx = def_blk->find_node(use);
    uint offset_idx = use_idx - beg;
    for( uint k = 0; k < def_blk->_num_succs; k++ ) {
      // Get clone in each successor block
      Block *sb = def_blk->_succs[k];
      Node *clone = sb->_nodes[offset_idx+1];
      assert( clone->Opcode() == use->Opcode(), "" );
      // Make use-clone use the def-clone
      for( uint l = 0; l < use->len(); l++ ) {
        if( clone->in(l) == def ) {
          if( l < use->req() ) {
            clone->set_req(l,sb->_nodes[n_clone_idx]);
          } else {
            clone->rm_prec(l);
            clone->add_prec(sb->_nodes[n_clone_idx]);
            l--;
          }
        }
      }
    }
  }

  // Else the use is some block below the Catch.  Find the path the value
  // takes to reach the Catch and make the use occur only on this path.
  else {
    // Find which successor block dominates this use.  The successor
    // blocks must all be single-entry (from the Catch only; I will have
    // split blocks to make this so), hence they all dominate.
    while( use_blk->_dom_depth > def_blk->_dom_depth+1 )
      use_blk = use_blk->_idom;
    // Find the successor
    Node *fixup;
    uint j;
    for( j = 0; j < def_blk->_num_succs; j++ )
      if( use_blk == def_blk->_succs[j] )
        break;
    if( j == def_blk->_num_succs ) {
      // Block at same level in dom-tree is not a successor.  It needs a
      // PhiNode, the PhiNode uses from the def and ITS uses need fixup.
      Node *phi = PhiNode::make(use_blk->head(), def);
      use_blk->_nodes.insert( 1, phi );
      bbs.map(phi->_idx,use_blk);
      for(uint j3 = 1; j3 < use_blk->num_preds(); j3++ )
        catch_cleanup_one_use( phi, bbs[use_blk->pred(j3)->_idx], def, def_blk, bbs, beg, n_clone_idx, j3 );
      fixup = phi;
    } else {
      // Found the use just below the Catch.  Make it use the clone.
      fixup = def_blk->_succs[j]->_nodes[n_clone_idx];
    }

    if( use_idx >= 0 ) {
      use->set_req(use_idx,fixup);
    } else {
      for( uint l = 0; l < use->len(); l++ ) {
        if( use->in(l) == def ) {
          if( l < use->req() ) {
            use->set_req(l,fixup);
          } else {
            use->rm_prec(l);
            use->add_prec(fixup);
            l--;
          }
        }
      }
    }
  }
}
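// Rough summary of the two cases handled above:
//  1. use_blk == def_blk: the use itself lies in the cloned region, so every
//     successor block already holds a clone of the use; each such clone is
//     rewired to consume that successor's clone of the def.
//  2. use_blk is below the Catch: the use's block is walked up the dominator
//     tree to the level just below def_blk.  If that block is one of
//     def_blk's successors, the use consumes that successor's clone of the
//     def; otherwise a PhiNode merging the per-path clones is inserted at the
//     top of the block and the use consumes the Phi.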
//------------------------------implicit_null_check----------------------------
// Detect implicit-null-check opportunities.  Basically, find NULL checks
// with suitable memory ops nearby.  Use the memory op to do the NULL check.
// I can generate a memory op if there is not one nearby.
void Block::implicit_null_check(Block_Array &bbs, GrowableArray<uint> &latency, Node *proj, Node *val) {
  // Assume that if a null check is needed for offset 0 it is always needed.
  // Intel solaris doesn't support any null checks yet and no
  // mechanism exists (yet) to set the switches at an os_cpu level
  if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return;

  // Make sure the ptr-is-null path appears to be uncommon!
  float f = end()->is_Mach()->is_MachIf()->_prob;
  if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f;
  if( f > 0.0001 ) return;

  uint bidx = 0;                // Capture index of value into memop
  bool was_store;               // Memory op is a store op

  // Search the successor block for a load or store whose base value is also
  // the tested value.  There may be several.
  Node_List *out = new Node_List(Thread::current()->resource_area());
  MachNode *best = NULL;        // Best found so far
  for (DUIterator i = val->outs(); val->has_out(i); i++) {
    MachNode *mach = val->out(i)->is_Mach();
    if( !mach ) continue;
    was_store = false;
    switch( mach->ideal_Opcode() ) {
    case Op_LoadB:
    case Op_LoadC:
    case Op_LoadD:
    case Op_LoadF:
    case Op_LoadI:
    case Op_LoadL:
    case Op_LoadP:
    case Op_LoadS:
    case Op_LoadKlass:
    case Op_LoadRange:
    case Op_LoadD_unaligned:
    case Op_LoadL_unaligned:
      break;
    case Op_StoreB:
    case Op_StoreC:
    case Op_StoreCM:
    case Op_StoreD:
    case Op_StoreF:
    case Op_StoreI:
    case Op_StoreL:
    case Op_StoreP:
      was_store = true;         // Memory op is a store op
      // Stores will have their address in slot 2 (memory in slot 1).
      // If the value being null-checked is in another slot, it means we
      // are storing the checked value, which does NOT check the value!
      if( mach->in(2) != val ) continue;
      break;                    // Found a memory op?
    case Op_StrComp:
      // Not a legit memory op for implicit null check regardless of
      // embedded loads
      continue;
    default:                    // Also check for embedded loads
      if( !mach->check_for_anti_dependence() )
        continue;               // Not a memory op; skip it
      break;
    }

    // Check if the offset is not too high for implicit exception
    {
      intptr_t offset = 0;
      const TypePtr *adr_type = NULL;  // Do not need this return value here
      const Node* base = mach->get_base_and_disp(offset, adr_type);
      if (base == NULL || base == (Node*)-1) {
        // cannot reason about it; is probably not implicit null exception
      } else {
        const TypePtr* tptr = base->bottom_type()->is_ptr();
        // Give up if offset is not a compile-time constant
        if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot )
          continue;
        offset += tptr->_offset; // correct if base is offsetted
        if( MacroAssembler::needs_explicit_null_check(offset) )
          continue;             // Give up if reference is beyond 4K page size
      }
    }

    // Check ctrl input to see if the null-check dominates the memory op
    Block *cb = bbs[mach->_idx];
    cb = cb->_idom;             // Always hoist at least 1 block
    if( !was_store ) {          // Stores can be hoisted only one block
      while( cb->_dom_depth > _dom_depth )
        cb = cb->_idom;         // Hoist loads as far as we want
    }
    if( cb != this ) continue;

    // Found a memory user; see if it can be hoisted to check-block
    uint vidx = 0;              // Capture index of value into memop
    uint j;
    for( j = mach->req()-1; j > 0; j-- ) {
      if( mach->in(j) == val ) vidx = j;
      // Block of memory-op input
      Block *inb = bbs[mach->in(j)->_idx];
      Block *b = this;          // Start from null check
      while( b != inb && b->_dom_depth > inb->_dom_depth )
        b = b->_idom;           // search upwards for input
      // See if input dominates null check
      if( b != inb )
        break;
    }
    if( j > 0 ) continue;
    Block *mb = bbs[mach->_idx];
    // Hoisting stores requires more checks for the anti-dependence case.
    // Give up hoisting if we have to move the store past any load.
    if( was_store ) {
      Block *b = mb;            // Start searching here for a local load
      // mach use (faulting) trying to hoist
      // n might be blocker to hoisting
      while( b != this ) {
        uint k;
        for( k = 1; k < b->_nodes.size(); k++ ) {
          Node *n = b->_nodes[k];
          if( n->check_for_anti_dependence() &&
              n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) )
            break;              // Found anti-dependent load
        }
        if( k < b->_nodes.size() )
          break;                // Found anti-dependent load
        // Make sure control does not do a merge (would have to check all paths)
        if( b->num_preds() != 2 ) break;
        b = bbs[b->pred(1)->_idx]; // Move up to predecessor block
      }
      if( b != this ) continue;
    }

    // Make sure this memory op is not already being used for a NullCheck
    MachNode *e = mb->end()->is_Mach();
    if( e && e->is_MachNullCheck() && e->in(1) == mach )
      continue;                 // Already being used as a NULL check

    // Found a candidate!  Pick one with least dom depth - the highest
    // in the dom tree should be closest to the null check.
    if( !best ||
        bbs[mach->_idx]->_dom_depth < bbs[best->_idx]->_dom_depth ) {
      best = mach;
      bidx = vidx;
    }
  }
  // No candidate!
  if( !best ) return;

  // ---- Found an implicit null check
  extern int implicit_null_checks;
  implicit_null_checks++;

  // Hoist the memory candidate up to the end of the test block.
  Block *old_block = bbs[best->_idx];
  old_block->find_remove(best);
  add_inst(best);
  bbs.map(best->_idx,this);

  // Move the control dependence
  if (best->in(0) && best->in(0) == old_block->_nodes[0])
    best->set_req(0, _nodes[0]);

  // Check for flag-killing projections that also need to be hoisted
  // Should be DU safe because no edge updates.
  for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) {
    Node* n = best->fast_out(j);
    if( n->Opcode() == Op_MachProj ) {
      bbs[n->_idx]->find_remove(n);
      add_inst(n);
      bbs.map(n->_idx,this);
    }
  }

  // proj==Op_True --> ne test; proj==Op_False --> eq test.
  // One of two graph shapes got matched:
  //   (IfTrue  (If (Bool NE (CmpP ptr NULL))))
  //   (IfFalse (If (Bool EQ (CmpP ptr NULL))))
  // NULL checks are always branch-if-eq.  If we see an IfTrue projection
  // then we are replacing a 'ne' test with an 'eq' NULL check test.
  // We need to flip the projections to keep the same semantics.
  if( proj->Opcode() == Op_IfTrue ) {
    // Swap order of projections in basic block to swap branch targets
    Node *tmp1 = _nodes[end_idx()+1];
    Node *tmp2 = _nodes[end_idx()+2];
    _nodes.map(end_idx()+1, tmp2);
    _nodes.map(end_idx()+2, tmp1);
    Node *tmp = new (1) Node(1);
    tmp1->replace_by(tmp);
    tmp2->replace_by(tmp1);
    tmp->replace_by(tmp2);
  }

  // Remove the existing null check; use a new implicit null check instead.
  // Since schedule-local needs precise def-use info, we need to correct
  // it as well.
  Node *old_tst = proj->in(0);
  MachNode *nul_chk = new MachNullCheckNode(old_tst->in(0),best,bidx);
  _nodes.map(end_idx(),nul_chk);
  bbs.map(nul_chk->_idx,this);

  // Redirect users of old_tst to nul_chk
  for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2)
    old_tst->last_out(i2)->set_req(0, nul_chk);
  // Clean-up any dead code
  for (uint i3 = 0; i3 < old_tst->req(); i3++)
    old_tst->set_req(i3, NULL);

  latency.at_put_grow(nul_chk->_idx, nul_chk->latency_from_uses(bbs, latency));
  latency.at_put_grow(best    ->_idx, best    ->latency_from_uses(bbs, latency));

#ifndef PRODUCT
  if (TraceOptoPipelining) {
    tty->print("# implicit_null_check: latency %4d for ", latency.at_grow(best->_idx));
    best->fast_dump();
    tty->print("# implicit_null_check: latency %4d for ", latency.at_grow(nul_chk->_idx));
    nul_chk->fast_dump();
  }
#endif
}
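// Rough before/after sketch of the transformation above (projection order,
// register assignment and encoding details are omitted):
//
//   before:  ptr --> CmpP ptr,NULL --> Bool --> If --> {IfFalse, IfTrue}
//   after:   ptr --> memory op 'best' --> MachNullCheck --> {no-fault, fault}
//
// The explicit compare-and-branch is replaced by a MachNullCheck keyed off
// the hoisted memory op: if the pointer is NULL the access faults and the
// fault is dispatched to the exception path, so the hot (non-null) path
// executes no explicit test.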
//------------------------------sched_call-------------------------------------
uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, GrowableArray<int> &ready_cnt, MachCallNode *mcall, VectorSet &next_call ) {
  RegMask regs;

  // Schedule all the users of the call right now.  All the users are
  // projection Nodes, so they must be scheduled next to the call.
  // Collect all the defined registers.
  for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
    Node* n = mcall->fast_out(i);
    assert( n->is_MachProj(), "" );
    int n_cnt = ready_cnt.at(n->_idx)-1;
    ready_cnt.at_put(n->_idx, n_cnt);
    assert( n_cnt == 0, "" );
    // Schedule next to call
    _nodes.map(node_cnt++, n);
    // Collect defined registers
    regs.OR(n->out_RegMask());
    // Check for scheduling the next control-definer
    if( n->bottom_type() == Type::CONTROL )
      // Warm up next pile of heuristic bits
      needed_for_next_call(n, next_call, bbs);

    // Children of projections are now all ready
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j); // Get user
      if( bbs[m->_idx] != this ) continue;
      if( m->is_Phi() ) continue;
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }
  }

  // Act as if the call defines the Frame Pointer.
  // Certainly the FP is alive and well after the call.
  regs.Insert(matcher.c_frame_pointer());

  // Set all registers killed and not already defined by the call.
  uint r_cnt = mcall->tf()->range()->cnt();
  int op = mcall->ideal_Opcode();
  MachProjNode *proj = new (matcher.C, 1) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
  bbs.map(proj->_idx,this);
  _nodes.insert(node_cnt++, proj);

  // Select the right register save policy.
  const char * save_policy;
  switch (op) {
    case Op_CallRuntime:
    case Op_CallLeaf:
    case Op_CallLeafNoFP:
      // Calling C code so use C calling convention
      save_policy = matcher._c_reg_save_policy;
      break;

    case Op_CallStaticJava:
    case Op_CallDynamicJava:
      // Calling Java code so use Java calling convention
      save_policy = matcher._register_save_policy;
      break;

    default:
      ShouldNotReachHere();
  }

  // When using CallRuntime mark SOE registers as killed by the call
  // so values that could show up in the RegisterMap aren't live in a
  // callee saved register since the register wouldn't know where to
  // find them.  CallLeaf and CallLeafNoFP are ok because they can't
  // have debug info on them.  Strictly speaking this only needs to be
  // done for oops since idealreg2debugmask takes care of debug info
  // references but there's no way to handle oops differently than other
  // pointers as far as the kill mask goes.
  bool exclude_soe = op == Op_CallRuntime;

  // If the call is a MethodHandle invoke, we need to exclude the
  // register which is used to save the SP value over MH invokes from
  // the mask.  Otherwise this register could be used for
  // deoptimization information.
  if (op == Op_CallStaticJava) {
    MachCallStaticJavaNode* mcallstaticjava = (MachCallStaticJavaNode*) mcall;
    if (mcallstaticjava->_method_handle_invoke)
      proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask());
  }

  add_call_kills(proj, regs, save_policy, exclude_soe);

  return node_cnt;
}
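// --- Illustrative sketch (not part of the VM sources) -----------------------
// The ready_cnt/worklist bookkeeping above is the standard list-scheduling
// pattern: every node keeps a count of not-yet-scheduled inputs, and when a
// node is scheduled each user's count is decremented; users that reach zero
// become ready and are pushed on the worklist.  The minimal example below
// shows that pattern on a plain adjacency list; all names are invented for
// illustration and are not HotSpot APIs.
#include <vector>
#include <cstddef>

static std::vector<std::size_t> example_list_schedule(
    const std::vector<std::vector<std::size_t> > &users,  // users[n] = nodes consuming n
    std::vector<int> ready_cnt,                           // unscheduled-input counts
    std::vector<std::size_t> worklist) {                  // initially-ready nodes
  std::vector<std::size_t> order;
  while (!worklist.empty()) {
    std::size_t n = worklist.back();
    worklist.pop_back();
    order.push_back(n);                       // "schedule" n
    for (std::size_t i = 0; i < users[n].size(); i++) {
      std::size_t m = users[n][i];
      if (--ready_cnt[m] == 0)                // last input just got scheduled
        worklist.push_back(m);                // m is now ready
    }
  }
  return order;
}
// -----------------------------------------------------------------------------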
//------------------------------call_catch_cleanup-----------------------------
// If we inserted any instructions between a Call and its CatchNode,
// clone the instructions on all paths below the Catch.
void Block::call_catch_cleanup(Block_Array &bbs) {

  // End of region to clone
  uint end = end_idx();
  if( !_nodes[end]->is_Catch() ) return;
  // Start of region to clone
  uint beg = end;
  while(!_nodes[beg-1]->is_MachProj() ||
        !_nodes[beg-1]->in(0)->is_MachCall() ) {
    beg--;
    assert(beg > 0,"Catch cleanup walking beyond block boundary");
  }
  // Range of inserted instructions is [beg, end)
  if( beg == end ) return;

  // Clone along all Catch output paths.  Clone area between the 'beg' and
  // 'end' indices.
  for( uint i = 0; i < _num_succs; i++ ) {
    Block *sb = _succs[i];
    // Clone the entire area; ignoring the edge fixup for now.
    for( uint j = end; j > beg; j-- ) {
      // It is safe here to clone a node with anti_dependence
      // since clones dominate on each path.
      Node *clone = _nodes[j-1]->clone();
      sb->_nodes.insert( 1, clone );
      bbs.map(clone->_idx,sb);
    }
  }

  // Fixup edges.  Check the def-use info per cloned Node
  for(uint i2 = beg; i2 < end; i2++ ) {
    uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block
    Node *n = _nodes[i2];        // Node that got cloned
    // Need DU safe iterator because of edge manipulation in calls.
    Unique_Node_List *out = new Unique_Node_List(Thread::current()->resource_area());
    for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) {
      out->push(n->fast_out(j1));
    }
    uint max = out->size();
    for (uint j = 0; j < max; j++) { // For all users
      Node *use = out->pop();
      Block *buse = bbs[use->_idx];
      if( use->is_Phi() ) {
        for( uint k = 1; k < use->req(); k++ )
          if( use->in(k) == n ) {
            Node *fixup = catch_cleanup_find_cloned_def(bbs[buse->pred(k)->_idx], n, this, bbs, n_clone_idx);
            use->set_req(k, fixup);
          }
      } else {
        if (this == buse) {
          catch_cleanup_intra_block(use, n, this, beg, n_clone_idx);
        } else {
          catch_cleanup_inter_block(use, buse, n, this, bbs, n_clone_idx);
        }
      }
    } // End for all users

  } // End of for all Nodes in cloned area

  // Remove the now-dead cloned ops
  for(uint i3 = beg; i3 < end; i3++ ) {
    _nodes[beg]->disconnect_inputs(NULL);
    _nodes.remove(beg);
  }

  // If the successor blocks have a CreateEx node, move it back to the top
  for(uint i4 = 0; i4 < _num_succs; i4++ ) {
    Block *sb = _succs[i4];
    uint new_cnt = end - beg;
    // Remove any newly created, but dead, nodes.
    for( uint j = new_cnt; j > 0; j-- ) {
      Node *n = sb->_nodes[j];
      if (n->outcnt() == 0 &&
          (!n->is_Proj() || n->as_Proj()->in(0)->outcnt() == 1) ){
        n->disconnect_inputs(NULL);
        sb->_nodes.remove(j);
        new_cnt--;
      }
    }
    // If any newly created nodes remain, move the CreateEx node to the top
    if (new_cnt > 0) {
      Node *cex = sb->_nodes[1+new_cnt];
      if( cex->is_Mach() && cex->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
        sb->_nodes.remove(1+new_cnt);
        sb->_nodes.insert(1,cex);
      }
    }
  }
}
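// Note on the final loop above: after pruning dead clones, any surviving
// CreateEx clone is moved back to index 1, immediately after the block head,
// so that the node producing the incoming exception oop precedes the cloned
// instructions in each handler block (the same placement the cloning loop
// disturbed by inserting clones at index 1).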