//------------------------------sched_call-------------------------------------
uint Block::sched_call( Matcher &m, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call ) {
  RegMask regs;

  // Schedule all the users of the call right now.  All the users are
  // projection Nodes, so they must be scheduled next to the call.
  // Collect all the defined registers.
  for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
    Node* n = mcall->fast_out(i);
    assert( n->Opcode()==Op_MachProj, "" );
    --ready_cnt[n->_idx];
    assert( !ready_cnt[n->_idx], "" );
    // Schedule next to call
    _nodes.map(node_cnt++, n);
    // Collect defined registers
    regs.OR(n->out_RegMask());
    // Check for scheduling the next control-definer
    if( n->bottom_type() == Type::CONTROL )
      // Warm up next pile of heuristic bits
      needed_for_next_call(n, next_call, bbs);

    // Children of projections are now all ready
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j); // Get user
      if( bbs[m->_idx] != this ) continue;
      if( m->is_Phi() ) continue;
      if( !--ready_cnt[m->_idx] )
        worklist.push(m);
    }
  }

  // Act as if the call defines the Frame Pointer.
  // Certainly the FP is alive and well after the call.
  regs.Insert(m.c_frame_pointer());

  // Set all registers killed and not already defined by the call.
  uint r_cnt = mcall->tf()->range()->cnt();
  int op = mcall->ideal_Opcode();
  MachProjNode *proj = new (m.C, 1) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
  bbs.map(proj->_idx,this);
  _nodes.insert(node_cnt++, proj);
  for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) {
    if( !regs.Member(r) ) {     // Not already defined by the call
      // Save-on-call register?
      if( (m._register_save_policy[r] == 'C') ||
          (m._register_save_policy[r] == 'A') ||
          ((m._register_save_policy[r] == 'E') &&
           (op == Op_CallRuntime ||
            op == Op_CallNative ||
            op == Op_CallInterpreter ||
            op == Op_CallLeaf)) ) {
        proj->_rout.Insert(r);
      }
    }
  }
  return node_cnt;
}
//------------------------------expand_recur----------------------------
static void expand_recur( Node *n, VectorSet &visited, Node_List &worklist ) {
  if( !n ) return;
  if( visited.test_set(n->_idx) ) return;
  switch (n->class_id()) {
  case Node::Class_Allocate:      // We use millicode instead of inline allocation
  case Node::Class_AllocateArray:
  case Node::Class_Lock:
  case Node::Class_Unlock:
  case Node::Class_SafePoint:
  // case Node::Class_GetKlass:
    worklist.push(n);
  }
  for(uint i=0; i<n->req(); i++)
    expand_recur(n->in(i),visited,worklist);
}
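// Illustrative sketch (not part of the HotSpot sources): the same visited-set
// guarded depth-first walk over def edges as expand_recur(), written against a
// minimal stand-in node type.  VectorSet::test_set() both tests and marks the
// bit, which is what keeps the recursion from revisiting shared inputs;
// std::unordered_set::insert() plays that role here.  All names below
// (ToyNode, toy_expand_recur) are hypothetical.
#include <unordered_set>
#include <vector>

struct ToyNode {
  int                   idx;      // analogous to Node::_idx
  bool                  expands;  // analogous to the class_id() cases above
  std::vector<ToyNode*> in;       // required inputs, analogous to in(i)/req()
};

static void toy_expand_recur(ToyNode* n,
                             std::unordered_set<int>& visited,
                             std::vector<ToyNode*>& worklist) {
  if (n == nullptr) return;
  if (!visited.insert(n->idx).second) return;  // already visited
  if (n->expands) worklist.push_back(n);       // collect nodes needing expansion
  for (ToyNode* def : n->in)
    toy_expand_recur(def, visited, worklist);
}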
//------------------------------schedule_local---------------------------------
// Topological sort within a block.  Someday become a real scheduler.
bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call) {
  // Already "sorted" are the block start Node (as the first entry), and
  // the block-ending Node and any trailing control projections.  We leave
  // these alone.  PhiNodes and ParmNodes are made to follow the block start
  // Node.  Everything else gets topo-sorted.

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print_cr("# --- schedule_local B%d, before: ---", block->_pre_order);
    for (uint i = 0; i < block->number_of_nodes(); i++) {
      tty->print("# ");
      block->get_node(i)->fast_dump();
    }
    tty->print_cr("#");
  }
#endif

  // RootNode is already sorted
  if (block->number_of_nodes() == 1) {
    return true;
  }

  // Move PhiNodes and ParmNodes from 1 to cnt up to the start
  uint node_cnt = block->end_idx();
  uint phi_cnt = 1;
  uint i;
  for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
    Node *n = block->get_node(i);
    if( n->is_Phi() ||          // Found a PhiNode or ParmNode
        (n->is_Proj()  && n->in(0) == block->head()) ) {
      // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
      block->map_node(block->get_node(phi_cnt), i);
      block->map_node(n, phi_cnt++);  // swap Phi/Parm up front
    } else {                    // All others
      // Count block-local inputs to 'n'
      uint cnt = n->len();      // Input count
      uint local = 0;
      for( uint j=0; j<cnt; j++ ) {
        Node *m = n->in(j);
        if( m && get_block_for_node(m) == block && !m->is_top() )
          local++;              // One more block-local input
      }
      ready_cnt.at_put(n->_idx, local); // Count em up

#ifdef ASSERT
      if( UseConcMarkSweepGC || UseG1GC ) {
        if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
          // Check the precedence edges
          for (uint prec = n->req(); prec < n->len(); prec++) {
            Node* oop_store = n->in(prec);
            if (oop_store != NULL) {
              assert(get_block_for_node(oop_store)->_dom_depth <= block->_dom_depth, "oop_store must dominate card-mark");
            }
          }
        }
      }
#endif

      // A few node types require changing a required edge to a precedence edge
      // before allocation.
      if( n->is_Mach() && n->req() > TypeFunc::Parms &&
          (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire ||
           n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) {
        // MemBarAcquire could be created without Precedent edge.
        // del_req() replaces the specified edge with the last input edge
        // and then removes the last edge. If the specified edge > number of
        // edges the last edge will be moved outside of the input edges array
        // and the edge will be lost. This is why this code should be
        // executed only when Precedent (== TypeFunc::Parms) edge is present.
        Node *x = n->in(TypeFunc::Parms);
        n->del_req(TypeFunc::Parms);
        n->add_prec(x);
      }
    }
  }
  for(uint i2=i; i2< block->number_of_nodes(); i2++ ) // Trailing guys get zapped count
    ready_cnt.at_put(block->get_node(i2)->_idx, 0);

  // All the prescheduled guys do not hold back internal nodes
  uint i3;
  for(i3 = 0; i3<phi_cnt; i3++ ) {  // For all pre-scheduled
    Node *n = block->get_node(i3);  // Get pre-scheduled
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j);
      if (get_block_for_node(m) == block) { // Local-block user
        int m_cnt = ready_cnt.at(m->_idx)-1;
        ready_cnt.at_put(m->_idx, m_cnt);   // Fix ready count
      }
    }
  }

  Node_List delay;
  // Make a worklist
  Node_List worklist;
  for(uint i4=i3; i4<node_cnt; i4++ ) {    // Put ready guys on worklist
    Node *m = block->get_node(i4);
    if( !ready_cnt.at(m->_idx) ) {   // Zero ready count?
      if (m->is_iteratively_computed()) {
        // Push induction variable increments last to allow other uses
        // of the phi to be scheduled first. The select() method breaks
        // ties in scheduling by worklist order.
        delay.push(m);
      } else if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_CreateEx) {
        // Force the CreateEx to the top of the list so it's processed
        // first and ends up at the start of the block.
        worklist.insert(0, m);
      } else {
        worklist.push(m);       // Then on to worklist!
      }
    }
  }
  while (delay.size()) {
    Node* d = delay.pop();
    worklist.push(d);
  }

  // Warm up the 'next_call' heuristic bits
  needed_for_next_call(block, block->head(), next_call);

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    for (uint j=0; j< block->number_of_nodes(); j++) {
      Node     *n = block->get_node(j);
      int     idx = n->_idx;
      tty->print("#   ready cnt:%3d  ", ready_cnt.at(idx));
      tty->print("latency:%3d  ", get_latency_for_node(n));
      tty->print("%4d: %s\n", idx, n->Name());
    }
  }
#endif

  uint max_idx = (uint)ready_cnt.length();
  // Pull from worklist and schedule
  while( worklist.size() ) {    // Worklist is not ready

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("#   ready list:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i];      // Get Node on worklist
        tty->print(" %d", n->_idx);
      }
      tty->cr();
    }
#endif

    // Select and pop a ready guy from worklist
    Node* n = select(block, worklist, ready_cnt, next_call, phi_cnt);
    block->map_node(n, phi_cnt++);    // Schedule him next

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("#    select %d: %s", n->_idx, n->Name());
      tty->print(", latency:%d", get_latency_for_node(n));
      n->dump();
      if (Verbose) {
        tty->print("#   ready list:");
        for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
          Node *n = worklist[i];      // Get Node on worklist
          tty->print(" %d", n->_idx);
        }
        tty->cr();
      }
    }
#endif

    if( n->is_MachCall() ) {
      MachCallNode *mcall = n->as_MachCall();
      phi_cnt = sched_call(block, phi_cnt, worklist, ready_cnt, mcall, next_call);
      continue;
    }

    if (n->is_Mach() && n->as_Mach()->has_call()) {
      RegMask regs;
      regs.Insert(_matcher.c_frame_pointer());
      regs.OR(n->out_RegMask());

      MachProjNode *proj = new (C) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj );
      map_node_to_block(proj, block);
      block->insert_node(proj, phi_cnt++);

      add_call_kills(proj, regs, _matcher._c_reg_save_policy, false);
    }

    // Children are now all ready
    for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
      Node* m = n->fast_out(i5); // Get user
      if (get_block_for_node(m) != block) {
        continue;
      }
      if( m->is_Phi() ) continue;
      if (m->_idx >= max_idx) { // new node, skip it
        assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
        continue;
      }
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }
  }

  if( phi_cnt != block->end_idx() ) {      // did not schedule all.  Retry, Bailout, or Die
    if (C->subsume_loads() == true && !C->failing()) {
      // Retry with subsume_loads == false
      // If this is the first failure, the sentinel string will "stick"
      // to the Compile object, and the C2Compiler will see it and retry.
      C->record_failure(C2Compiler::retry_no_subsuming_loads());
    }
    // assert( phi_cnt == end_idx(), "did not schedule all" );
    return false;
  }

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print_cr("#");
    tty->print_cr("# after schedule_local");
    for (uint i = 0; i < block->number_of_nodes(); i++) {
      tty->print("# ");
      block->get_node(i)->fast_dump();
    }
    tty->cr();
  }
#endif

  return true;
}
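// Illustrative sketch (not part of the HotSpot sources): the core ready-count
// mechanism used by schedule_local(), reduced to a plain Kahn-style
// topological sort.  'preds[n]' counts not-yet-scheduled inputs of node n and
// 'users[n]' lists the nodes that consume n; both containers and the name
// toy_schedule are assumptions of this toy model, not C2 data structures.
#include <cstddef>
#include <vector>

std::vector<int> toy_schedule(std::vector<int> preds,                       // ready counts per node
                              const std::vector<std::vector<int>>& users) { // def-use edges per node
  std::vector<int> order, worklist;
  for (std::size_t n = 0; n < preds.size(); ++n)
    if (preds[n] == 0) worklist.push_back(static_cast<int>(n));  // already ready

  while (!worklist.empty()) {
    int n = worklist.back();          // the real scheduler calls select() here instead
    worklist.pop_back();
    order.push_back(n);
    for (int u : users[n])            // children may now be ready
      if (--preds[u] == 0) worklist.push_back(u);
  }
  // order.size() < preds.size() corresponds to the "did not schedule all" bailout.
  return order;
}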
//------------------------------sched_call-------------------------------------
uint PhaseCFG::sched_call(Block* block, uint node_cnt, Node_List& worklist, GrowableArray<int>& ready_cnt, MachCallNode* mcall, VectorSet& next_call) {
  RegMask regs;

  // Schedule all the users of the call right now.  All the users are
  // projection Nodes, so they must be scheduled next to the call.
  // Collect all the defined registers.
  for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
    Node* n = mcall->fast_out(i);
    assert( n->is_MachProj(), "" );
    int n_cnt = ready_cnt.at(n->_idx)-1;
    ready_cnt.at_put(n->_idx, n_cnt);
    assert( n_cnt == 0, "" );
    // Schedule next to call
    block->map_node(n, node_cnt++);
    // Collect defined registers
    regs.OR(n->out_RegMask());
    // Check for scheduling the next control-definer
    if( n->bottom_type() == Type::CONTROL )
      // Warm up next pile of heuristic bits
      needed_for_next_call(block, n, next_call);

    // Children of projections are now all ready
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j); // Get user
      if(get_block_for_node(m) != block) {
        continue;
      }
      if( m->is_Phi() ) continue;
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }
  }

  // Act as if the call defines the Frame Pointer.
  // Certainly the FP is alive and well after the call.
  regs.Insert(_matcher.c_frame_pointer());

  // Set all registers killed and not already defined by the call.
  uint r_cnt = mcall->tf()->range()->cnt();
  int op = mcall->ideal_Opcode();
  MachProjNode *proj = new (C) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
  map_node_to_block(proj, block);
  block->insert_node(proj, node_cnt++);

  // Select the right register save policy.
  const char * save_policy;
  switch (op) {
    case Op_CallRuntime:
    case Op_CallLeaf:
    case Op_CallLeafNoFP:
      // Calling C code so use C calling convention
      save_policy = _matcher._c_reg_save_policy;
      break;

    case Op_CallStaticJava:
    case Op_CallDynamicJava:
      // Calling Java code so use Java calling convention
      save_policy = _matcher._register_save_policy;
      break;

    default:
      ShouldNotReachHere();
  }

  // When using CallRuntime mark SOE registers as killed by the call
  // so values that could show up in the RegisterMap aren't live in a
  // callee saved register since the register wouldn't know where to
  // find them.  CallLeaf and CallLeafNoFP are ok because they can't
  // have debug info on them.  Strictly speaking this only needs to be
  // done for oops since idealreg2debugmask takes care of debug info
  // references but there is no way to handle oops differently than other
  // pointers as far as the kill mask goes.
  bool exclude_soe = op == Op_CallRuntime;

  // If the call is a MethodHandle invoke, we need to exclude the
  // register which is used to save the SP value over MH invokes from
  // the mask.  Otherwise this register could be used for
  // deoptimization information.
  if (op == Op_CallStaticJava) {
    MachCallStaticJavaNode* mcallstaticjava = (MachCallStaticJavaNode*) mcall;
    if (mcallstaticjava->_method_handle_invoke)
      proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask());
  }

  add_call_kills(proj, regs, save_policy, exclude_soe);

  return node_cnt;
}
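// Illustrative sketch (not part of the HotSpot sources): how a save-policy
// string turns into a kill set for the fat projection, in the spirit of
// add_call_kills() and the explicit register loop in the older sched_call()
// below.  The policy characters follow the convention visible in this file:
// 'C'/'A' registers are clobbered by any call, and 'E' (save-on-entry)
// registers are only marked killed when exclude_soe is set.  The name
// toy_call_kills and the std::set representation are assumptions.
#include <set>
#include <string>

std::set<int> toy_call_kills(const std::string& save_policy,   // one policy char per register
                             const std::set<int>& defined,     // regs the call already defines
                             bool exclude_soe) {
  std::set<int> kills;
  for (int r = 0; r < static_cast<int>(save_policy.size()); ++r) {
    if (defined.count(r)) continue;                 // not already defined by the call
    char p = save_policy[r];
    if (p == 'C' || p == 'A' || (p == 'E' && exclude_soe))
      kills.insert(r);                              // register is killed across the call
  }
  return kills;
}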
//------------------------------do_unswitching-----------------------------
// Clone loop with an invariant test (that does not exit) and
// insert a clone of the test that selects which version to
// execute.
void PhaseIdealLoop::do_unswitching (IdealLoopTree *loop, Node_List &old_new) {

  // Find first invariant test that doesn't exit the loop
  LoopNode *head = loop->_head->as_Loop();

  IfNode* unswitch_iff = find_unswitching_candidate((const IdealLoopTree *)loop);
  assert(unswitch_iff != NULL, "should be at least one");

  // Need to revert back to normal loop
  if (head->is_CountedLoop() && !head->as_CountedLoop()->is_normal_loop()) {
    head->as_CountedLoop()->set_normal_loop();
  }

  ProjNode* proj_true = create_slow_version_of_loop(loop, old_new);

  assert(proj_true->is_IfTrue() && proj_true->unique_ctrl_out() == head, "by construction");

  // Increment unswitch count
  LoopNode* head_clone = old_new[head->_idx]->as_Loop();
  int nct = head->unswitch_count() + 1;
  head->set_unswitch_count(nct);
  head_clone->set_unswitch_count(nct);

  // Add test to new "if" outside of loop
  IfNode* invar_iff   = proj_true->in(0)->as_If();
  Node* invar_iff_c   = invar_iff->in(0);
  BoolNode* bol       = unswitch_iff->in(1)->as_Bool();
  invar_iff->set_req(1, bol);
  invar_iff->_prob    = unswitch_iff->_prob;

  ProjNode* proj_false = invar_iff->proj_out(0)->as_Proj();

  // Hoist invariant casts out of each loop to the appropriate
  // control projection.

  Node_List worklist;

  for (DUIterator_Fast imax, i = unswitch_iff->fast_outs(imax); i < imax; i++) {
    ProjNode* proj = unswitch_iff->fast_out(i)->as_Proj();
    // Copy to a worklist for easier manipulation
    for (DUIterator_Fast jmax, j = proj->fast_outs(jmax); j < jmax; j++) {
      Node* use = proj->fast_out(j);
      if (use->Opcode() == Op_CheckCastPP && loop->is_invariant(use->in(1))) {
        worklist.push(use);
      }
    }
    ProjNode* invar_proj = invar_iff->proj_out(proj->_con)->as_Proj();
    while (worklist.size() > 0) {
      Node* use = worklist.pop();
      Node* nuse = use->clone();
      nuse->set_req(0, invar_proj);
      _igvn.hash_delete(use);
      use->set_req(1, nuse);
      _igvn._worklist.push(use);
      register_new_node(nuse, invar_proj);
      // Same for the clone
      Node* use_clone = old_new[use->_idx];
      _igvn.hash_delete(use_clone);
      use_clone->set_req(1, nuse);
      _igvn._worklist.push(use_clone);
    }
  }

  // Hardwire the control paths in the loops into if(true) and if(false)
  _igvn.hash_delete(unswitch_iff);
  short_circuit_if(unswitch_iff, proj_true);
  _igvn._worklist.push(unswitch_iff);

  IfNode* unswitch_iff_clone = old_new[unswitch_iff->_idx]->as_If();
  _igvn.hash_delete(unswitch_iff_clone);
  short_circuit_if(unswitch_iff_clone, proj_false);
  _igvn._worklist.push(unswitch_iff_clone);

  // Reoptimize loops
  loop->record_for_igvn();
  for(int i = loop->_body.size() - 1; i >= 0 ; i--) {
    Node *n = loop->_body[i];
    Node *n_clone = old_new[n->_idx];
    _igvn._worklist.push(n_clone);
  }

#ifndef PRODUCT
  if (TraceLoopUnswitching) {
    tty->print_cr("Loop unswitching orig: %d @ %d  new: %d @ %d",
                  head->_idx,                unswitch_iff->_idx,
                  old_new[head->_idx]->_idx, unswitch_iff_clone->_idx);
  }
#endif

  C->set_major_progress();
}
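// Illustrative sketch (not part of the HotSpot sources): what do_unswitching()
// accomplishes, shown at the source level.  The loop-invariant test 'flag' is
// hoisted in front of two copies of the loop, and each copy keeps only the arm
// it can reach, so the test runs once instead of on every iteration.  The
// function name and the int-vector payload are hypothetical.
#include <cstddef>
#include <vector>

void toy_unswitched(std::vector<int>& a, bool flag) {
  // Before unswitching:
  //   for (std::size_t i = 0; i < a.size(); i++) {
  //     if (flag) a[i] += 1; else a[i] -= 1;   // 'flag' is invariant and the test does not exit
  //   }
  if (flag) {                                               // cloned invariant test, now outside
    for (std::size_t i = 0; i < a.size(); i++) a[i] += 1;   // "true" version of the loop
  } else {
    for (std::size_t i = 0; i < a.size(); i++) a[i] -= 1;   // "false" (cloned) version of the loop
  }
}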
//------------------------------call_catch_cleanup-----------------------------
// If we inserted any instructions between a Call and its CatchNode,
// clone the instructions on all paths below the Catch.
void Block::call_catch_cleanup(Block_Array &bbs) {

  // End of region to clone
  uint end = end_idx();
  if( !_nodes[end]->is_Catch() ) return;
  // Start of region to clone
  uint beg = end;
  while( _nodes[beg-1]->Opcode() != Op_MachProj ||
         !_nodes[beg-1]->in(0)->is_Call() ) {
    beg--;
    assert(beg > 0,"Catch cleanup walking beyond block boundary");
  }
  if( beg == end ) return;

  // Clone along all Catch output paths.  Clone area between the 'beg' and
  // 'end' indices.
  for( uint i = 0; i < _num_succs; i++ ) {
    Block *sb = _succs[i];
    // Clone the entire area; ignoring the edge fixup for now.
    for( uint j = end; j > beg; j-- ) {
      Node *clone = _nodes[j-1]->clone();
      sb->_nodes.insert( 1, clone );
      bbs.map(clone->_idx,sb);
    }
  }

  // Fixup edges.  Check the def-use info per cloned Node
  for(uint i2 = beg; i2 < end; i2++ ) {
    uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block
    Node *n = _nodes[i2];        // Node that got cloned
    // Need DU safe iterator because of edge manipulation in calls.
    Node_List *out = new Node_List(Thread::current()->resource_area());
    for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) {
      out->push(n->fast_out(j1));
    }
    uint max = out->size();
    for (uint j = 0; j < max; j++) { // For all users
      Node *use = out->pop();
      Block *buse = bbs[use->_idx];
      if( use->is_Phi() ) {
        for( uint k = 1; k < use->req(); k++ )
          if( use->in(k) == n )
            catch_cleanup_one_use( use, bbs[buse->pred(k)->_idx], n, this, bbs, beg, n_clone_idx, k );
      } else {
        catch_cleanup_one_use( use, buse, n, this, bbs, beg, n_clone_idx, -1 );
      }
    } // End for all users

  } // End of for all Nodes in cloned area

  // Remove the now-dead cloned ops
  for(uint i3 = beg; i3 < end; i3++ ) {
    _nodes[beg]->disconnect_inputs(NULL);
    _nodes.remove(beg);
  }

  // If the successor blocks have a CreateEx node, move it back to the top
  for(uint i4 = 0; i4 < _num_succs; i4++ ) {
    Block *sb = _succs[i4];
    MachNode *cex = sb->_nodes[1+end-beg]->is_Mach();
    if( cex && cex->ideal_Opcode() == Op_CreateEx ) {
      sb->_nodes.remove(1+end-beg);
      sb->_nodes.insert(1,cex);
    }
  }
}
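// Illustrative sketch (not part of the HotSpot sources): the shape of the
// cleanup above with toy containers.  The tail region [beg, end) of a block is
// copied to the front of every successor (just after the successor's head at
// index 0), and the originals are then removed; the real code additionally
// rewires every use of an original to the clone that dominates it.  The
// function name and the int-vector block representation are hypothetical.
#include <cstddef>
#include <vector>

void toy_catch_cleanup(std::vector<int>& block,                 // nodes of the predecessor block
                       std::vector<std::vector<int>>& succs,    // nodes of each successor block
                       std::size_t beg, std::size_t end) {
  for (auto& sb : succs)                                        // clone the region into each successor
    sb.insert(sb.begin() + 1, block.begin() + beg, block.begin() + end);
  block.erase(block.begin() + beg, block.begin() + end);        // remove the now-dead originals
}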
//------------------------------schedule_local---------------------------------
// Topological sort within a block.  Someday become a real scheduler.
bool Block::schedule_local(Matcher &matcher, Block_Array &bbs, int *ready_cnt, VectorSet &next_call, GrowableArray<uint> &node_latency) {
  // Already "sorted" are the block start Node (as the first entry), and
  // the block-ending Node and any trailing control projections.  We leave
  // these alone.  PhiNodes and ParmNodes are made to follow the block start
  // Node.  Everything else gets topo-sorted.

#ifndef PRODUCT
  if (TraceOptoPipelining) {
    tty->print("# before schedule_local\n");
    for (uint i = 0; i < _nodes.size(); i++) {
      tty->print("# ");
      _nodes[i]->fast_dump();
    }
    tty->print("\n");
  }
#endif

  // RootNode is already sorted
  if( _nodes.size() == 1 ) return true;

  // Move PhiNodes and ParmNodes from 1 to cnt up to the start
  uint node_cnt = end_idx();
  uint phi_cnt = 1;
  uint i;
  for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
    Node *n = _nodes[i];
    if( n->is_Phi() ||          // Found a PhiNode or ParmNode
        (n->is_Proj()  && n->in(0) == head()) ) {
      // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
      _nodes.map(i,_nodes[phi_cnt]);
      _nodes.map(phi_cnt++,n);  // swap Phi/Parm up front
    } else {                    // All others
      // Count block-local inputs to 'n'
      uint cnt = n->len();      // Input count
      uint local = 0;
      for( uint j=0; j<cnt; j++ ) {
        Node *m = n->in(j);
        if( m && bbs[m->_idx] == this && !m->is_top() )
          local++;              // One more block-local input
      }
      ready_cnt[n->_idx] = local; // Count em up

      // A few node types require changing a required edge to a precedence edge
      // before allocation.
      MachNode *m = n->is_Mach();
      if( UseConcMarkSweepGC ) {
        if( m && m->ideal_Opcode() == Op_StoreCM ) {
          // Note: Required edges with an index greater than oper_input_base
          // are not supported by the allocator.
          // Note2: Can only depend on unmatched edge being last,
          // can not depend on its absolute position.
          Node *oop_store = n->in(n->req() - 1);
          n->del_req(n->req() - 1);
          n->add_prec(oop_store);
          assert(bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark");
        }
      }
      if( m && m->ideal_Opcode() == Op_MemBarAcquire ) {
        Node *x = n->in(TypeFunc::Parms);
        n->del_req(TypeFunc::Parms);
        n->add_prec(x);
      }
    }
  }
  for(uint i2=i; i2<_nodes.size(); i2++ ) // Trailing guys get zapped count
    ready_cnt[_nodes[i2]->_idx] = 0;

  // All the prescheduled guys do not hold back internal nodes
  uint i3;
  for(i3 = 0; i3<phi_cnt; i3++ ) {  // For all pre-scheduled
    Node *n = _nodes[i3];       // Get pre-scheduled
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j);
      if( bbs[m->_idx] == this ) // Local-block user
        ready_cnt[m->_idx]--;    // Fix ready count
    }
  }

  // Make a worklist
  Node_List worklist;
  for(uint i4=i3; i4<node_cnt; i4++ ) {    // Put ready guys on worklist
    Node *m = _nodes[i4];
    if( !ready_cnt[m->_idx] )   // Zero ready count?
      worklist.push(m);         // Then on to worklist!
  }

  // Warm up the 'next_call' heuristic bits
  needed_for_next_call(_nodes[0], next_call, bbs);

#ifndef PRODUCT
  if (TraceOptoPipelining) {
    for (uint j=0; j<_nodes.size(); j++) {
      Node     *n = _nodes[j];
      int     idx = n->_idx;
      tty->print("#   ready cnt:%3d  ", ready_cnt[idx]);
      tty->print("latency:%3d  ", node_latency.at_grow(idx));
      tty->print("%4d: %s\n", idx, n->Name());
    }
  }
#endif

  // Pull from worklist and schedule
  while( worklist.size() ) {    // Worklist is not ready

#ifndef PRODUCT
    uint before_size = worklist.size();
    if (TraceOptoPipelining && before_size > 1) {
      tty->print("#   before select:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i];      // Get Node on worklist
        tty->print(" %3d", n->_idx);
      }
      tty->print("\n");
    }
#endif

    // Select and pop a ready guy from worklist
    Node* n = select(worklist, bbs, ready_cnt, next_call, phi_cnt, node_latency);
    _nodes.map(phi_cnt++,n);    // Schedule him next
    MachNode *m = n->is_Mach();

#ifndef PRODUCT
    if (TraceOptoPipelining && before_size > 1) {
      tty->print("#    select %d: %s", n->_idx, n->Name());
      tty->print(", latency:%d", node_latency.at_grow(n->_idx));
      n->dump();
      tty->print("#   after select:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i];      // Get Node on worklist
        tty->print(" %4d", n->_idx);
      }
      tty->print("\n");
    }
#endif

    if( m ) {
      MachCallNode *mcall = m->is_MachCall();
      if( mcall ) {
        phi_cnt = sched_call(matcher, bbs, phi_cnt, worklist, ready_cnt, mcall, next_call);
        continue;
      }
    }
    // Children are now all ready
    for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
      Node* m = n->fast_out(i5); // Get user
      if( bbs[m->_idx] != this ) continue;
      if( m->is_Phi() ) continue;
      if( !--ready_cnt[m->_idx] )
        worklist.push(m);
    }
  }

  if( phi_cnt != end_idx() ) {  // did not schedule all.  Retry, Bailout, or Die
    Compile* C = matcher.C;
    if (C->subsume_loads() == true) {
      // Retry with subsume_loads == false
      C->set_result(Compile::Comp_subsumed_load_conflict);
    } else {
      // Bailout without retry
      C->set_result(Compile::Comp_no_retry);
    }
    // assert( phi_cnt == end_idx(), "did not schedule all" );
    return false;
  }

#ifndef PRODUCT
  if (TraceOptoPipelining) {
    tty->print("# after schedule_local\n");
    for (uint i = 0; i < _nodes.size(); i++) {
      tty->print("# ");
      _nodes[i]->fast_dump();
    }
    tty->print("\n");
  }
#endif

  return true;
}
//------------------------------schedule_local---------------------------------
// Topological sort within a block.  Someday become a real scheduler.
bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, int *ready_cnt, VectorSet &next_call) {
  // Already "sorted" are the block start Node (as the first entry), and
  // the block-ending Node and any trailing control projections.  We leave
  // these alone.  PhiNodes and ParmNodes are made to follow the block start
  // Node.  Everything else gets topo-sorted.

#ifndef PRODUCT
  if (cfg->trace_opto_pipelining()) {
    tty->print_cr("# --- schedule_local B%d, before: ---", _pre_order);
    for (uint i = 0; i < _nodes.size(); i++) {
      tty->print("# ");
      _nodes[i]->fast_dump();
    }
    tty->print_cr("#");
  }
#endif

  // RootNode is already sorted
  if( _nodes.size() == 1 ) return true;

  // Move PhiNodes and ParmNodes from 1 to cnt up to the start
  uint node_cnt = end_idx();
  uint phi_cnt = 1;
  uint i;
  for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
    Node *n = _nodes[i];
    if( n->is_Phi() ||          // Found a PhiNode or ParmNode
        (n->is_Proj()  && n->in(0) == head()) ) {
      // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
      _nodes.map(i,_nodes[phi_cnt]);
      _nodes.map(phi_cnt++,n);  // swap Phi/Parm up front
    } else {                    // All others
      // Count block-local inputs to 'n'
      uint cnt = n->len();      // Input count
      uint local = 0;
      for( uint j=0; j<cnt; j++ ) {
        Node *m = n->in(j);
        if( m && cfg->_bbs[m->_idx] == this && !m->is_top() )
          local++;              // One more block-local input
      }
      ready_cnt[n->_idx] = local; // Count em up

      // A few node types require changing a required edge to a precedence edge
      // before allocation.
      if( UseConcMarkSweepGC || UseG1GC ) {
        if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
          // Note: Required edges with an index greater than oper_input_base
          // are not supported by the allocator.
          // Note2: Can only depend on unmatched edge being last,
          // can not depend on its absolute position.
          Node *oop_store = n->in(n->req() - 1);
          n->del_req(n->req() - 1);
          n->add_prec(oop_store);
          assert(cfg->_bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark");
        }
      }
      if( n->is_Mach() && n->req() > TypeFunc::Parms &&
          (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire ||
           n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) {
        // MemBarAcquire could be created without Precedent edge.
        // del_req() replaces the specified edge with the last input edge
        // and then removes the last edge. If the specified edge > number of
        // edges the last edge will be moved outside of the input edges array
        // and the edge will be lost. This is why this code should be
        // executed only when Precedent (== TypeFunc::Parms) edge is present.
        Node *x = n->in(TypeFunc::Parms);
        n->del_req(TypeFunc::Parms);
        n->add_prec(x);
      }
    }
  }
  for(uint i2=i; i2<_nodes.size(); i2++ ) // Trailing guys get zapped count
    ready_cnt[_nodes[i2]->_idx] = 0;

  // All the prescheduled guys do not hold back internal nodes
  uint i3;
  for(i3 = 0; i3<phi_cnt; i3++ ) {  // For all pre-scheduled
    Node *n = _nodes[i3];       // Get pre-scheduled
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j);
      if( cfg->_bbs[m->_idx] == this ) // Local-block user
        ready_cnt[m->_idx]--;          // Fix ready count
    }
  }

  Node_List delay;
  // Make a worklist
  Node_List worklist;
  for(uint i4=i3; i4<node_cnt; i4++ ) {    // Put ready guys on worklist
    Node *m = _nodes[i4];
    if( !ready_cnt[m->_idx] ) {   // Zero ready count?
      if (m->is_iteratively_computed()) {
        // Push induction variable increments last to allow other uses
        // of the phi to be scheduled first. The select() method breaks
        // ties in scheduling by worklist order.
        delay.push(m);
      } else if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_CreateEx) {
        // Force the CreateEx to the top of the list so it's processed
        // first and ends up at the start of the block.
        worklist.insert(0, m);
      } else {
        worklist.push(m);       // Then on to worklist!
      }
    }
  }
  while (delay.size()) {
    Node* d = delay.pop();
    worklist.push(d);
  }

  // Warm up the 'next_call' heuristic bits
  needed_for_next_call(_nodes[0], next_call, cfg->_bbs);

#ifndef PRODUCT
  if (cfg->trace_opto_pipelining()) {
    for (uint j=0; j<_nodes.size(); j++) {
      Node     *n = _nodes[j];
      int     idx = n->_idx;
      tty->print("#   ready cnt:%3d  ", ready_cnt[idx]);
      tty->print("latency:%3d  ", cfg->_node_latency.at_grow(idx));
      tty->print("%4d: %s\n", idx, n->Name());
    }
  }
#endif

  // Pull from worklist and schedule
  while( worklist.size() ) {    // Worklist is not ready

#ifndef PRODUCT
    if (cfg->trace_opto_pipelining()) {
      tty->print("#   ready list:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i];      // Get Node on worklist
        tty->print(" %d", n->_idx);
      }
      tty->cr();
    }
#endif

    // Select and pop a ready guy from worklist
    Node* n = select(cfg, worklist, ready_cnt, next_call, phi_cnt);
    _nodes.map(phi_cnt++,n);    // Schedule him next

#ifndef PRODUCT
    if (cfg->trace_opto_pipelining()) {
      tty->print("#    select %d: %s", n->_idx, n->Name());
      tty->print(", latency:%d", cfg->_node_latency.at_grow(n->_idx));
      n->dump();
      if (Verbose) {
        tty->print("#   ready list:");
        for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
          Node *n = worklist[i];      // Get Node on worklist
          tty->print(" %d", n->_idx);
        }
        tty->cr();
      }
    }
#endif

    if( n->is_MachCall() ) {
      MachCallNode *mcall = n->as_MachCall();
      phi_cnt = sched_call(matcher, cfg->_bbs, phi_cnt, worklist, ready_cnt, mcall, next_call);
      continue;
    }
    // Children are now all ready
    for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
      Node* m = n->fast_out(i5); // Get user
      if( cfg->_bbs[m->_idx] != this ) continue;
      if( m->is_Phi() ) continue;
      if( !--ready_cnt[m->_idx] )
        worklist.push(m);
    }
  }

  if( phi_cnt != end_idx() ) {  // did not schedule all.  Retry, Bailout, or Die
    Compile* C = matcher.C;
    if (C->subsume_loads() == true && !C->failing()) {
      // Retry with subsume_loads == false
      // If this is the first failure, the sentinel string will "stick"
      // to the Compile object, and the C2Compiler will see it and retry.
      C->record_failure(C2Compiler::retry_no_subsuming_loads());
    }
    // assert( phi_cnt == end_idx(), "did not schedule all" );
    return false;
  }

#ifndef PRODUCT
  if (cfg->trace_opto_pipelining()) {
    tty->print_cr("#");
    tty->print_cr("# after schedule_local");
    for (uint i = 0; i < _nodes.size(); i++) {
      tty->print("# ");
      _nodes[i]->fast_dump();
    }
    tty->cr();
  }
#endif

  return true;
}
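// Illustrative sketch (not part of the HotSpot sources): why the MemBarAcquire
// handling in schedule_local() above calls del_req() + add_prec() only when
// the Precedent input slot actually exists.  As the comment in that code says,
// del_req() replaces the specified edge with the last input edge and then
// removes the last edge, exactly like the swap-and-pop below; asking it to
// delete a slot past the end would silently drop the wrong (last) edge.  The
// name toy_del_req and the int-vector edge array are hypothetical.
#include <cassert>
#include <cstddef>
#include <vector>

void toy_del_req(std::vector<int>& req_edges, std::size_t idx) {
  assert(idx < req_edges.size() && "edge must exist, or the last edge is lost");
  req_edges[idx] = req_edges.back();   // replace the specified edge with the last one
  req_edges.pop_back();                // then remove the last edge
}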
//------------------------------sched_call-------------------------------------
uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call ) {
  RegMask regs;

  // Schedule all the users of the call right now.  All the users are
  // projection Nodes, so they must be scheduled next to the call.
  // Collect all the defined registers.
  for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
    Node* n = mcall->fast_out(i);
    assert( n->Opcode()==Op_MachProj, "" );
    --ready_cnt[n->_idx];
    assert( !ready_cnt[n->_idx], "" );
    // Schedule next to call
    _nodes.map(node_cnt++, n);
    // Collect defined registers
    regs.OR(n->out_RegMask());
    // Check for scheduling the next control-definer
    if( n->bottom_type() == Type::CONTROL )
      // Warm up next pile of heuristic bits
      needed_for_next_call(n, next_call, bbs);

    // Children of projections are now all ready
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j); // Get user
      if( bbs[m->_idx] != this ) continue;
      if( m->is_Phi() ) continue;
      if( !--ready_cnt[m->_idx] )
        worklist.push(m);
    }
  }

  // Act as if the call defines the Frame Pointer.
  // Certainly the FP is alive and well after the call.
  regs.Insert(matcher.c_frame_pointer());

  // Set all registers killed and not already defined by the call.
  uint r_cnt = mcall->tf()->range()->cnt();
  int op = mcall->ideal_Opcode();
  MachProjNode *proj = new (matcher.C, 1) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
  bbs.map(proj->_idx,this);
  _nodes.insert(node_cnt++, proj);

  // Select the right register save policy.
  const char * save_policy;
  switch (op) {
    case Op_CallRuntime:
    case Op_CallLeaf:
    case Op_CallLeafNoFP:
      // Calling C code so use C calling convention
      save_policy = matcher._c_reg_save_policy;
      break;

    case Op_CallStaticJava:
    case Op_CallDynamicJava:
      // Calling Java code so use Java calling convention
      save_policy = matcher._register_save_policy;
      break;

    default:
      ShouldNotReachHere();
  }

  // When using CallRuntime mark SOE registers as killed by the call
  // so values that could show up in the RegisterMap aren't live in a
  // callee saved register since the register wouldn't know where to
  // find them.  CallLeaf and CallLeafNoFP are ok because they can't
  // have debug info on them.  Strictly speaking this only needs to be
  // done for oops since idealreg2debugmask takes care of debug info
  // references but there is no way to handle oops differently than other
  // pointers as far as the kill mask goes.
  bool exclude_soe = op == Op_CallRuntime;

  // If the call is a MethodHandle invoke, we need to exclude the
  // register which is used to save the SP value over MH invokes from
  // the mask.  Otherwise this register could be used for
  // deoptimization information.
  if (op == Op_CallStaticJava) {
    MachCallStaticJavaNode* mcallstaticjava = (MachCallStaticJavaNode*) mcall;
    if (mcallstaticjava->_method_handle_invoke)
      proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask());
  }

  // Fill in the kill mask for the call
  for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) {
    if( !regs.Member(r) ) {     // Not already defined by the call
      // Save-on-call register?
      if ((save_policy[r] == 'C') ||
          (save_policy[r] == 'A') ||
          ((save_policy[r] == 'E') && exclude_soe)) {
        proj->_rout.Insert(r);
      }
    }
  }

  return node_cnt;
}