//------------------------------sched_call------------------------------------- uint Block::sched_call( Matcher &m, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call ) { RegMask regs; // Schedule all the users of the call right now. All the users are // projection Nodes, so they must be scheduled next to the call. // Collect all the defined registers. for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) { Node* n = mcall->fast_out(i); assert( n->Opcode()==Op_MachProj, "" ); --ready_cnt[n->_idx]; assert( !ready_cnt[n->_idx], "" ); // Schedule next to call _nodes.map(node_cnt++, n); // Collect defined registers regs.OR(n->out_RegMask()); // Check for scheduling the next control-definer if( n->bottom_type() == Type::CONTROL ) // Warm up next pile of heuristic bits needed_for_next_call(n, next_call, bbs); // Children of projections are now all ready for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { Node* m = n->fast_out(j); // Get user if( bbs[m->_idx] != this ) continue; if( m->is_Phi() ) continue; if( !--ready_cnt[m->_idx] ) worklist.push(m); } } // Act as if the call defines the Frame Pointer. // Certainly the FP is alive and well after the call. regs.Insert(m.c_frame_pointer()); // Set all registers killed and not already defined by the call. uint r_cnt = mcall->tf()->range()->cnt(); int op = mcall->ideal_Opcode(); MachProjNode *proj = new (1) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj ); bbs.map(proj->_idx,this); _nodes.insert(node_cnt++, proj); for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) { if( !regs.Member(r) ) { // Not already defined by the call // Save-on-call register? 
if( (m._register_save_policy[r] == 'C') || (m._register_save_policy[r] == 'A') || ((m._register_save_policy[r] == 'E') && (op == Op_CallRuntime || op == Op_CallNative || op == Op_CallInterpreter || op == Op_CallLeaf)) ) { proj->_rout.Insert(r); } } } return node_cnt; }
//------------------------------schedule_local---------------------------------
// Topological sort within a block.  Someday become a real scheduler.
//
//   block      - the block whose nodes are reordered in place.
//   ready_cnt  - per-node (indexed by _idx) count of block-local inputs that
//                have not been scheduled yet; filled in and consumed here.
//   next_call  - heuristic bits passed to needed_for_next_call() and select().
//
// Returns true when every node up to block->end_idx() was scheduled.  Returns
// false otherwise; in that case, if loads were being subsumed and the
// compilation is not already failing, a retry-without-subsuming-loads failure
// is recorded on the Compile object first.
bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call) {
  // Already "sorted" are the block start Node (as the first entry), and
  // the block-ending Node and any trailing control projections.  We leave
  // these alone.  PhiNodes and ParmNodes are made to follow the block start
  // Node.  Everything else gets topo-sorted.

#ifndef PRODUCT
  // Tracing only: dump the block contents before scheduling.
  if (trace_opto_pipelining()) {
    tty->print_cr("# --- schedule_local B%d, before: ---", block->_pre_order);
    for (uint i = 0;i < block->number_of_nodes(); i++) {
      tty->print("# ");
      block->get_node(i)->fast_dump();
    }
    tty->print_cr("#");
  }
#endif

  // RootNode is already sorted
  if (block->number_of_nodes() == 1) {
    return true;
  }

  // Move PhiNodes and ParmNodes from 1 to cnt up to the start
  uint node_cnt = block->end_idx();
  uint phi_cnt = 1;             // Next free slot behind the pre-scheduled prefix
  uint i;
  for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
    Node *n = block->get_node(i);
    if( n->is_Phi() ||          // Found a PhiNode or ParmNode
        (n->is_Proj() && n->in(0) == block->head()) ) {
      // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
      block->map_node(block->get_node(phi_cnt), i);
      block->map_node(n, phi_cnt++); // swap Phi/Parm up front
    } else {                    // All others
      // Count block-local inputs to 'n'
      uint cnt = n->len();      // Input count
      uint local = 0;
      for( uint j=0; j<cnt; j++ ) {
        Node *m = n->in(j);
        if( m && get_block_for_node(m) == block && !m->is_top() )
          local++;              // One more block-local input
      }
      ready_cnt.at_put(n->_idx, local); // Count em up

#ifdef ASSERT
      // Debug-only sanity check on card-mark stores.
      if( UseConcMarkSweepGC || UseG1GC ) {
        if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
          // Check the precedence edges
          for (uint prec = n->req(); prec < n->len(); prec++) {
            Node* oop_store = n->in(prec);
            if (oop_store != NULL) {
              assert(get_block_for_node(oop_store)->_dom_depth <= block->_dom_depth, "oop_store must dominate card-mark");
            }
          }
        }
      }
#endif

      // A few node types require changing a required edge to a precedence edge
      // before allocation.
      if( n->is_Mach() && n->req() > TypeFunc::Parms &&
          (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire ||
           n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) {
        // MemBarAcquire could be created without Precedent edge.
        // del_req() replaces the specified edge with the last input edge
        // and then removes the last edge. If the specified edge > number of
        // edges the last edge will be moved outside of the input edges array
        // and the edge will be lost. This is why this code should be
        // executed only when Precedent (== TypeFunc::Parms) edge is present.
        Node *x = n->in(TypeFunc::Parms);
        n->del_req(TypeFunc::Parms);
        n->add_prec(x);
      }
    }
  }
  // 'i' is left at node_cnt by the scan above, so this covers the block-ending
  // node and everything after it.
  for(uint i2=i; i2< block->number_of_nodes(); i2++ ) // Trailing guys get zapped count
    ready_cnt.at_put(block->get_node(i2)->_idx, 0);

  // All the prescheduled guys do not hold back internal nodes
  uint i3;
  for(i3 = 0; i3<phi_cnt; i3++ ) { // For all pre-scheduled
    Node *n = block->get_node(i3); // Get pre-scheduled
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j);
      if (get_block_for_node(m) == block) { // Local-block user
        int m_cnt = ready_cnt.at(m->_idx)-1;
        ready_cnt.at_put(m->_idx, m_cnt); // Fix ready count
      }
    }
  }

  // Ready nodes that must be appended to the worklist after all others.
  Node_List delay;
  // Make a worklist
  Node_List worklist;
  // 'i3' equals phi_cnt here, so this scans only the not-yet-scheduled nodes.
  for(uint i4=i3; i4<node_cnt; i4++ ) { // Put ready guys on worklist
    Node *m = block->get_node(i4);
    if( !ready_cnt.at(m->_idx) ) { // Zero ready count?
      if (m->is_iteratively_computed()) {
        // Push induction variable increments last to allow other uses
        // of the phi to be scheduled first. The select() method breaks
        // ties in scheduling by worklist order.
        delay.push(m);
      } else if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_CreateEx) {
        // Force the CreateEx to the top of the list so it's processed
        // first and ends up at the start of the block.
        worklist.insert(0, m);
      } else {
        worklist.push(m);       // Then on to worklist!
      }
    }
  }
  // Append the delayed nodes behind everything already on the worklist.
  while (delay.size()) {
    Node* d = delay.pop();
    worklist.push(d);
  }

  // Warm up the 'next_call' heuristic bits
  needed_for_next_call(block, block->head(), next_call);

#ifndef PRODUCT
  // Tracing only: print ready count and latency for every node in the block.
  if (trace_opto_pipelining()) {
    for (uint j=0; j< block->number_of_nodes(); j++) {
      Node *n = block->get_node(j);
      int idx = n->_idx;
      tty->print("# ready cnt:%3d ", ready_cnt.at(idx));
      tty->print("latency:%3d ", get_latency_for_node(n));
      tty->print("%4d: %s\n", idx, n->Name());
    }
  }
#endif

  // Nodes created below (kill projections) get an _idx at or above this bound
  // and therefore have no ready_cnt entry.
  uint max_idx = (uint)ready_cnt.length();
  // Pull from worklist and schedule
  while( worklist.size() ) {    // While the worklist is not empty

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("# ready list:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i];  // Get Node on worklist
        tty->print(" %d", n->_idx);
      }
      tty->cr();
    }
#endif

    // Select and pop a ready guy from worklist
    Node* n = select(block, worklist, ready_cnt, next_call, phi_cnt);
    block->map_node(n, phi_cnt++); // Schedule him next

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("# select %d: %s", n->_idx, n->Name());
      tty->print(", latency:%d", get_latency_for_node(n));
      n->dump();
      if (Verbose) {
        tty->print("# ready list:");
        for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
          Node *n = worklist[i]; // Get Node on worklist
          tty->print(" %d", n->_idx);
        }
        tty->cr();
      }
    }
#endif

    // Calls are handled by sched_call(), which schedules the call's
    // projections, releases their users and returns the new schedule position.
    if( n->is_MachCall() ) {
      MachCallNode *mcall = n->as_MachCall();
      phi_cnt = sched_call(block, phi_cnt, worklist, ready_cnt, mcall, next_call);
      continue;
    }

    // A non-call Mach node that reports has_call() gets a fat kill projection
    // placed right after it.  The frame pointer and the node's own output
    // registers are passed to add_call_kills() as already defined.
    if (n->is_Mach() && n->as_Mach()->has_call()) {
      RegMask regs;
      regs.Insert(_matcher.c_frame_pointer());
      regs.OR(n->out_RegMask());

      MachProjNode *proj = new (C) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj );
      map_node_to_block(proj, block);
      block->insert_node(proj, phi_cnt++);

      add_call_kills(proj, regs, _matcher._c_reg_save_policy, false);
    }

    // Children are now all ready
    for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
      Node* m = n->fast_out(i5); // Get user
      if (get_block_for_node(m) != block) {
        continue;
      }
      if( m->is_Phi() ) continue;
      if (m->_idx >= max_idx) { // new node, skip it
        assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
        continue;
      }
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }
  }

  if( phi_cnt != block->end_idx() ) {
    // did not schedule all.  Retry, Bailout, or Die
    if (C->subsume_loads() == true && !C->failing()) {
      // Retry with subsume_loads == false
      // If this is the first failure, the sentinel string will "stick"
      // to the Compile object, and the C2Compiler will see it and retry.
      C->record_failure(C2Compiler::retry_no_subsuming_loads());
    }
    // assert( phi_cnt == end_idx(), "did not schedule all" );
    return false;
  }

#ifndef PRODUCT
  // Tracing only: dump the block contents after scheduling.
  if (trace_opto_pipelining()) {
    tty->print_cr("#");
    tty->print_cr("# after schedule_local");
    for (uint i = 0;i < block->number_of_nodes();i++) {
      tty->print("# ");
      block->get_node(i)->fast_dump();
    }
    tty->cr();
  }
#endif

  return true;
}
//------------------------------sched_call------------------------------------- uint PhaseCFG::sched_call(Block* block, uint node_cnt, Node_List& worklist, GrowableArray<int>& ready_cnt, MachCallNode* mcall, VectorSet& next_call) { RegMask regs; // Schedule all the users of the call right now. All the users are // projection Nodes, so they must be scheduled next to the call. // Collect all the defined registers. for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) { Node* n = mcall->fast_out(i); assert( n->is_MachProj(), "" ); int n_cnt = ready_cnt.at(n->_idx)-1; ready_cnt.at_put(n->_idx, n_cnt); assert( n_cnt == 0, "" ); // Schedule next to call block->map_node(n, node_cnt++); // Collect defined registers regs.OR(n->out_RegMask()); // Check for scheduling the next control-definer if( n->bottom_type() == Type::CONTROL ) // Warm up next pile of heuristic bits needed_for_next_call(block, n, next_call); // Children of projections are now all ready for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { Node* m = n->fast_out(j); // Get user if(get_block_for_node(m) != block) { continue; } if( m->is_Phi() ) continue; int m_cnt = ready_cnt.at(m->_idx)-1; ready_cnt.at_put(m->_idx, m_cnt); if( m_cnt == 0 ) worklist.push(m); } } // Act as if the call defines the Frame Pointer. // Certainly the FP is alive and well after the call. regs.Insert(_matcher.c_frame_pointer()); // Set all registers killed and not already defined by the call. uint r_cnt = mcall->tf()->range()->cnt(); int op = mcall->ideal_Opcode(); MachProjNode *proj = new (C) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj ); map_node_to_block(proj, block); block->insert_node(proj, node_cnt++); // Select the right register save policy. 
const char * save_policy; switch (op) { case Op_CallRuntime: case Op_CallLeaf: case Op_CallLeafNoFP: // Calling C code so use C calling convention save_policy = _matcher._c_reg_save_policy; break; case Op_CallStaticJava: case Op_CallDynamicJava: // Calling Java code so use Java calling convention save_policy = _matcher._register_save_policy; break; default: ShouldNotReachHere(); } // When using CallRuntime mark SOE registers as killed by the call // so values that could show up in the RegisterMap aren't live in a // callee saved register since the register wouldn't know where to // find them. CallLeaf and CallLeafNoFP are ok because they can't // have debug info on them. Strictly speaking this only needs to be // done for oops since idealreg2debugmask takes care of debug info // references but there no way to handle oops differently than other // pointers as far as the kill mask goes. bool exclude_soe = op == Op_CallRuntime; // If the call is a MethodHandle invoke, we need to exclude the // register which is used to save the SP value over MH invokes from // the mask. Otherwise this register could be used for // deoptimization information. if (op == Op_CallStaticJava) { MachCallStaticJavaNode* mcallstaticjava = (MachCallStaticJavaNode*) mcall; if (mcallstaticjava->_method_handle_invoke) proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask()); } add_call_kills(proj, regs, save_policy, exclude_soe); return node_cnt; }
//------------------------------sched_call------------------------------------- uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call ) { RegMask regs; // Schedule all the users of the call right now. All the users are // projection Nodes, so they must be scheduled next to the call. // Collect all the defined registers. for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) { Node* n = mcall->fast_out(i); assert( n->Opcode()==Op_MachProj, "" ); --ready_cnt[n->_idx]; assert( !ready_cnt[n->_idx], "" ); // Schedule next to call _nodes.map(node_cnt++, n); // Collect defined registers regs.OR(n->out_RegMask()); // Check for scheduling the next control-definer if( n->bottom_type() == Type::CONTROL ) // Warm up next pile of heuristic bits needed_for_next_call(n, next_call, bbs); // Children of projections are now all ready for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { Node* m = n->fast_out(j); // Get user if( bbs[m->_idx] != this ) continue; if( m->is_Phi() ) continue; if( !--ready_cnt[m->_idx] ) worklist.push(m); } } // Act as if the call defines the Frame Pointer. // Certainly the FP is alive and well after the call. regs.Insert(matcher.c_frame_pointer()); // Set all registers killed and not already defined by the call. uint r_cnt = mcall->tf()->range()->cnt(); int op = mcall->ideal_Opcode(); MachProjNode *proj = new (matcher.C, 1) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj ); bbs.map(proj->_idx,this); _nodes.insert(node_cnt++, proj); // Select the right register save policy. 
const char * save_policy; switch (op) { case Op_CallRuntime: case Op_CallLeaf: case Op_CallLeafNoFP: // Calling C code so use C calling convention save_policy = matcher._c_reg_save_policy; break; case Op_CallStaticJava: case Op_CallDynamicJava: // Calling Java code so use Java calling convention save_policy = matcher._register_save_policy; break; default: ShouldNotReachHere(); } // When using CallRuntime mark SOE registers as killed by the call // so values that could show up in the RegisterMap aren't live in a // callee saved register since the register wouldn't know where to // find them. CallLeaf and CallLeafNoFP are ok because they can't // have debug info on them. Strictly speaking this only needs to be // done for oops since idealreg2debugmask takes care of debug info // references but there no way to handle oops differently than other // pointers as far as the kill mask goes. bool exclude_soe = op == Op_CallRuntime; // If the call is a MethodHandle invoke, we need to exclude the // register which is used to save the SP value over MH invokes from // the mask. Otherwise this register could be used for // deoptimization information. if (op == Op_CallStaticJava) { MachCallStaticJavaNode* mcallstaticjava = (MachCallStaticJavaNode*) mcall; if (mcallstaticjava->_method_handle_invoke) proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask()); } // Fill in the kill mask for the call for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) { if( !regs.Member(r) ) { // Not already defined by the call // Save-on-call register? if ((save_policy[r] == 'C') || (save_policy[r] == 'A') || ((save_policy[r] == 'E') && exclude_soe)) { proj->_rout.Insert(r); } } } return node_cnt; }