//------------------------------schedule_local--------------------------------- // Topological sort within a block. Someday become a real scheduler. bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call) { // Already "sorted" are the block start Node (as the first entry), and // the block-ending Node and any trailing control projections. We leave // these alone. PhiNodes and ParmNodes are made to follow the block start // Node. Everything else gets topo-sorted. #ifndef PRODUCT if (trace_opto_pipelining()) { tty->print_cr("# --- schedule_local B%d, before: ---", block->_pre_order); for (uint i = 0;i < block->number_of_nodes(); i++) { tty->print("# "); block->get_node(i)->fast_dump(); } tty->print_cr("#"); } #endif // RootNode is already sorted if (block->number_of_nodes() == 1) { return true; } // Move PhiNodes and ParmNodes from 1 to cnt up to the start uint node_cnt = block->end_idx(); uint phi_cnt = 1; uint i; for( i = 1; i<node_cnt; i++ ) { // Scan for Phi Node *n = block->get_node(i); if( n->is_Phi() || // Found a PhiNode or ParmNode (n->is_Proj() && n->in(0) == block->head()) ) { // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt block->map_node(block->get_node(phi_cnt), i); block->map_node(n, phi_cnt++); // swap Phi/Parm up front } else { // All others // Count block-local inputs to 'n' uint cnt = n->len(); // Input count uint local = 0; for( uint j=0; j<cnt; j++ ) { Node *m = n->in(j); if( m && get_block_for_node(m) == block && !m->is_top() ) local++; // One more block-local input } ready_cnt.at_put(n->_idx, local); // Count em up #ifdef ASSERT if( UseConcMarkSweepGC || UseG1GC ) { if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) { // Check the precedence edges for (uint prec = n->req(); prec < n->len(); prec++) { Node* oop_store = n->in(prec); if (oop_store != NULL) { assert(get_block_for_node(oop_store)->_dom_depth <= block->_dom_depth, "oop_store must dominate card-mark"); } } } } #endif // A few node types require changing a required edge to a precedence edge // before allocation. if( n->is_Mach() && n->req() > TypeFunc::Parms && (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire || n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) { // MemBarAcquire could be created without Precedent edge. // del_req() replaces the specified edge with the last input edge // and then removes the last edge. If the specified edge > number of // edges the last edge will be moved outside of the input edges array // and the edge will be lost. This is why this code should be // executed only when Precedent (== TypeFunc::Parms) edge is present. Node *x = n->in(TypeFunc::Parms); n->del_req(TypeFunc::Parms); n->add_prec(x); } } } for(uint i2=i; i2< block->number_of_nodes(); i2++ ) // Trailing guys get zapped count ready_cnt.at_put(block->get_node(i2)->_idx, 0); // All the prescheduled guys do not hold back internal nodes uint i3; for(i3 = 0; i3<phi_cnt; i3++ ) { // For all pre-scheduled Node *n = block->get_node(i3); // Get pre-scheduled for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { Node* m = n->fast_out(j); if (get_block_for_node(m) == block) { // Local-block user int m_cnt = ready_cnt.at(m->_idx)-1; ready_cnt.at_put(m->_idx, m_cnt); // Fix ready count } } } Node_List delay; // Make a worklist Node_List worklist; for(uint i4=i3; i4<node_cnt; i4++ ) { // Put ready guys on worklist Node *m = block->get_node(i4); if( !ready_cnt.at(m->_idx) ) { // Zero ready count? if (m->is_iteratively_computed()) { // Push induction variable increments last to allow other uses // of the phi to be scheduled first. The select() method breaks // ties in scheduling by worklist order. delay.push(m); } else if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_CreateEx) { // Force the CreateEx to the top of the list so it's processed // first and ends up at the start of the block. worklist.insert(0, m); } else { worklist.push(m); // Then on to worklist! } } } while (delay.size()) { Node* d = delay.pop(); worklist.push(d); } // Warm up the 'next_call' heuristic bits needed_for_next_call(block, block->head(), next_call); #ifndef PRODUCT if (trace_opto_pipelining()) { for (uint j=0; j< block->number_of_nodes(); j++) { Node *n = block->get_node(j); int idx = n->_idx; tty->print("# ready cnt:%3d ", ready_cnt.at(idx)); tty->print("latency:%3d ", get_latency_for_node(n)); tty->print("%4d: %s\n", idx, n->Name()); } } #endif uint max_idx = (uint)ready_cnt.length(); // Pull from worklist and schedule while( worklist.size() ) { // Worklist is not ready #ifndef PRODUCT if (trace_opto_pipelining()) { tty->print("# ready list:"); for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist Node *n = worklist[i]; // Get Node on worklist tty->print(" %d", n->_idx); } tty->cr(); } #endif // Select and pop a ready guy from worklist Node* n = select(block, worklist, ready_cnt, next_call, phi_cnt); block->map_node(n, phi_cnt++); // Schedule him next #ifndef PRODUCT if (trace_opto_pipelining()) { tty->print("# select %d: %s", n->_idx, n->Name()); tty->print(", latency:%d", get_latency_for_node(n)); n->dump(); if (Verbose) { tty->print("# ready list:"); for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist Node *n = worklist[i]; // Get Node on worklist tty->print(" %d", n->_idx); } tty->cr(); } } #endif if( n->is_MachCall() ) { MachCallNode *mcall = n->as_MachCall(); phi_cnt = sched_call(block, phi_cnt, worklist, ready_cnt, mcall, next_call); continue; } if (n->is_Mach() && n->as_Mach()->has_call()) { RegMask regs; regs.Insert(_matcher.c_frame_pointer()); regs.OR(n->out_RegMask()); MachProjNode *proj = new (C) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj ); map_node_to_block(proj, block); block->insert_node(proj, phi_cnt++); add_call_kills(proj, regs, _matcher._c_reg_save_policy, false); } // Children are now all ready for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) { Node* m = n->fast_out(i5); // Get user if (get_block_for_node(m) != block) { continue; } if( m->is_Phi() ) continue; if (m->_idx >= max_idx) { // new node, skip it assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types"); continue; } int m_cnt = ready_cnt.at(m->_idx)-1; ready_cnt.at_put(m->_idx, m_cnt); if( m_cnt == 0 ) worklist.push(m); } } if( phi_cnt != block->end_idx() ) { // did not schedule all. Retry, Bailout, or Die if (C->subsume_loads() == true && !C->failing()) { // Retry with subsume_loads == false // If this is the first failure, the sentinel string will "stick" // to the Compile object, and the C2Compiler will see it and retry. C->record_failure(C2Compiler::retry_no_subsuming_loads()); } // assert( phi_cnt == end_idx(), "did not schedule all" ); return false; } #ifndef PRODUCT if (trace_opto_pipelining()) { tty->print_cr("#"); tty->print_cr("# after schedule_local"); for (uint i = 0;i < block->number_of_nodes();i++) { tty->print("# "); block->get_node(i)->fast_dump(); } tty->cr(); } #endif return true; }
//------------------------------schedule_local--------------------------------- // Topological sort within a block. Someday become a real scheduler. bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, int *ready_cnt, VectorSet &next_call) { // Already "sorted" are the block start Node (as the first entry), and // the block-ending Node and any trailing control projections. We leave // these alone. PhiNodes and ParmNodes are made to follow the block start // Node. Everything else gets topo-sorted. #ifndef PRODUCT if (cfg->trace_opto_pipelining()) { tty->print_cr("# --- schedule_local B%d, before: ---", _pre_order); for (uint i = 0;i < _nodes.size();i++) { tty->print("# "); _nodes[i]->fast_dump(); } tty->print_cr("#"); } #endif // RootNode is already sorted if( _nodes.size() == 1 ) return true; // Move PhiNodes and ParmNodes from 1 to cnt up to the start uint node_cnt = end_idx(); uint phi_cnt = 1; uint i; for( i = 1; i<node_cnt; i++ ) { // Scan for Phi Node *n = _nodes[i]; if( n->is_Phi() || // Found a PhiNode or ParmNode (n->is_Proj() && n->in(0) == head()) ) { // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt _nodes.map(i,_nodes[phi_cnt]); _nodes.map(phi_cnt++,n); // swap Phi/Parm up front } else { // All others // Count block-local inputs to 'n' uint cnt = n->len(); // Input count uint local = 0; for( uint j=0; j<cnt; j++ ) { Node *m = n->in(j); if( m && cfg->_bbs[m->_idx] == this && !m->is_top() ) local++; // One more block-local input } ready_cnt[n->_idx] = local; // Count em up // A few node types require changing a required edge to a precedence edge // before allocation. if( UseConcMarkSweepGC || UseG1GC ) { if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) { // Note: Required edges with an index greater than oper_input_base // are not supported by the allocator. // Note2: Can only depend on unmatched edge being last, // can not depend on its absolute position. Node *oop_store = n->in(n->req() - 1); n->del_req(n->req() - 1); n->add_prec(oop_store); assert(cfg->_bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark"); } } if( n->is_Mach() && n->req() > TypeFunc::Parms && (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire || n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) { // MemBarAcquire could be created without Precedent edge. // del_req() replaces the specified edge with the last input edge // and then removes the last edge. If the specified edge > number of // edges the last edge will be moved outside of the input edges array // and the edge will be lost. This is why this code should be // executed only when Precedent (== TypeFunc::Parms) edge is present. Node *x = n->in(TypeFunc::Parms); n->del_req(TypeFunc::Parms); n->add_prec(x); } } } for(uint i2=i; i2<_nodes.size(); i2++ ) // Trailing guys get zapped count ready_cnt[_nodes[i2]->_idx] = 0; // All the prescheduled guys do not hold back internal nodes uint i3; for(i3 = 0; i3<phi_cnt; i3++ ) { // For all pre-scheduled Node *n = _nodes[i3]; // Get pre-scheduled for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { Node* m = n->fast_out(j); if( cfg->_bbs[m->_idx] ==this ) // Local-block user ready_cnt[m->_idx]--; // Fix ready count } } Node_List delay; // Make a worklist Node_List worklist; for(uint i4=i3; i4<node_cnt; i4++ ) { // Put ready guys on worklist Node *m = _nodes[i4]; if( !ready_cnt[m->_idx] ) { // Zero ready count? if (m->is_iteratively_computed()) { // Push induction variable increments last to allow other uses // of the phi to be scheduled first. The select() method breaks // ties in scheduling by worklist order. delay.push(m); } else if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_CreateEx) { // Force the CreateEx to the top of the list so it's processed // first and ends up at the start of the block. worklist.insert(0, m); } else { worklist.push(m); // Then on to worklist! } } } while (delay.size()) { Node* d = delay.pop(); worklist.push(d); } // Warm up the 'next_call' heuristic bits needed_for_next_call(_nodes[0], next_call, cfg->_bbs); #ifndef PRODUCT if (cfg->trace_opto_pipelining()) { for (uint j=0; j<_nodes.size(); j++) { Node *n = _nodes[j]; int idx = n->_idx; tty->print("# ready cnt:%3d ", ready_cnt[idx]); tty->print("latency:%3d ", cfg->_node_latency.at_grow(idx)); tty->print("%4d: %s\n", idx, n->Name()); } } #endif // Pull from worklist and schedule while( worklist.size() ) { // Worklist is not ready #ifndef PRODUCT if (cfg->trace_opto_pipelining()) { tty->print("# ready list:"); for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist Node *n = worklist[i]; // Get Node on worklist tty->print(" %d", n->_idx); } tty->cr(); } #endif // Select and pop a ready guy from worklist Node* n = select(cfg, worklist, ready_cnt, next_call, phi_cnt); _nodes.map(phi_cnt++,n); // Schedule him next #ifndef PRODUCT if (cfg->trace_opto_pipelining()) { tty->print("# select %d: %s", n->_idx, n->Name()); tty->print(", latency:%d", cfg->_node_latency.at_grow(n->_idx)); n->dump(); if (Verbose) { tty->print("# ready list:"); for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist Node *n = worklist[i]; // Get Node on worklist tty->print(" %d", n->_idx); } tty->cr(); } } #endif if( n->is_MachCall() ) { MachCallNode *mcall = n->as_MachCall(); phi_cnt = sched_call(matcher, cfg->_bbs, phi_cnt, worklist, ready_cnt, mcall, next_call); continue; } // Children are now all ready for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) { Node* m = n->fast_out(i5); // Get user if( cfg->_bbs[m->_idx] != this ) continue; if( m->is_Phi() ) continue; if( !--ready_cnt[m->_idx] ) worklist.push(m); } } if( phi_cnt != end_idx() ) { // did not schedule all. Retry, Bailout, or Die Compile* C = matcher.C; if (C->subsume_loads() == true && !C->failing()) { // Retry with subsume_loads == false // If this is the first failure, the sentinel string will "stick" // to the Compile object, and the C2Compiler will see it and retry. C->record_failure(C2Compiler::retry_no_subsuming_loads()); } // assert( phi_cnt == end_idx(), "did not schedule all" ); return false; } #ifndef PRODUCT if (cfg->trace_opto_pipelining()) { tty->print_cr("#"); tty->print_cr("# after schedule_local"); for (uint i = 0;i < _nodes.size();i++) { tty->print("# "); _nodes[i]->fast_dump(); } tty->cr(); } #endif return true; }