// address of an element in _nodes. Used when the element is to be modified PointsToNode *ptnode_adr(uint idx) { if ((uint)_nodes->length() <= idx) { // expand _nodes array PointsToNode dummy = _nodes->at_grow(idx); } return _nodes->adr_at(idx); }
//------------------------------schedule_local---------------------------------
// Topological sort within a block.  Someday become a real scheduler.
//
// Parameters:
//   matcher      - forwarded to sched_call(); matcher.C is the Compile object
//                  that receives the bailout result on failure.
//   bbs          - indexed by Node::_idx; used here to test whether a node
//                  belongs to this block (bbs[idx] == this).
//   ready_cnt    - indexed by Node::_idx; filled in here with the number of
//                  block-local inputs a node is still waiting for.
//   next_call    - heuristic bit set; warmed up by needed_for_next_call() and
//                  forwarded to select() and sched_call().
//   node_latency - per-node latencies, forwarded to select() and printed when
//                  tracing is enabled.
//
// Returns true when the block is trivially sorted (only one node) or when
// every node up to end_idx() was scheduled.  Returns false when some nodes
// could not be scheduled; in that case the compile result is set to either
// Comp_subsumed_load_conflict or Comp_no_retry before returning.
bool Block::schedule_local(Matcher &matcher, Block_Array &bbs,int *ready_cnt, VectorSet &next_call, GrowableArray<uint> &node_latency) {
  // Already "sorted" are the block start Node (as the first entry), and
  // the block-ending Node and any trailing control projections.  We leave
  // these alone.  PhiNodes and ParmNodes are made to follow the block start
  // Node.  Everything else gets topo-sorted.

#ifndef PRODUCT
  // Debug-only dump of the block contents before scheduling.
  if (TraceOptoPipelining) {
    tty->print("# before schedule_local\n");
    for (uint i = 0;i < _nodes.size();i++) {
      tty->print("# ");
      _nodes[i]->fast_dump();
    }
    tty->print("\n");
  }
#endif

  // RootNode is already sorted
  if( _nodes.size() == 1 ) return true;

  // Move PhiNodes and ParmNodes from 1 to cnt up to the start
  uint node_cnt = end_idx();
  uint phi_cnt = 1;             // Next free slot behind the pre-scheduled nodes
  uint i;
  for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
    Node *n = _nodes[i];
    if( n->is_Phi() ||          // Found a PhiNode or ParmNode
        (n->is_Proj() && n->in(0) == head()) ) {
      // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
      _nodes.map(i,_nodes[phi_cnt]);
      _nodes.map(phi_cnt++,n);  // swap Phi/Parm up front
    } else {                    // All others
      // Count block-local inputs to 'n'
      uint cnt = n->len();      // Input count
      uint local = 0;
      for( uint j=0; j<cnt; j++ ) {
        Node *m = n->in(j);
        if( m && bbs[m->_idx] == this && !m->is_top() )
          local++;              // One more block-local input
      }
      ready_cnt[n->_idx] = local; // Count em up

      // A few node types require changing a required edge to a precedence edge
      // before allocation.
      MachNode *m = n->is_Mach();
      if( UseConcMarkSweepGC ) {
        if( m && m->ideal_Opcode() == Op_StoreCM ) {
          // Note: Required edges with an index greater than oper_input_base
          // are not supported by the allocator.
          // Note2: Can only depend on unmatched edge being last,
          // can not depend on its absolute position.
          Node *oop_store = n->in(n->req() - 1);
          n->del_req(n->req() - 1);
          n->add_prec(oop_store);
          assert(bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark");
        }
      }
      // Same required-edge to precedence-edge conversion for the
      // TypeFunc::Parms input of a MemBarAcquire.
      if( m && m->ideal_Opcode() == Op_MemBarAcquire ) {
        Node *x = n->in(TypeFunc::Parms);
        n->del_req(TypeFunc::Parms);
        n->add_prec(x);
      }
    }
  }
  // 'i' now equals node_cnt: everything from the block-ending node onwards.
  for(uint i2=i; i2<_nodes.size(); i2++ ) // Trailing guys get zapped count
    ready_cnt[_nodes[i2]->_idx] = 0;

  // All the prescheduled guys do not hold back internal nodes
  uint i3;
  for(i3 = 0; i3<phi_cnt; i3++ ) { // For all pre-scheduled
    Node *n = _nodes[i3];       // Get pre-scheduled
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j);
      if( bbs[m->_idx] ==this ) // Local-block user
        ready_cnt[m->_idx]--;   // Fix ready count
    }
  }

  // Make a worklist
  // 'i3' equals phi_cnt here, so the scan starts at the first node that is
  // not pre-scheduled.
  Node_List worklist;
  for(uint i4=i3; i4<node_cnt; i4++ ) { // Put ready guys on worklist
    Node *m = _nodes[i4];
    if( !ready_cnt[m->_idx] )   // Zero ready count?
      worklist.push(m);         // Then on to worklist!
  }

  // Warm up the 'next_call' heuristic bits
  needed_for_next_call(_nodes[0], next_call, bbs);

#ifndef PRODUCT
  // Debug-only dump of the ready count and latency of every node.
  if (TraceOptoPipelining) {
    for (uint j=0; j<_nodes.size(); j++) {
      Node *n = _nodes[j];
      int idx = n->_idx;
      tty->print("# ready cnt:%3d ", ready_cnt[idx]);
      tty->print("latency:%3d ", node_latency.at_grow(idx));
      tty->print("%4d: %s\n", idx, n->Name());
    }
  }
#endif

  // Pull from worklist and schedule
  while( worklist.size() ) {    // Worklist is not empty

#ifndef PRODUCT
    uint before_size = worklist.size();

    if (TraceOptoPipelining && before_size > 1) {
      tty->print("# before select:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i];  // Get Node on worklist
        tty->print(" %3d", n->_idx);
      }
      tty->print("\n");
    }
#endif

    // Select and pop a ready guy from worklist
    Node* n = select(worklist, bbs, ready_cnt, next_call, phi_cnt, node_latency);
    _nodes.map(phi_cnt++,n);    // Schedule him next
    MachNode *m = n->is_Mach();

#ifndef PRODUCT
    if (TraceOptoPipelining && before_size > 1) {
      tty->print("# select %d: %s", n->_idx, n->Name());
      tty->print(", latency:%d", node_latency.at_grow(n->_idx));
      n->dump();
      tty->print("# after select:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i];  // Get Node on worklist
        tty->print(" %4d", n->_idx);
      }
      tty->print("\n");
    }
#endif
    // Calls are handed to sched_call(), which returns the updated phi_cnt;
    // the generic user-release loop below is skipped for them.
    if( m ) {
      MachCallNode *mcall = m->is_MachCall();
      if( mcall ) {
        phi_cnt = sched_call(matcher, bbs, phi_cnt, worklist, ready_cnt, mcall, next_call);
        continue;
      }
    }
    // Children are now all ready
    for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
      Node* m = n->fast_out(i5); // Get user
      if( bbs[m->_idx] != this ) continue; // User lives in another block
      if( m->is_Phi() ) continue;          // Phis were pre-scheduled above
      if( !--ready_cnt[m->_idx] )          // Last outstanding input satisfied?
        worklist.push(m);
    }
  }

  if( phi_cnt != end_idx() ) {
    // did not schedule all.  Retry, Bailout, or Die
    Compile* C = matcher.C;
    if (C->subsume_loads() == true) {
      // Retry with subsume_loads == false
      C->set_result(Compile::Comp_subsumed_load_conflict);
    } else {
      // Bailout without retry
      C->set_result(Compile::Comp_no_retry);
    }
    // assert( phi_cnt == end_idx(), "did not schedule all" );
    return false;
  }

#ifndef PRODUCT
  // Debug-only dump of the block contents after scheduling.
  if (TraceOptoPipelining) {
    tty->print("# after schedule_local\n");
    for (uint i = 0;i < _nodes.size();i++) {
      tty->print("# ");
      _nodes[i]->fast_dump();
    }
    tty->print("\n");
  }
#endif

  return true;
}
//------------------------------implicit_null_check---------------------------- // Detect implicit-null-check opportunities. Basically, find NULL checks // with suitable memory ops nearby. Use the memory op to do the NULL check. // I can generate a memory op if there is not one nearby. void Block::implicit_null_check(Block_Array &bbs, GrowableArray<uint> &latency, Node *proj, Node *val) { // Assume if null check need for 0 offset then always needed // Intel solaris doesn't support any null checks yet and no // mechanism exists (yet) to set the switches at an os_cpu level if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return; // Make sure the ptr-is-null path appears to be uncommon! float f = end()->is_Mach()->is_MachIf()->_prob; if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f; if( f > 0.0001 ) return; uint bidx = 0; // Capture index of value into memop bool was_store; // Memory op is a store op // Search the successor block for a load or store who's base value is also // the tested value. There may be several. Node_List *out = new Node_List(Thread::current()->resource_area()); MachNode *best = NULL; // Best found so far for (DUIterator i = val->outs(); val->has_out(i); i++) { MachNode *mach = val->out(i)->is_Mach(); if( !mach ) continue; was_store = false; switch( mach->ideal_Opcode() ) { case Op_LoadB: case Op_LoadC: case Op_LoadD: case Op_LoadF: case Op_LoadI: case Op_LoadL: case Op_LoadP: case Op_LoadS: case Op_LoadKlass: case Op_LoadRange: case Op_LoadD_unaligned: case Op_LoadL_unaligned: break; case Op_StoreB: case Op_StoreC: case Op_StoreCM: case Op_StoreD: case Op_StoreF: case Op_StoreI: case Op_StoreL: case Op_StoreP: was_store = true; // Memory op is a store op // Stores will have their address in slot 2 (memory in slot 1). // If the value being nul-checked is in another slot, it means we // are storing the checked value, which does NOT check the value! if( mach->in(2) != val ) continue; break; // Found a memory op? 
case Op_StrComp: // Not a legit memory op for implicit null check regardless of // embedded loads continue; default: // Also check for embedded loads if( !mach->check_for_anti_dependence() ) continue; // Not an memory op; skip it break; } // check if the offset is not too high for implicit exception { intptr_t offset = 0; const TypePtr *adr_type = NULL; // Do not need this return value here const Node* base = mach->get_base_and_disp(offset, adr_type); if (base == NULL || base == (Node*)-1) { // cannot reason about it; is probably not implicit null exception } else { const TypePtr* tptr = base->bottom_type()->is_ptr(); // Give up if offset is not a compile-time constant if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot ) continue; offset += tptr->_offset; // correct if base is offseted if( MacroAssembler::needs_explicit_null_check(offset) ) continue; // Give up is reference is beyond 4K page size } } // Check ctrl input to see if the null-check dominates the memory op Block *cb = bbs[mach->_idx]; cb = cb->_idom; // Always hoist at least 1 block if( !was_store ) { // Stores can be hoisted only one block while( cb->_dom_depth > _dom_depth ) cb = cb->_idom; // Hoist loads as far as we want } if( cb != this ) continue; // Found a memory user; see if it can be hoisted to check-block uint vidx = 0; // Capture index of value into memop uint j; for( j = mach->req()-1; j > 0; j-- ) { if( mach->in(j) == val ) vidx = j; // Block of memory-op input Block *inb = bbs[mach->in(j)->_idx]; Block *b = this; // Start from nul check while( b != inb && b->_dom_depth > inb->_dom_depth ) b = b->_idom; // search upwards for input // See if input dominates null check if( b != inb ) break; } if( j > 0 ) continue; Block *mb = bbs[mach->_idx]; // Hoisting stores requires more checks for the anti-dependence case. // Give up hoisting if we have to move the store past any load. 
if( was_store ) { Block *b = mb; // Start searching here for a local load // mach use (faulting) trying to hoist // n might be blocker to hoisting while( b != this ) { uint k; for( k = 1; k < b->_nodes.size(); k++ ) { Node *n = b->_nodes[k]; if( n->check_for_anti_dependence() && n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) ) break; // Found anti-dependent load } if( k < b->_nodes.size() ) break; // Found anti-dependent load // Make sure control does not do a merge (would have to check allpaths) if( b->num_preds() != 2 ) break; b = bbs[b->pred(1)->_idx]; // Move up to predecessor block } if( b != this ) continue; } // Make sure this memory op is not already being used for a NullCheck MachNode *e = mb->end()->is_Mach(); if( e && e->is_MachNullCheck() && e->in(1) == mach ) continue; // Already being used as a NULL check // Found a candidate! Pick one with least dom depth - the highest // in the dom tree should be closest to the null check. if( !best || bbs[mach->_idx]->_dom_depth < bbs[best->_idx]->_dom_depth ) { best = mach; bidx = vidx; } } // No candidate! if( !best ) return; // ---- Found an implicit null check extern int implicit_null_checks; implicit_null_checks++; // Hoist the memory candidate up to the end of the test block. Block *old_block = bbs[best->_idx]; old_block->find_remove(best); add_inst(best); bbs.map(best->_idx,this); // Move the control dependence if (best->in(0) && best->in(0) == old_block->_nodes[0]) best->set_req(0, _nodes[0]); // Check for flag-killing projections that also need to be hoisted // Should be DU safe because no edge updates. for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) { Node* n = best->fast_out(j); if( n->Opcode() == Op_MachProj ) { bbs[n->_idx]->find_remove(n); add_inst(n); bbs.map(n->_idx,this); } } // proj==Op_True --> ne test; proj==Op_False --> eq test. 
// One of two graph shapes got matched: // (IfTrue (If (Bool NE (CmpP ptr NULL)))) // (IfFalse (If (Bool EQ (CmpP ptr NULL)))) // NULL checks are always branch-if-eq. If we see a IfTrue projection // then we are replacing a 'ne' test with a 'eq' NULL check test. // We need to flip the projections to keep the same semantics. if( proj->Opcode() == Op_IfTrue ) { // Swap order of projections in basic block to swap branch targets Node *tmp1 = _nodes[end_idx()+1]; Node *tmp2 = _nodes[end_idx()+2]; _nodes.map(end_idx()+1, tmp2); _nodes.map(end_idx()+2, tmp1); Node *tmp = new (1) Node(1); tmp1->replace_by(tmp); tmp2->replace_by(tmp1); tmp->replace_by(tmp2); } // Remove the existing null check; use a new implicit null check instead. // Since schedule-local needs precise def-use info, we need to correct // it as well. Node *old_tst = proj->in(0); MachNode *nul_chk = new MachNullCheckNode(old_tst->in(0),best,bidx); _nodes.map(end_idx(),nul_chk); bbs.map(nul_chk->_idx,this); // Redirect users of old_test to nul_chk for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2) old_tst->last_out(i2)->set_req(0, nul_chk); // Clean-up any dead code for (uint i3 = 0; i3 < old_tst->req(); i3++) old_tst->set_req(i3, NULL); latency.at_put_grow(nul_chk->_idx, nul_chk->latency_from_uses(bbs, latency)); latency.at_put_grow(best ->_idx, best ->latency_from_uses(bbs, latency)); #ifndef PRODUCT if (TraceOptoPipelining) { tty->print("# implicit_null_check: latency %4d for ", latency.at_grow(best->_idx)); best->fast_dump(); tty->print("# implicit_null_check: latency %4d for ", latency.at_grow(nul_chk->_idx)); nul_chk->fast_dump(); } #endif }
// Reports whether node 'n' has to be treated as having a hidden alias.
// Always answers true while _collecting is set.  Otherwise the answer is
// true when the node's recorded escape state is anything other than NoEscape,
// or when its _hidden_alias flag is set.
bool hidden_alias(Node *n) {
  if (!_collecting) {
    PointsToNode info = _nodes->at_grow(n->_idx);
    if (info.escape_state() != PointsToNode::NoEscape) {
      return true;
    }
    return info._hidden_alias;
  }
  return true;
}