Ejemplo n.º 1
0
 // address of an element in _nodes.  Used when the element is to be modified
 PointsToNode *ptnode_adr(uint idx) {
   // When idx lies at or beyond the current length of _nodes, at_grow() is
   // invoked first to expand the array; the element it returns is not needed.
   const uint node_count = static_cast<uint>(_nodes->length());
   const bool slot_missing = (idx >= node_count);
   if (slot_missing) {
     PointsToNode unused_copy = _nodes->at_grow(idx);
   }
   // Hand back the address of the slot so the caller can modify it in place.
   return _nodes->adr_at(idx);
 }
Ejemplo n.º 2
0
//------------------------------schedule_local---------------------------------
// Topological sort within a block.  Someday become a real scheduler.
//
// Rearranges this block's _nodes in place.  Each node between the block start
// and end_idx() gets a ready count (its number of block-local, non-top
// inputs); nodes whose count is zero are put on a worklist, select() picks
// the next one to place, and placing a node decrements the counts of its
// block-local users.
//
// Parameters:
//   matcher      - forwarded to sched_call(); matcher.C is the Compile object
//                  that receives the result code when scheduling fails.
//   bbs          - indexed by Node::_idx; an entry is compared against 'this'
//                  to decide whether a node is block-local.
//   ready_cnt    - indexed by Node::_idx; written here with the number of
//                  block-local inputs that are not yet scheduled.
//   next_call    - passed to needed_for_next_call(), select() and sched_call().
//   node_latency - passed to select(); also printed by the trace output.
//
// Returns true when every node up to end_idx() was placed (or the block holds
// a single node).  Returns false after recording either
// Comp_subsumed_load_conflict or Comp_no_retry on the Compile object.
bool Block::schedule_local(Matcher &matcher, Block_Array &bbs,int *ready_cnt, VectorSet &next_call, GrowableArray<uint> &node_latency) {
  // Already "sorted" are the block start Node (as the first entry), and
  // the block-ending Node and any trailing control projections.  We leave
  // these alone.  PhiNodes and ParmNodes are made to follow the block start
  // Node.  Everything else gets topo-sorted.

#ifndef PRODUCT
    // Non-product builds only: dump the block contents before scheduling.
    if (TraceOptoPipelining) {
      tty->print("# before schedule_local\n");
      for (uint i = 0;i < _nodes.size();i++) {
        tty->print("# ");
        _nodes[i]->fast_dump();
      }
      tty->print("\n");
    }
#endif

  // RootNode is already sorted
  if( _nodes.size() == 1 ) return true;

  // Move PhiNodes and ParmNodes from 1 to cnt up to the start
  uint node_cnt = end_idx();
  uint phi_cnt = 1;             // Next free slot after the pre-scheduled nodes
  uint i;
  for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
    Node *n = _nodes[i];
    if( n->is_Phi() ||          // Found a PhiNode or ParmNode
        (n->is_Proj()  && n->in(0) == head()) ) {
      // Move guy at 'phi_cnt' into slot 'i'; makes a hole at phi_cnt
      _nodes.map(i,_nodes[phi_cnt]);
      _nodes.map(phi_cnt++,n);  // swap Phi/Parm up front
    } else {                    // All others
      // Count block-local inputs to 'n'
      uint cnt = n->len();      // Input count
      uint local = 0;
      for( uint j=0; j<cnt; j++ ) {
        Node *m = n->in(j);
        if( m && bbs[m->_idx] == this && !m->is_top() )
          local++;              // One more block-local input
      }
      ready_cnt[n->_idx] = local; // Count em up

      // A few node types require changing a required edge to a precedence edge
      // before allocation.
      // is_Mach() yields a MachNode pointer that is tested for NULL below.
      MachNode *m = n->is_Mach();
      if( UseConcMarkSweepGC ) {
        if( m && m->ideal_Opcode() == Op_StoreCM ) {
          // Note: Required edges with an index greater than oper_input_base
          // are not supported by the allocator.
          // Note2: Can only depend on unmatched edge being last,
          // can not depend on its absolute position.
          Node *oop_store = n->in(n->req() - 1);
          n->del_req(n->req() - 1);
          n->add_prec(oop_store);
          assert(bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark");
        }
      }
      // Same edge conversion for MemBarAcquire: the input at TypeFunc::Parms
      // is removed as a required edge and re-added as a precedence edge.
      if( m && m->ideal_Opcode() == Op_MemBarAcquire ) {
        Node *x = n->in(TypeFunc::Parms);
        n->del_req(TypeFunc::Parms);
        n->add_prec(x);
      }
    }
  }
  // 'i' equals node_cnt here, so this covers the block-ending node and
  // everything after it.
  for(uint i2=i; i2<_nodes.size(); i2++ ) // Trailing guys get zapped count
    ready_cnt[_nodes[i2]->_idx] = 0;

  // All the prescheduled guys do not hold back internal nodes
  uint i3;
  for(i3 = 0; i3<phi_cnt; i3++ ) {  // For all pre-scheduled
    Node *n = _nodes[i3];       // Get pre-scheduled
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j);
      if( bbs[m->_idx] ==this ) // Local-block user
        ready_cnt[m->_idx]--;   // Fix ready count
    }
  }

  // Make a worklist
  // 'i3' equals phi_cnt here, so only the not-yet-scheduled nodes are scanned.
  Node_List worklist;
  for(uint i4=i3; i4<node_cnt; i4++ ) {    // Put ready guys on worklist
    Node *m = _nodes[i4];    
    if( !ready_cnt[m->_idx] )   // Zero ready count?
      worklist.push(m);         // Then on to worklist!
  }

  // Warm up the 'next_call' heuristic bits
  needed_for_next_call(_nodes[0], next_call, bbs);

#ifndef PRODUCT
    // Non-product builds only: print ready count and latency for every node.
    if (TraceOptoPipelining) {
      for (uint j=0; j<_nodes.size(); j++) {
        Node     *n = _nodes[j];
        int     idx = n->_idx;
        tty->print("#   ready cnt:%3d  ", ready_cnt[idx]);
        tty->print("latency:%3d  ", node_latency.at_grow(idx));
        tty->print("%4d: %s\n", idx, n->Name());
      }
    }
#endif

  // Pull from worklist and schedule
  while( worklist.size() ) {    // Worklist is not empty

#ifndef PRODUCT
    uint before_size = worklist.size();

    // Only trace the selection when there was an actual choice to make.
    if (TraceOptoPipelining && before_size > 1) {
      tty->print("#    before select:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i];      // Get Node on worklist
        tty->print(" %3d", n->_idx);
      }
      tty->print("\n");
    }
#endif

    // Select and pop a ready guy from worklist
    Node* n = select(worklist, bbs, ready_cnt, next_call, phi_cnt, node_latency);
    _nodes.map(phi_cnt++,n);    // Schedule him next
    MachNode *m = n->is_Mach();

#ifndef PRODUCT
    if (TraceOptoPipelining && before_size > 1) {
      tty->print("#  select %d: %s", n->_idx, n->Name());
      tty->print(", latency:%d", node_latency.at_grow(n->_idx));
      n->dump();
      tty->print("#    after select:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i];      // Get Node on worklist
        tty->print(" %4d", n->_idx);
      }
      tty->print("\n");
    }

#endif
    if( m ) {
      MachCallNode *mcall = m->is_MachCall();
      if( mcall ) {
        // sched_call() returns the updated phi_cnt; the user update below is
        // skipped for calls.
        // NOTE(review): presumably sched_call() updates the ready counts of
        // the call's users itself - confirm against its definition.
        phi_cnt = sched_call(matcher, bbs, phi_cnt, worklist, ready_cnt, mcall, next_call);
        continue;
      }
    }
    // Children are now all ready
    for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
      Node* m = n->fast_out(i5); // Get user
      if( bbs[m->_idx] != this ) continue; // User lives in another block
      if( m->is_Phi() ) continue;          // Phis were placed up front
      if( !--ready_cnt[m->_idx] ) 
        worklist.push(m);       // Last local input placed; user is ready
    }
  }

  // The worklist drained; phi_cnt must have reached end_idx() if every node
  // was placed.
  if( phi_cnt != end_idx() ) {
    // did not schedule all.  Retry, Bailout, or Die
    Compile* C = matcher.C;
    if (C->subsume_loads() == true) {
      // Retry with subsume_loads == false
      C->set_result(Compile::Comp_subsumed_load_conflict);
    } else {
      // Bailout without retry
      C->set_result(Compile::Comp_no_retry);
    }
    // assert( phi_cnt == end_idx(), "did not schedule all" );
    return false;
  }

#ifndef PRODUCT
  // Non-product builds only: dump the block contents after scheduling.
  if (TraceOptoPipelining) {
    tty->print("# after schedule_local\n");
    for (uint i = 0;i < _nodes.size();i++) {
      tty->print("# ");
      _nodes[i]->fast_dump();
    }
    tty->print("\n");
  }
#endif


  return true;
}
Ejemplo n.º 3
0
//------------------------------implicit_null_check----------------------------
// Detect implicit-null-check opportunities.  Basically, find NULL checks
// with suitable memory ops nearby.  Use the memory op to do the NULL check.
// I can generate a memory op if there is not one nearby.
// NOTE(review): the code below returns without changes when no candidate
// memory op is found; it does not generate one.
//
// Parameters:
//   bbs     - indexed by Node::_idx to find the Block holding a node; updated
//             here for the hoisted nodes and for the new null-check node.
//   latency - receives recomputed latency_from_uses() values for the new
//             null-check node and the hoisted memory op.
//   proj    - projection whose opcode (Op_IfTrue or not) selects the branch
//             sense; proj->in(0) is the test that gets replaced.
//   val     - the tested value; its users are searched for a memory op.
void Block::implicit_null_check(Block_Array &bbs, GrowableArray<uint> &latency, Node *proj, Node *val) {
  // Assume if null check need for 0 offset then always needed
  // Intel solaris doesn't support any null checks yet and no
  // mechanism exists (yet) to set the switches at an os_cpu level
  if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return;

  // Make sure the ptr-is-null path appears to be uncommon!
  // The probability is read from the block-ending MachIf and flipped when
  // 'proj' is the IfTrue projection.
  float f = end()->is_Mach()->is_MachIf()->_prob;
  if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f;
  if( f > 0.0001 ) return;

  uint bidx = 0;                // Capture index of value into memop
  bool was_store;               // Memory op is a store op (set per candidate)

  // Search the successor block for a load or store whose base value is also
  // the tested value.  There may be several.
  // NOTE(review): 'out' is not referenced again in this function.
  Node_List *out = new Node_List(Thread::current()->resource_area());
  MachNode *best = NULL;        // Best found so far
  for (DUIterator i = val->outs(); val->has_out(i); i++) {
    MachNode *mach = val->out(i)->is_Mach();
    if( !mach ) continue;       // Only Mach users are candidates
    was_store = false;
    switch( mach->ideal_Opcode() ) {
    case Op_LoadB:
    case Op_LoadC:
    case Op_LoadD:
    case Op_LoadF:
    case Op_LoadI:
    case Op_LoadL:
    case Op_LoadP:
    case Op_LoadS:
    case Op_LoadKlass:
    case Op_LoadRange:
    case Op_LoadD_unaligned:
    case Op_LoadL_unaligned:
      break;
    case Op_StoreB:
    case Op_StoreC:
    case Op_StoreCM:
    case Op_StoreD:
    case Op_StoreF:
    case Op_StoreI:
    case Op_StoreL:
    case Op_StoreP:
      was_store = true;         // Memory op is a store op
      // Stores will have their address in slot 2 (memory in slot 1).
      // If the value being null-checked is in another slot, it means we
      // are storing the checked value, which does NOT check the value!
      if( mach->in(2) != val ) continue;
      break;                    // Found a memory op?
    case Op_StrComp:		
      // Not a legit memory op for implicit null check regardless of
      // embedded loads
      continue;
    default:                    // Also check for embedded loads
      if( !mach->check_for_anti_dependence() )
        continue;               // Not a memory op; skip it
      break;
    }
    // check if the offset is not too high for implicit exception
    {
      intptr_t offset = 0;
      const TypePtr *adr_type = NULL;  // Do not need this return value here
      const Node* base = mach->get_base_and_disp(offset, adr_type);
      if (base == NULL || base == (Node*)-1) {
        // cannot reason about it; is probably not implicit null exception
      } else {
        const TypePtr* tptr = base->bottom_type()->is_ptr();
        // Give up if offset is not a compile-time constant
        if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot )
          continue;
        offset += tptr->_offset; // correct if base is itself offset
	if( MacroAssembler::needs_explicit_null_check(offset) ) 
          continue;             // Give up if reference is beyond 4K page size
      }
    }

    // Check ctrl input to see if the null-check dominates the memory op
    Block *cb = bbs[mach->_idx];
    cb = cb->_idom;		// Always hoist at least 1 block
    if( !was_store ) {		// Stores can be hoisted only one block
      while( cb->_dom_depth > _dom_depth )
        cb = cb->_idom;		// Hoist loads as far as we want
    }
    if( cb != this ) continue;  // Memory op is not reachable from this block

    // Found a memory user; see if it can be hoisted to check-block
    uint vidx = 0;              // Capture index of value into memop
    uint j;
    // Walk the required inputs from the last down to input 1; the loop ends
    // early (j > 0) as soon as one input's block is not found on this block's
    // dominator chain.
    for( j = mach->req()-1; j > 0; j-- ) {
      if( mach->in(j) == val ) vidx = j;
      // Block of memory-op input
      Block *inb = bbs[mach->in(j)->_idx];
      Block *b = this;          // Start from null check
      while( b != inb && b->_dom_depth > inb->_dom_depth )
        b = b->_idom;           // search upwards for input
      // See if input dominates null check
      if( b != inb )
        break;
    }
    if( j > 0 ) 
      continue;                 // Some input does not dominate; skip candidate
    Block *mb = bbs[mach->_idx]; 
    // Hoisting stores requires more checks for the anti-dependence case.
    // Give up hoisting if we have to move the store past any load.
    if( was_store ) {
      Block *b = mb;            // Start searching here for a local load
      // mach use (faulting) trying to hoist
      // n might be blocker to hoisting
      while( b != this ) {
        uint k;
        for( k = 1; k < b->_nodes.size(); k++ ) {
          Node *n = b->_nodes[k];
          if( n->check_for_anti_dependence() && 
              n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) )
	    break;              // Found anti-dependent load
        }
        if( k < b->_nodes.size() )
          break;                // Found anti-dependent load
        // Make sure control does not do a merge (would have to check all paths)
        if( b->num_preds() != 2 ) break;
        b = bbs[b->pred(1)->_idx]; // Move up to predecessor block
      }
      // The walk only reaches 'this' when no blocker was met on the way up.
      if( b != this ) continue;
    }

    // Make sure this memory op is not already being used for a NullCheck
    MachNode *e = mb->end()->is_Mach();
    if( e && e->is_MachNullCheck() && e->in(1) == mach )
      continue;                 // Already being used as a NULL check

    // Found a candidate!  Pick one with least dom depth - the highest
    // in the dom tree should be closest to the null check.
    if( !best || 
        bbs[mach->_idx]->_dom_depth < bbs[best->_idx]->_dom_depth ) {
      best = mach;
      bidx = vidx;

    }
  }
  // No candidate!
  if( !best ) return;

  // ---- Found an implicit null check
  // Bump the counter declared elsewhere.
  extern int implicit_null_checks;
  implicit_null_checks++;

  // Hoist the memory candidate up to the end of the test block.
  Block *old_block = bbs[best->_idx];
  old_block->find_remove(best);
  add_inst(best);
  bbs.map(best->_idx,this);

  // Move the control dependence
  // Only rewired when the old control input was the old block's first node.
  if (best->in(0) && best->in(0) == old_block->_nodes[0])
    best->set_req(0, _nodes[0]);

  // Check for flag-killing projections that also need to be hoisted
  // Should be DU safe because no edge updates.
  for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) {
    Node* n = best->fast_out(j);
    if( n->Opcode() == Op_MachProj ) {
      bbs[n->_idx]->find_remove(n);
      add_inst(n);
      bbs.map(n->_idx,this);
    }
  }

  // proj==Op_IfTrue --> ne test; proj==Op_IfFalse --> eq test.
  // One of two graph shapes got matched:
  //   (IfTrue  (If (Bool NE (CmpP ptr NULL))))
  //   (IfFalse (If (Bool EQ (CmpP ptr NULL))))
  // NULL checks are always branch-if-eq.  If we see a IfTrue projection
  // then we are replacing a 'ne' test with a 'eq' NULL check test.
  // We need to flip the projections to keep the same semantics.
  if( proj->Opcode() == Op_IfTrue ) {
    // Swap order of projections in basic block to swap branch targets
    Node *tmp1 = _nodes[end_idx()+1];
    Node *tmp2 = _nodes[end_idx()+2];
    _nodes.map(end_idx()+1, tmp2);
    _nodes.map(end_idx()+2, tmp1);    
    // Exchange the users of the two projections, going through a scratch
    // node so neither set of users is lost.
    Node *tmp = new (1) Node(1);
    tmp1->replace_by(tmp);
    tmp2->replace_by(tmp1);
    tmp->replace_by(tmp2);
  }

  // Remove the existing null check; use a new implicit null check instead.
  // Since schedule-local needs precise def-use info, we need to correct
  // it as well.
  Node *old_tst = proj->in(0);
  MachNode *nul_chk = new MachNullCheckNode(old_tst->in(0),best,bidx);
  _nodes.map(end_idx(),nul_chk);  // New check takes the block-ending slot
  bbs.map(nul_chk->_idx,this);
  // Redirect users of old_tst to nul_chk
  for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2)
    old_tst->last_out(i2)->set_req(0, nul_chk);
  // Clean-up any dead code
  // Clearing every required input detaches old_tst from its inputs.
  for (uint i3 = 0; i3 < old_tst->req(); i3++)
    old_tst->set_req(i3, NULL);
  // Record latencies for the new node and for the memory op that moved.
  latency.at_put_grow(nul_chk->_idx, nul_chk->latency_from_uses(bbs, latency));
  latency.at_put_grow(best   ->_idx, best   ->latency_from_uses(bbs, latency));

#ifndef PRODUCT
  // Non-product builds only: report the latencies just recorded.
  if (TraceOptoPipelining) {
    tty->print("# implicit_null_check: latency %4d for ", latency.at_grow(best->_idx));
    best->fast_dump();
    tty->print("# implicit_null_check: latency %4d for ", latency.at_grow(nul_chk->_idx));
    nul_chk->fast_dump();
  }
#endif
}
Ejemplo n.º 4
0
 bool hidden_alias(Node *n) {
   // Always answers true while _collecting is set.  Otherwise the node's
   // entry in _nodes decides: any escape state other than NoEscape yields
   // true, and a NoEscape entry yields its _hidden_alias flag.
   if (!_collecting) {
     PointsToNode entry = _nodes->at_grow(n->_idx);
     if (entry.escape_state() == PointsToNode::NoEscape) {
       return entry._hidden_alias;
     }
   }
   return true;
 }