Example #1
0
//------------------------------sched_call-------------------------------------
uint Block::sched_call( Matcher &m, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call ) {
  RegMask regs;

  // Schedule all the users of the call right now.  All the users are
  // projection Nodes, so they must be scheduled next to the call.
  // Collect all the defined registers.
  for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
    Node* n = mcall->fast_out(i);
    assert( n->Opcode()==Op_MachProj, "" );
    --ready_cnt[n->_idx];
    assert( !ready_cnt[n->_idx], "" );
    // Schedule next to call
    _nodes.map(node_cnt++, n);
    // Collect defined registers
    regs.OR(n->out_RegMask());
    // Check for scheduling the next control-definer
    if( n->bottom_type() == Type::CONTROL ) 
      // Warm up next pile of heuristic bits
      needed_for_next_call(n, next_call, bbs);

    // Children of projections are now all ready
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j); // Get user
      if( bbs[m->_idx] != this ) continue;
      if( m->is_Phi() ) continue;
      if( !--ready_cnt[m->_idx] ) 
        worklist.push(m);
    }
  
  }

  // Act as if the call defines the Frame Pointer.
  // Certainly the FP is alive and well after the call.
  regs.Insert(m.c_frame_pointer());

  // Set all registers killed and not already defined by the call.
  uint r_cnt = mcall->tf()->range()->cnt();
  int op = mcall->ideal_Opcode();
  MachProjNode *proj = new (1) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
  bbs.map(proj->_idx,this);
  _nodes.insert(node_cnt++, proj);

  for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) {
    if( !regs.Member(r) ) {     // Not already defined by the call  
      // Save-on-call register?
      if( (m._register_save_policy[r] == 'C') ||
          (m._register_save_policy[r] == 'A') ||
          ((m._register_save_policy[r] == 'E') &&
           (op == Op_CallRuntime     ||
            op == Op_CallNative      ||
            op == Op_CallInterpreter ||
            op == Op_CallLeaf)) ) { 
        proj->_rout.Insert(r);
      }
    }
  }

  return node_cnt;
}
Example #2
0
//------------------------------call_catch_cleanup-----------------------------
// If we inserted any instructions between a Call and his CatchNode,
// clone the instructions on all paths below the Catch.
void Block::call_catch_cleanup(Block_Array &bbs) {

  // End of region to clone
  uint end = end_idx();
  if( !_nodes[end]->is_Catch() ) return;
  // Start of region to clone
  uint beg = end;
  while( _nodes[beg-1]->Opcode() != Op_MachProj || 
        !_nodes[beg-1]->in(0)->is_Call() ) {
    beg--;
    assert(beg > 0,"Catch cleanup walking beyond block boundry");
  }
  if( beg == end ) return;

  // Clone along all Catch output paths.  Clone area between the 'beg' and
  // 'end' indices.
  for( uint i = 0; i < _num_succs; i++ ) {
    Block *sb = _succs[i];
    // Clone the entire area; ignoring the edge fixup for now.
    for( uint j = end; j > beg; j-- ) {
      Node *clone = _nodes[j-1]->clone();
      sb->_nodes.insert( 1, clone );
      bbs.map(clone->_idx,sb);
    }
  }


  // Fixup edges.  Check the def-use info per cloned Node
  for(uint i2 = beg; i2 < end; i2++ ) {
    uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block
    Node *n = _nodes[i2];        // Node that got cloned
    // Need DU safe iterator because of edge manipulation in calls.
    Node_List *out = new Node_List(Thread::current()->resource_area());
    for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) {
      out->push(n->fast_out(j1));
    }
    uint max = out->size();
    for (uint j = 0; j < max; j++) {// For all users
      Node *use = out->pop();
      Block *buse = bbs[use->_idx];
      if( use->is_Phi() ) {
        for( uint k = 1; k < use->req(); k++ )
          if( use->in(k) == n ) 
            catch_cleanup_one_use( use, bbs[buse->pred(k)->_idx], n, this, bbs, beg, n_clone_idx, k );
      } else {
        catch_cleanup_one_use( use, buse, n, this, bbs, beg, n_clone_idx, -1 );
      }
    } // End for all users

  } // End of for all Nodes in cloned area

  // Remove the now-dead cloned ops
  for(uint i3 = beg; i3 < end; i3++ ) {
    _nodes[beg]->disconnect_inputs(NULL);
    _nodes.remove(beg);
  }

  // If the successor blocks have a CreateEx node, move it back to the top
  for(uint i4 = 0; i4 < _num_succs; i4++ ) {
    Block *sb = _succs[i4];
    MachNode *cex = sb->_nodes[1+end-beg]->is_Mach();
    if( cex && cex->ideal_Opcode() == Op_CreateEx ) {
      sb->_nodes.remove(1+end-beg);
      sb->_nodes.insert(1,cex);
    }
  }
}
Example #3
0
//------------------------------catch_cleanup_one_use--------------------------
static void catch_cleanup_one_use( Node *use, Block *use_blk, Node *def, Block *def_blk, Block_Array &bbs, int beg, int n_clone_idx, int use_idx ) {
  if( !use_blk ) return;        // Can happen if the use is a precedence edge
  // Check out 'use'.  If it is in this block, then it must be in
  // the cloned area.  Go to the clone(s) and set them to use the
  // cloned version of 'n'.
  if( use_blk == def_blk ) {
    uint use_idx = def_blk->find_node(use);
    uint offset_idx = use_idx - beg;
    for( uint k = 0; k < def_blk->_num_succs; k++ ) {
      // Get clone in each successor block
      Block *sb = def_blk->_succs[k];
      Node *clone = sb->_nodes[offset_idx+1];
      assert( clone->Opcode() == use->Opcode(), "" );
      // Make use-clone use the def-clone
      for( uint l = 0; l < use->len(); l++ ) {
        if( clone->in(l) == def ) {
	  if( l < use->req() ) {
	    clone->set_req(l,sb->_nodes[n_clone_idx]);
	  } else {
	    clone->rm_prec(l);
	    clone->add_prec(sb->_nodes[n_clone_idx]);
	    l--;
	  }
	}
      }
    }
    
  } 
  // Else the use is some block below the Catch.  Find the path the value
  // takes to reach the Catch and make the use occur only on this path.
  else {
    // Find which successor block dominates this use.  The successor
    // blocks must all be single-entry (from the Catch only; I will have
    // split blocks to make this so), hence they all dominate.
    while( use_blk->_dom_depth > def_blk->_dom_depth+1 )
      use_blk = use_blk->_idom;
    
    // Find the successor
    Node *fixup;
    uint j;
    for( j = 0; j < def_blk->_num_succs; j++ )
      if( use_blk == def_blk->_succs[j] ) 
        break;
    if( j == def_blk->_num_succs ) {
      // Block at same level in dom-tree is not a successor.  It needs a 
      // PhiNode, the PhiNode uses from the def and IT's uses need fixup.
      Node *phi = PhiNode::make(use_blk->head(), def);
      use_blk->_nodes.insert( 1, phi );
      bbs.map(phi->_idx,use_blk);
      for(uint j3 = 1; j3 < use_blk->num_preds(); j3++ )
        catch_cleanup_one_use( phi, bbs[use_blk->pred(j3)->_idx], def, def_blk, bbs, beg, n_clone_idx, j3 );
      fixup = phi;

    } else {
      // Found the use just below the Catch.  Make it use the clone.
      fixup = def_blk->_succs[j]->_nodes[n_clone_idx];
    }

    if( use_idx >= 0 ) {
      use->set_req(use_idx,fixup);
    } else {
      for( uint l = 0; l < use->len(); l++ ) {
        if( use->in(l) == def ) {
	  if( l < use->req() ) {
	    use->set_req(l,fixup);
	  } else {
	    use->rm_prec(l);
            use->add_prec(fixup);
	    l--;
	  }
	}
      }
    }
  }
}
Example #4
0
//------------------------------implicit_null_check----------------------------
// Detect implicit-null-check opportunities.  Basically, find NULL checks 
// with suitable memory ops nearby.  Use the memory op to do the NULL check.
// I can generate a memory op if there is not one nearby.
void Block::implicit_null_check(Block_Array &bbs, GrowableArray<uint> &latency, Node *proj, Node *val) {
  // Assume if null check need for 0 offset then always needed
  // Intel solaris doesn't support any null checks yet and no
  // mechanism exists (yet) to set the switches at an os_cpu level
  if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return;

  // Make sure the ptr-is-null path appears to be uncommon!
  float f = end()->is_Mach()->is_MachIf()->_prob;
  if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f;
  if( f > 0.0001 ) return;

  uint bidx = 0;                // Capture index of value into memop
  bool was_store;               // Memory op is a store op

  // Search the successor block for a load or store who's base value is also
  // the tested value.  There may be several.
  Node_List *out = new Node_List(Thread::current()->resource_area());
  MachNode *best = NULL;        // Best found so far
  for (DUIterator i = val->outs(); val->has_out(i); i++) {
    MachNode *mach = val->out(i)->is_Mach();
    if( !mach ) continue;
    was_store = false;
    switch( mach->ideal_Opcode() ) {
    case Op_LoadB:
    case Op_LoadC:
    case Op_LoadD:
    case Op_LoadF:
    case Op_LoadI:
    case Op_LoadL:
    case Op_LoadP:
    case Op_LoadS:
    case Op_LoadKlass:
    case Op_LoadRange:
    case Op_LoadD_unaligned:
    case Op_LoadL_unaligned:
      break;
    case Op_StoreB:
    case Op_StoreC:
    case Op_StoreCM:
    case Op_StoreD:
    case Op_StoreF:
    case Op_StoreI:
    case Op_StoreL:
    case Op_StoreP:
      was_store = true;         // Memory op is a store op
      // Stores will have their address in slot 2 (memory in slot 1).
      // If the value being nul-checked is in another slot, it means we
      // are storing the checked value, which does NOT check the value!
      if( mach->in(2) != val ) continue;
      break;                    // Found a memory op?
    case Op_StrComp:		
      // Not a legit memory op for implicit null check regardless of 
      // embedded loads
      continue;
    default:                    // Also check for embedded loads
      if( !mach->check_for_anti_dependence() )
        continue;               // Not an memory op; skip it
      break;
    }
    // check if the offset is not too high for implicit exception
    {
      intptr_t offset = 0;
      const TypePtr *adr_type = NULL;  // Do not need this return value here
      const Node* base = mach->get_base_and_disp(offset, adr_type);
      if (base == NULL || base == (Node*)-1) {
        // cannot reason about it; is probably not implicit null exception
      } else {
        const TypePtr* tptr = base->bottom_type()->is_ptr();
        // Give up if offset is not a compile-time constant
        if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot )
          continue;
        offset += tptr->_offset; // correct if base is offseted
	if( MacroAssembler::needs_explicit_null_check(offset) ) 
          continue;             // Give up is reference is beyond 4K page size
      }
    }

    // Check ctrl input to see if the null-check dominates the memory op
    Block *cb = bbs[mach->_idx];
    cb = cb->_idom;		// Always hoist at least 1 block
    if( !was_store ) {		// Stores can be hoisted only one block
      while( cb->_dom_depth > _dom_depth )
        cb = cb->_idom;		// Hoist loads as far as we want
    }
    if( cb != this ) continue;

    // Found a memory user; see if it can be hoisted to check-block
    uint vidx = 0;              // Capture index of value into memop
    uint j;
    for( j = mach->req()-1; j > 0; j-- ) {
      if( mach->in(j) == val ) vidx = j;
      // Block of memory-op input
      Block *inb = bbs[mach->in(j)->_idx];
      Block *b = this;          // Start from nul check
      while( b != inb && b->_dom_depth > inb->_dom_depth )
        b = b->_idom;           // search upwards for input
      // See if input dominates null check
      if( b != inb )
        break;
    }
    if( j > 0 ) 
      continue;
    Block *mb = bbs[mach->_idx]; 
    // Hoisting stores requires more checks for the anti-dependence case.
    // Give up hoisting if we have to move the store past any load.
    if( was_store ) {
      Block *b = mb;            // Start searching here for a local load
      // mach use (faulting) trying to hoist
      // n might be blocker to hoisting
      while( b != this ) {
        uint k;
        for( k = 1; k < b->_nodes.size(); k++ ) {
          Node *n = b->_nodes[k];
          if( n->check_for_anti_dependence() && 
              n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) )
	    break;              // Found anti-dependent load
        }
        if( k < b->_nodes.size() )
          break;                // Found anti-dependent load
        // Make sure control does not do a merge (would have to check allpaths)
        if( b->num_preds() != 2 ) break;
        b = bbs[b->pred(1)->_idx]; // Move up to predecessor block
      }
      if( b != this ) continue;
    }

    // Make sure this memory op is not already being used for a NullCheck
    MachNode *e = mb->end()->is_Mach();
    if( e && e->is_MachNullCheck() && e->in(1) == mach )
      continue;                 // Already being used as a NULL check

    // Found a candidate!  Pick one with least dom depth - the highest 
    // in the dom tree should be closest to the null check.
    if( !best || 
        bbs[mach->_idx]->_dom_depth < bbs[best->_idx]->_dom_depth ) {
      best = mach;
      bidx = vidx;

    }
  }
  // No candidate!
  if( !best ) return;

  // ---- Found an implicit null check
  extern int implicit_null_checks;
  implicit_null_checks++;

  // Hoist the memory candidate up to the end of the test block.
  Block *old_block = bbs[best->_idx];
  old_block->find_remove(best);
  add_inst(best);
  bbs.map(best->_idx,this);

  // Move the control dependence
  if (best->in(0) && best->in(0) == old_block->_nodes[0])
    best->set_req(0, _nodes[0]);

  // Check for flag-killing projections that also need to be hoisted
  // Should be DU safe because no edge updates.
  for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) {
    Node* n = best->fast_out(j);
    if( n->Opcode() == Op_MachProj ) {
      bbs[n->_idx]->find_remove(n);
      add_inst(n);
      bbs.map(n->_idx,this);
    }
  }

  // proj==Op_True --> ne test; proj==Op_False --> eq test.
  // One of two graph shapes got matched:
  //   (IfTrue  (If (Bool NE (CmpP ptr NULL))))
  //   (IfFalse (If (Bool EQ (CmpP ptr NULL))))
  // NULL checks are always branch-if-eq.  If we see a IfTrue projection
  // then we are replacing a 'ne' test with a 'eq' NULL check test.
  // We need to flip the projections to keep the same semantics.
  if( proj->Opcode() == Op_IfTrue ) {
    // Swap order of projections in basic block to swap branch targets
    Node *tmp1 = _nodes[end_idx()+1];
    Node *tmp2 = _nodes[end_idx()+2];
    _nodes.map(end_idx()+1, tmp2);
    _nodes.map(end_idx()+2, tmp1);    
    Node *tmp = new (1) Node(1);
    tmp1->replace_by(tmp);
    tmp2->replace_by(tmp1);
    tmp->replace_by(tmp2);
  }

  // Remove the existing null check; use a new implicit null check instead.
  // Since schedule-local needs precise def-use info, we need to correct
  // it as well.
  Node *old_tst = proj->in(0);
  MachNode *nul_chk = new MachNullCheckNode(old_tst->in(0),best,bidx);
  _nodes.map(end_idx(),nul_chk);
  bbs.map(nul_chk->_idx,this);
  // Redirect users of old_test to nul_chk
  for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2)
    old_tst->last_out(i2)->set_req(0, nul_chk);
  // Clean-up any dead code
  for (uint i3 = 0; i3 < old_tst->req(); i3++)
    old_tst->set_req(i3, NULL);
  latency.at_put_grow(nul_chk->_idx, nul_chk->latency_from_uses(bbs, latency));
  latency.at_put_grow(best   ->_idx, best   ->latency_from_uses(bbs, latency));

#ifndef PRODUCT
  if (TraceOptoPipelining) {
    tty->print("# implicit_null_check: latency %4d for ", latency.at_grow(best->_idx));
    best->fast_dump();
    tty->print("# implicit_null_check: latency %4d for ", latency.at_grow(nul_chk->_idx));
    nul_chk->fast_dump();
  }
#endif
}
Example #5
0
//------------------------------sched_call-------------------------------------
uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, GrowableArray<int> &ready_cnt, MachCallNode *mcall, VectorSet &next_call ) {
  RegMask regs;

  // Schedule all the users of the call right now.  All the users are
  // projection Nodes, so they must be scheduled next to the call.
  // Collect all the defined registers.
  for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
    Node* n = mcall->fast_out(i);
    assert( n->is_MachProj(), "" );
    int n_cnt = ready_cnt.at(n->_idx)-1;
    ready_cnt.at_put(n->_idx, n_cnt);
    assert( n_cnt == 0, "" );
    // Schedule next to call
    _nodes.map(node_cnt++, n);
    // Collect defined registers
    regs.OR(n->out_RegMask());
    // Check for scheduling the next control-definer
    if( n->bottom_type() == Type::CONTROL )
      // Warm up next pile of heuristic bits
      needed_for_next_call(n, next_call, bbs);

    // Children of projections are now all ready
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j); // Get user
      if( bbs[m->_idx] != this ) continue;
      if( m->is_Phi() ) continue;
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }

  }

  // Act as if the call defines the Frame Pointer.
  // Certainly the FP is alive and well after the call.
  regs.Insert(matcher.c_frame_pointer());

  // Set all registers killed and not already defined by the call.
  uint r_cnt = mcall->tf()->range()->cnt();
  int op = mcall->ideal_Opcode();
  MachProjNode *proj = new (matcher.C, 1) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
  bbs.map(proj->_idx,this);
  _nodes.insert(node_cnt++, proj);

  // Select the right register save policy.
  const char * save_policy;
  switch (op) {
    case Op_CallRuntime:
    case Op_CallLeaf:
    case Op_CallLeafNoFP:
      // Calling C code so use C calling convention
      save_policy = matcher._c_reg_save_policy;
      break;

    case Op_CallStaticJava:
    case Op_CallDynamicJava:
      // Calling Java code so use Java calling convention
      save_policy = matcher._register_save_policy;
      break;

    default:
      ShouldNotReachHere();
  }

  // When using CallRuntime mark SOE registers as killed by the call
  // so values that could show up in the RegisterMap aren't live in a
  // callee saved register since the register wouldn't know where to
  // find them.  CallLeaf and CallLeafNoFP are ok because they can't
  // have debug info on them.  Strictly speaking this only needs to be
  // done for oops since idealreg2debugmask takes care of debug info
  // references but there no way to handle oops differently than other
  // pointers as far as the kill mask goes.
  bool exclude_soe = op == Op_CallRuntime;

  // If the call is a MethodHandle invoke, we need to exclude the
  // register which is used to save the SP value over MH invokes from
  // the mask.  Otherwise this register could be used for
  // deoptimization information.
  if (op == Op_CallStaticJava) {
    MachCallStaticJavaNode* mcallstaticjava = (MachCallStaticJavaNode*) mcall;
    if (mcallstaticjava->_method_handle_invoke)
      proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask());
  }

  add_call_kills(proj, regs, save_policy, exclude_soe);

  return node_cnt;
}
Example #6
0
//------------------------------call_catch_cleanup-----------------------------
// If we inserted any instructions between a Call and his CatchNode,
// clone the instructions on all paths below the Catch.
void Block::call_catch_cleanup(Block_Array &bbs) {

  // End of region to clone
  uint end = end_idx();
  if( !_nodes[end]->is_Catch() ) return;
  // Start of region to clone
  uint beg = end;
  while(!_nodes[beg-1]->is_MachProj() ||
        !_nodes[beg-1]->in(0)->is_MachCall() ) {
    beg--;
    assert(beg > 0,"Catch cleanup walking beyond block boundary");
  }
  // Range of inserted instructions is [beg, end)
  if( beg == end ) return;

  // Clone along all Catch output paths.  Clone area between the 'beg' and
  // 'end' indices.
  for( uint i = 0; i < _num_succs; i++ ) {
    Block *sb = _succs[i];
    // Clone the entire area; ignoring the edge fixup for now.
    for( uint j = end; j > beg; j-- ) {
      // It is safe here to clone a node with anti_dependence
      // since clones dominate on each path.
      Node *clone = _nodes[j-1]->clone();
      sb->_nodes.insert( 1, clone );
      bbs.map(clone->_idx,sb);
    }
  }


  // Fixup edges.  Check the def-use info per cloned Node
  for(uint i2 = beg; i2 < end; i2++ ) {
    uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block
    Node *n = _nodes[i2];        // Node that got cloned
    // Need DU safe iterator because of edge manipulation in calls.
    Unique_Node_List *out = new Unique_Node_List(Thread::current()->resource_area());
    for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) {
      out->push(n->fast_out(j1));
    }
    uint max = out->size();
    for (uint j = 0; j < max; j++) {// For all users
      Node *use = out->pop();
      Block *buse = bbs[use->_idx];
      if( use->is_Phi() ) {
        for( uint k = 1; k < use->req(); k++ )
          if( use->in(k) == n ) {
            Node *fixup = catch_cleanup_find_cloned_def(bbs[buse->pred(k)->_idx], n, this, bbs, n_clone_idx);
            use->set_req(k, fixup);
          }
      } else {
        if (this == buse) {
          catch_cleanup_intra_block(use, n, this, beg, n_clone_idx);
        } else {
          catch_cleanup_inter_block(use, buse, n, this, bbs, n_clone_idx);
        }
      }
    } // End for all users

  } // End of for all Nodes in cloned area

  // Remove the now-dead cloned ops
  for(uint i3 = beg; i3 < end; i3++ ) {
    _nodes[beg]->disconnect_inputs(NULL);
    _nodes.remove(beg);
  }

  // If the successor blocks have a CreateEx node, move it back to the top
  for(uint i4 = 0; i4 < _num_succs; i4++ ) {
    Block *sb = _succs[i4];
    uint new_cnt = end - beg;
    // Remove any newly created, but dead, nodes.
    for( uint j = new_cnt; j > 0; j-- ) {
      Node *n = sb->_nodes[j];
      if (n->outcnt() == 0 &&
          (!n->is_Proj() || n->as_Proj()->in(0)->outcnt() == 1) ){
        n->disconnect_inputs(NULL);
        sb->_nodes.remove(j);
        new_cnt--;
      }
    }
    // If any newly created nodes remain, move the CreateEx node to the top
    if (new_cnt > 0) {
      Node *cex = sb->_nodes[1+new_cnt];
      if( cex->is_Mach() && cex->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
        sb->_nodes.remove(1+new_cnt);
        sb->_nodes.insert(1,cex);
      }
    }
  }
}