//------------------------------is_uncommon------------------------------------
// True if the block is of low enough frequency, or is guarded by a test
// which mostly does not go here.
bool Block::is_uncommon( Block_Array &bbs ) const {
  // Initial blocks must never be moved, so are never uncommon.
  if (head()->is_Root() || head()->is_Start())  return false;

  // Check for way-low freq
  if( _freq < BLOCK_FREQUENCY(0.00001f) ) return true;

  // Look for code shape indicating uncommon_trap or slow path
  if (has_uncommon_code()) return true;

  const float epsilon = 0.05f;
  const float guard_factor = PROB_UNLIKELY_MAG(4) / (1.f - epsilon);
  uint uncommon_preds = 0;
  uint freq_preds = 0;
  uint uncommon_for_freq_preds = 0;
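  // uncommon_preds counts guards that essentially never execute;
  // freq_preds counts the remaining guards, and uncommon_for_freq_preds
  // counts how many of those frequent guards only rarely reach this block.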

  for( uint i=1; i<num_preds(); i++ ) {
    Block* guard = bbs[pred(i)->_idx];
    // Check to see if this block follows its guard 1 time out of 10000
    // or less. 
    //
    // See list of magnitude-4 unlikely probabilities in cfgnode.hpp which
    // we intend to be "uncommon", such as slow-path TLE allocation, 
    // predicted call failure, and uncommon trap triggers.
    //
    // Use an epsilon value of 5% to allow for variability in frequency 
    // predictions and floating point calculations. The net effect is
    // that guard_factor is set to 9500.
    //
    // Ignore low-frequency blocks.
    // The next check is (guard->_freq < 1.e-5 * 9500.).
    if(guard->_freq*BLOCK_FREQUENCY(guard_factor) < BLOCK_FREQUENCY(0.00001f)) {
      uncommon_preds++;
    } else {
      freq_preds++;
      if( _freq < guard->_freq * guard_factor ) {
        uncommon_for_freq_preds++;
      }
    }
  }
  if( num_preds() > 1 &&
      // The block is uncommon if all preds are uncommon or
      (uncommon_preds == (num_preds()-1) ||
      // it is uncommon for all frequent preds.
       uncommon_for_freq_preds == freq_preds) ) {
    return true; 
  }
  return false;
}
//------------------------------expand_safepoint_node----------------------
void PhaseMacroExpand::expand_safepoint_node(SafePointNode *safe) {
    // Make a fast-path/slow-path diamond around the explicit poll
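    // The test loads the thread's please_self_suspend word and branches to
    // the slow path when it is non-zero; the fast path falls through, and
    // the original SafePointNode is kept only on the slow path.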
    Node *tls = new (C, 1) ThreadLocalNode();
    transform_later(tls);
    ConLNode *off = _igvn.longcon(in_bytes(JavaThread::please_self_suspend_offset()));
    Node *adr = new (C, 4) AddPNode( C->top(), tls, off );
    transform_later(adr);
    Node *pss = LoadNode::make(C, NULL, safe->in(TypeFunc::Memory), adr, TypeRawPtr::BOTTOM, TypeInt::INT, T_INT);
    transform_later(pss);
    Node *cmp = new (C, 3) CmpINode( pss, _igvn.intcon(0) );
    transform_later(cmp);
    Node *bol = new (C, 2) BoolNode(cmp, BoolTest::ne);
    transform_later(bol);
    Node *region = new (C, 3) RegionNode(3);
    transform_later(region);
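    // The poll almost never fires, so the slow-path branch is marked
    // magnitude-6 unlikely.  opt_iff fills in the fast-path projection as
    // input 2 of the region and returns the slow-path projection.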
    IfNode*iff= new (C, 2) IfNode(safe->in(TypeFunc::Control), bol, PROB_UNLIKELY_MAG(6), COUNT_UNKNOWN );
    Node *slow_path = opt_iff(region,iff);
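    // Keep the original safepoint as the slow-path call: give it the
    // slow-path control, let the new region take over its users, then plug
    // the safepoint back in as the region's slow-path input (slot 1).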
    _igvn.hash_delete(safe);
    safe->set_req(TypeFunc::Control,slow_path);
    _igvn.subsume_node_keep_old(safe,region);
    region->init_req(1,safe);
}
//------------------------------implicit_null_check----------------------------
// Detect implicit-null-check opportunities.  Basically, find NULL checks
// with suitable memory ops nearby.  Use the memory op to do the NULL check.
// I can generate a memory op if there is not one nearby.
// The proj is the control projection for the not-null case.
// The val is the pointer being checked for nullness or
// decodeHeapOop_not_null node if it did not fold into address.
void PhaseCFG::implicit_null_check(Block* block, Node *proj, Node *val, int allowed_reasons) {
  // Assume that if a null check is needed for offset 0 then it is always needed
  // Intel solaris doesn't support any null checks yet and no
  // mechanism exists (yet) to set the switches at an os_cpu level
  if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return;

  // Make sure the ptr-is-null path appears to be uncommon!
  float f = block->end()->as_MachIf()->_prob;
  if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f;
  if( f > PROB_UNLIKELY_MAG(4) ) return;

  uint bidx = 0;                // Capture index of value into memop
  bool was_store;               // Memory op is a store op

  // Get the successor block for if the test ptr is non-null
  Block* not_null_block;  // this one goes with the proj
  Block* null_block;
  if (block->get_node(block->number_of_nodes()-1) == proj) {
    null_block     = block->_succs[0];
    not_null_block = block->_succs[1];
  } else {
    assert(block->get_node(block->number_of_nodes()-2) == proj, "proj is one or the other");
    not_null_block = block->_succs[0];
    null_block     = block->_succs[1];
  }
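  // The null path may run through an empty connector block; step over any
  // such blocks to reach the block that actually handles the null case.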
  while (null_block->is_Empty() == Block::empty_with_goto) {
    null_block     = null_block->_succs[0];
  }

  // Search the exception block for an uncommon trap.
  // (See Parse::do_if and Parse::do_ifnull for the reason
  // we need an uncommon trap.  Briefly, we need a way to
  // detect failure of this optimization, as in 6366351.)
  {
    bool found_trap = false;
    for (uint i1 = 0; i1 < null_block->number_of_nodes(); i1++) {
      Node* nn = null_block->get_node(i1);
      if (nn->is_MachCall() &&
          nn->as_MachCall()->entry_point() == SharedRuntime::uncommon_trap_blob()->entry_point()) {
        const Type* trtype = nn->in(TypeFunc::Parms)->bottom_type();
        if (trtype->isa_int() && trtype->is_int()->is_con()) {
          jint tr_con = trtype->is_int()->get_con();
          Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(tr_con);
          Deoptimization::DeoptAction action = Deoptimization::trap_request_action(tr_con);
          assert((int)reason < (int)BitsPerInt, "recode bit map");
          if (is_set_nth_bit(allowed_reasons, (int) reason)
              && action != Deoptimization::Action_none) {
            // This uncommon trap is sure to recompile, eventually.
            // When that happens, C->too_many_traps will prevent
            // this transformation from happening again.
            found_trap = true;
          }
        }
        break;
      }
    }
    if (!found_trap) {
      // We did not find an uncommon trap.
      return;
    }
  }

  // Check for decodeHeapOop_not_null node which did not fold into address
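  // The caller tags 'val' by setting its low address bit when 'val' is
  // really the DecodeN node rather than the oop itself; strip the tag
  // before using the pointer.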
  bool is_decoden = ((intptr_t)val) & 1;
  val = (Node*)(((intptr_t)val) & ~1);

  assert(!is_decoden || ((val->in(0) == NULL) && val->is_Mach() &&
         (val->as_Mach()->ideal_Opcode() == Op_DecodeN)), "sanity");

  // Search the successor block for a load or store whose base value is also
  // the tested value.  There may be several.
  Node_List *out = new Node_List(Thread::current()->resource_area());
  MachNode *best = NULL;        // Best found so far
  for (DUIterator i = val->outs(); val->has_out(i); i++) {
    Node *m = val->out(i);
    if( !m->is_Mach() ) continue;
    MachNode *mach = m->as_Mach();
    was_store = false;
    int iop = mach->ideal_Opcode();
    switch( iop ) {
    case Op_LoadB:
    case Op_LoadUB:
    case Op_LoadUS:
    case Op_LoadD:
    case Op_LoadF:
    case Op_LoadI:
    case Op_LoadL:
    case Op_LoadP:
    case Op_LoadN:
    case Op_LoadS:
    case Op_LoadKlass:
    case Op_LoadNKlass:
    case Op_LoadRange:
    case Op_LoadD_unaligned:
    case Op_LoadL_unaligned:
      assert(mach->in(2) == val, "should be address");
      break;
    case Op_StoreB:
    case Op_StoreC:
    case Op_StoreCM:
    case Op_StoreD:
    case Op_StoreF:
    case Op_StoreI:
    case Op_StoreL:
    case Op_StoreP:
    case Op_StoreN:
    case Op_StoreNKlass:
      was_store = true;         // Memory op is a store op
      // Stores will have their address in slot 2 (memory in slot 1).
      // If the value being null-checked is in another slot, it means we
      // are storing the checked value, which does NOT check the value!
      if( mach->in(2) != val ) continue;
      break;                    // Found a memory op?
    case Op_StrComp:
    case Op_StrEquals:
    case Op_StrIndexOf:
    case Op_AryEq:
    case Op_EncodeISOArray:
      // Not a legit memory op for implicit null check regardless of
      // embedded loads
      continue;
    default:                    // Also check for embedded loads
      if( !mach->needs_anti_dependence_check() )
        continue;               // Not a memory op; skip it
      if( must_clone[iop] ) {
        // Do not move nodes which produce flags because
        // RA will try to clone it to place near branch and
        // it will cause recompilation, see clone_node().
        continue;
      }
      {
        // Check that value is used in memory address in
        // instructions with embedded load (CmpP val1,(val2+off)).
        Node* base;
        Node* index;
        const MachOper* oper = mach->memory_inputs(base, index);
        if (oper == NULL || oper == (MachOper*)-1) {
          continue;             // Not a memory op; skip it
        }
        if (val == base ||
            (val == index && val->bottom_type()->isa_narrowoop())) {
          break;                // Found it
        } else {
          continue;             // Skip it
        }
      }
      break;
    }
    // check if the offset is not too high for implicit exception
    {
      intptr_t offset = 0;
      const TypePtr *adr_type = NULL;  // Do not need this return value here
      const Node* base = mach->get_base_and_disp(offset, adr_type);
      if (base == NULL || base == NodeSentinel) {
        // Narrow oop address doesn't have base, only index
        if( val->bottom_type()->isa_narrowoop() &&
            MacroAssembler::needs_explicit_null_check(offset) )
          continue;             // Give up if offset is beyond page size
        // cannot reason about it; is probably not implicit null exception
      } else {
        const TypePtr* tptr;
        if (UseCompressedOops && (Universe::narrow_oop_shift() == 0 ||
                                  Universe::narrow_klass_shift() == 0)) {
          // 32-bits narrow oop can be the base of address expressions
          tptr = base->get_ptr_type();
        } else {
          // only regular oops are expected here
          tptr = base->bottom_type()->is_ptr();
        }
        // Give up if offset is not a compile-time constant
        if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot )
          continue;
        offset += tptr->_offset; // correct if base is offseted
        if( MacroAssembler::needs_explicit_null_check(offset) )
          continue;             // Give up if the reference is beyond the 4K page size
      }
    }

    // Check ctrl input to see if the null-check dominates the memory op
    Block *cb = get_block_for_node(mach);
    cb = cb->_idom;             // Always hoist at least 1 block
    if( !was_store ) {          // Stores can be hoisted only one block
      while( cb->_dom_depth > (block->_dom_depth + 1))
        cb = cb->_idom;         // Hoist loads as far as we want
      // The non-null-block should dominate the memory op, too. Live
      // range spilling will insert a spill in the non-null-block if it
      // needs to spill the memory op for an implicit null check.
      if (cb->_dom_depth == (block->_dom_depth + 1)) {
        if (cb != not_null_block) continue;
        cb = cb->_idom;
      }
    }
    if( cb != block ) continue;

    // Found a memory user; see if it can be hoisted to check-block
    uint vidx = 0;              // Capture index of value into memop
    uint j;
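    // Every input of the memory op must dominate the null check, otherwise
    // the op cannot be hoisted above the check.  The tested DecodeN is
    // exempt since it will itself be hoisted to where it is needed.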
    for( j = mach->req()-1; j > 0; j-- ) {
      if( mach->in(j) == val ) {
        vidx = j;
        // Ignore DecodeN val which could be hoisted to where needed.
        if( is_decoden ) continue;
      }
      // Block of memory-op input
      Block *inb = get_block_for_node(mach->in(j));
      Block *b = block;          // Start from null check
      while( b != inb && b->_dom_depth > inb->_dom_depth )
        b = b->_idom;           // search upwards for input
      // See if input dominates null check
      if( b != inb )
        break;
    }
    if( j > 0 )
      continue;
    Block *mb = get_block_for_node(mach);
    // Hoisting stores requires more checks for the anti-dependence case.
    // Give up hoisting if we have to move the store past any load.
    if( was_store ) {
      Block *b = mb;            // Start searching here for a local load
      // 'mach' is the faulting use we are trying to hoist; any
      // anti-dependent load 'n' found below may block the hoist.
      while( b != block ) {
        uint k;
        for( k = 1; k < b->number_of_nodes(); k++ ) {
          Node *n = b->get_node(k);
          if( n->needs_anti_dependence_check() &&
              n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) )
            break;              // Found anti-dependent load
        }
        if( k < b->number_of_nodes() )
          break;                // Found anti-dependent load
        // Make sure control does not do a merge (would have to check all paths)
        if( b->num_preds() != 2 ) break;
        b = get_block_for_node(b->pred(1)); // Move up to predecessor block
      }
      if( b != block ) continue;
    }

    // Make sure this memory op is not already being used for a NullCheck
    Node *e = mb->end();
    if( e->is_MachNullCheck() && e->in(1) == mach )
      continue;                 // Already being used as a NULL check

    // Found a candidate!  Pick one with least dom depth - the highest
    // in the dom tree should be closest to the null check.
    if (best == NULL || get_block_for_node(mach)->_dom_depth < get_block_for_node(best)->_dom_depth) {
      best = mach;
      bidx = vidx;
    }
  }
  // No candidate!
  if (best == NULL) {
    return;
  }

  // ---- Found an implicit null check
  extern int implicit_null_checks;
  implicit_null_checks++;

  if( is_decoden ) {
    // Check if we need to hoist decodeHeapOop_not_null first.
    Block *valb = get_block_for_node(val);
    if( block != valb && block->_dom_depth < valb->_dom_depth ) {
      // Hoist it up to the end of the test block.
      valb->find_remove(val);
      block->add_inst(val);
      map_node_to_block(val, block);
      // DecodeN on x86 may kill flags. Check for flag-killing projections
      // that also need to be hoisted.
      for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) {
        Node* n = val->fast_out(j);
        if( n->is_MachProj() ) {
          get_block_for_node(n)->find_remove(n);
          block->add_inst(n);
          map_node_to_block(n, block);
        }
      }
    }
  }
  // Hoist the memory candidate up to the end of the test block.
  Block *old_block = get_block_for_node(best);
  old_block->find_remove(best);
  block->add_inst(best);
  map_node_to_block(best, block);

  // Move the control dependence
  if (best->in(0) && best->in(0) == old_block->head())
    best->set_req(0, block->head());

  // Check for flag-killing projections that also need to be hoisted
  // Should be DU safe because no edge updates.
  for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) {
    Node* n = best->fast_out(j);
    if( n->is_MachProj() ) {
      get_block_for_node(n)->find_remove(n);
      block->add_inst(n);
      map_node_to_block(n, block);
    }
  }

  // proj==Op_IfTrue --> ne test; proj==Op_IfFalse --> eq test.
  // One of two graph shapes got matched:
  //   (IfTrue  (If (Bool NE (CmpP ptr NULL))))
  //   (IfFalse (If (Bool EQ (CmpP ptr NULL))))
  // NULL checks are always branch-if-eq.  If we see an IfTrue projection
  // then we are replacing a 'ne' test with an 'eq' NULL check test.
  // We need to flip the projections to keep the same semantics.
  if( proj->Opcode() == Op_IfTrue ) {
    // Swap order of projections in basic block to swap branch targets
    Node *tmp1 = block->get_node(block->end_idx()+1);
    Node *tmp2 = block->get_node(block->end_idx()+2);
    block->map_node(tmp2, block->end_idx()+1);
    block->map_node(tmp1, block->end_idx()+2);
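    // Swap the uses of the two projections through a scratch node: a direct
    // replace_by would merge the two user sets and the second swap could
    // not tell them apart.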
    Node *tmp = new (C) Node(C->top()); // Use not NULL input
    tmp1->replace_by(tmp);
    tmp2->replace_by(tmp1);
    tmp->replace_by(tmp2);
    tmp->destruct();
  }

  // Remove the existing null check; use a new implicit null check instead.
  // Since schedule-local needs precise def-use info, we need to correct
  // it as well.
  Node *old_tst = proj->in(0);
  MachNode *nul_chk = new (C) MachNullCheckNode(old_tst->in(0),best,bidx);
  block->map_node(nul_chk, block->end_idx());
  map_node_to_block(nul_chk, block);
  // Redirect users of old_tst to nul_chk
  for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2)
    old_tst->last_out(i2)->set_req(0, nul_chk);
  // Clean-up any dead code
  for (uint i3 = 0; i3 < old_tst->req(); i3++)
    old_tst->set_req(i3, NULL);

  latency_from_uses(nul_chk);
  latency_from_uses(best);
}
//--- expand_allocate --------------------------------------------------------
void PhaseMacroExpand::expand_allocate(AllocateNode*A) {
    // See if we are forced to go slow-path.
    if( A->_entry_point == (address)SharedRuntime::_new )
        return;      // Always go slow-path - required for finalizers, etc

    Node *A_ctl = A->proj_out(TypeFunc::Control);
    Node *A_mem = A->proj_out(TypeFunc::Memory);
    Node *A_oop = A->proj_out(TypeFunc::Parms+0);

    // Inject a fast-path / slow-path diamond.  Fast-path is still milli-code,
    // which returns an oop or null - and does not block, nor GC, nor kill
    // registers.  If we have an allocation failure the fast-path leaves the
    // regs in-place for a slow-path call which DOES block, GC, etc.
    Node *ctl = A->in(TypeFunc::Control);
    Node *kid = A->in(AllocateNode::KID);
    Node *siz = A->in(AllocateNode::AllocSize);
    Node *xtr = A->in(AllocateNode::ExtraSlow);
    Node *len = A->is_AllocateArray() ? A->in(AllocateNode::ALength) : C->top();

    if ( !A_oop && ( !A->is_AllocateArray() || ( _igvn.type(A->in(AllocateNode::ALength))->higher_equal(TypeInt::POS) ) ) ) {
        tty->print_cr("Dead allocation should be removed earlier");
        Unimplemented();
    }

    // Convert the array element count to a Long value,
    // and fold in the EKID value for oop-arrays.
    const TypeOopPtr *toop = A->_tf->range()->field_at(TypeFunc::Parms+0)->is_ptr()->cast_to_ptr_type(TypePtr::BotPTR)->is_oopptr();
    if( len != C->top() ) {
        assert0( A->is_AllocateArray() );
        Node *lenl = transform_later(new (C, 2) ConvI2LNode(len));
        Node *ekid = A->in(AllocateNode::EKID);
        if( ekid->bottom_type() != TypeInt::ZERO ) {
            // Have an EKID?  Smash the array length and EKID together.
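            // The packed long carries the array length in the low 32 bits
            // and the EKID in the high 32 bits.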
            Node *ekidl  = transform_later(new (C,2) CastP2XNode(0,ekid));
            Node *ekidshf= transform_later(new (C,3) LShiftLNode(ekidl,_igvn.intcon(32)));
            Node *combo  = transform_later(new (C,3)     OrLNode(lenl,ekidshf));
            len = combo;
        } else {
            len = lenl;
        }
        // Crunch arguments for matching.  The old ExtraSlow argument is used to
        // make more gating control flow in this function, but is not an argument
        // to the runtime call.  Neither is the EKID argument: the slow-path will
        // compute its own EKID.
        A->set_req(AllocateNode::ExtraSlow,len);
        A->set_req(AllocateNode::ALength,C->top());
        A->set_req(AllocateNode::EKID,C->top());
    } else {
        A->set_req(AllocateNode::ExtraSlow,_igvn.zerocon(T_INT));
        assert0( !A->is_AllocateArray() );
    }

    // Extra slow-path test required?
    RegionNode *region2 = NULL;
    if( xtr->bottom_type() != TypeInt::ZERO ) { // Commonly, no extra test required
        // Extra slow-path tests can be required for fast-pathing reflection
        // allocation (and cloning), e.g. if the new object requires
        // finalization or further class linking/loading.
        Node *cmp = transform_later(new (C, 3) CmpINode(xtr, _igvn.zerocon(T_INT)));
        Node *bol = transform_later(new (C, 2) BoolNode(cmp, BoolTest::eq));
        Node *iff = new (C, 2) IfNode( ctl, bol, PROB_LIKELY_MAG(5), COUNT_UNKNOWN );
        region2 = new (C, 3) RegionNode(3);
        transform_later(region2);
        ctl = opt_iff(region2,iff);
    }

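    // The fast-path allocation itself.  FastAllocNode yields the new oop,
    // or NULL if the fast path fails, and SCMemProjNode captures the memory
    // state it produces.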
    FastAllocNode *fal = new (C, 4) FastAllocNode(kid,siz,toop,len);
    fal->set_req(0,ctl);
    transform_later(fal);
    Node *mem = new (C,1) SCMemProjNode(fal);
    transform_later(mem);

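    // A NULL result from the fast path means the allocation failed; the
    // unlikely branch routes control to the slow path, where the original
    // AllocateNode is re-issued as a blocking runtime call.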
    Node *cmp = transform_later(new (C, 3) CmpPNode(fal, _igvn.zerocon(T_OBJECT)));
    Node *bol = transform_later(new (C, 2) BoolNode(cmp, BoolTest::eq));
    Node *iff = new (C, 2) IfNode(ctl, bol, PROB_UNLIKELY_MAG(5), COUNT_UNKNOWN);
    RegionNode *region = new (C, 3) RegionNode(3);
    transform_later(region);
    Node *slow_path = opt_iff(region,iff);

    // Make the merge point
    PhiNode *phimem = new (C, 3) PhiNode(region, Type::MEMORY, TypePtr::BOTTOM);
    transform_later(phimem);
    phimem->init_req(2,mem);    // Plug in the fast-path

    PhiNode *phioop = NULL;
    if (A_oop) {
        phioop = new (C, 3) PhiNode(region,toop);
        transform_later(phioop);
        phioop->init_req(2,fal);    // Plug in the fast-path
    }

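    // Re-point all users of the allocation's projections at the merged
    // results; subsume_node_keep_old keeps the old projections alive so
    // they can be wired in below as the slow-path (slot 1) inputs.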
    _igvn.hash_delete(A_ctl);
    _igvn.hash_delete(A_mem);
    if (A_oop) _igvn.hash_delete (A_oop);
    _igvn.subsume_node_keep_old(A_ctl,region);
    _igvn.subsume_node_keep_old(A_mem,phimem);
    if (A_oop) _igvn.subsume_node_keep_old(A_oop,phioop);

    // Plug in the slow-path
    region->init_req(1,A_ctl);
    phimem->init_req(1,A_mem);
    if (A_oop) phioop->init_req(1,A_oop);
    if( xtr->bottom_type() != TypeInt::ZERO ) { // Commonly, no extra test required
        region2->init_req(1,slow_path);
        slow_path = region2;
    }
    A->set_req(TypeFunc::Control,slow_path);
    // The slow-path call now directly calls into the runtime.
    A->_entry_point = (address)SharedRuntime::_new;
}