예제 #1
0
//------------------------------match------------------------------------------
// Construct projections for control, I/O, memory-fields, ..., and
// return result(s) along with their RegMask info
Node *CallNode::match( const ProjNode *proj, const Matcher *match ) {
    switch (proj->_con) {
    case TypeFunc::Control:
    case TypeFunc::I_O:
    case TypeFunc::Memory:
        return new (1) MachProjNode(this,proj->_con,RegMask::Empty,MachProjNode::unmatched_proj);

    case TypeFunc::Parms+1:       // For LONG & DOUBLE returns
        assert(tf()->_range->field_at(TypeFunc::Parms+1) == Type::HALF, "");
        // 2nd half of doubles and longs
        return new (1) MachProjNode(this,proj->_con, RegMask::Empty, (uint)OptoReg::Bad);

    case TypeFunc::Parms: {       // Normal returns
        uint ideal_reg = Matcher::base2reg[tf()->range()->field_at(TypeFunc::Parms)->base()];
        OptoRegPair regs = is_CallRuntime()
                           ? match->c_return_value(ideal_reg,true)  // Calls into C runtime
                           : match->  return_value(ideal_reg,true); // Calls into compiled Java code
        RegMask rm = RegMask(regs.lo());
        if( regs.hi() != OptoReg::Bad )
            rm.Insert( regs.hi() );
        return new (1) MachProjNode(this,proj->_con,rm,ideal_reg);
    }

    case TypeFunc::ReturnAdr:
    case TypeFunc::FramePtr:
    default:
        ShouldNotReachHere();
    }
    return NULL;
}
예제 #2
0
//------------------------------sched_call-------------------------------------
uint Block::sched_call( Matcher &m, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call ) {
  RegMask regs;

  // Schedule all the users of the call right now.  All the users are
  // projection Nodes, so they must be scheduled next to the call.
  // Collect all the defined registers.
  for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
    Node* n = mcall->fast_out(i);
    assert( n->Opcode()==Op_MachProj, "" );
    --ready_cnt[n->_idx];
    assert( !ready_cnt[n->_idx], "" );
    // Schedule next to call
    _nodes.map(node_cnt++, n);
    // Collect defined registers
    regs.OR(n->out_RegMask());
    // Check for scheduling the next control-definer
    if( n->bottom_type() == Type::CONTROL ) 
      // Warm up next pile of heuristic bits
      needed_for_next_call(n, next_call, bbs);

    // Children of projections are now all ready
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j); // Get user
      if( bbs[m->_idx] != this ) continue;
      if( m->is_Phi() ) continue;
      if( !--ready_cnt[m->_idx] ) 
        worklist.push(m);
    }
  
  }

  // Act as if the call defines the Frame Pointer.
  // Certainly the FP is alive and well after the call.
  regs.Insert(m.c_frame_pointer());

  // Set all registers killed and not already defined by the call.
  uint r_cnt = mcall->tf()->range()->cnt();
  int op = mcall->ideal_Opcode();
  MachProjNode *proj = new (1) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
  bbs.map(proj->_idx,this);
  _nodes.insert(node_cnt++, proj);

  for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) {
    if( !regs.Member(r) ) {     // Not already defined by the call  
      // Save-on-call register?
      if( (m._register_save_policy[r] == 'C') ||
          (m._register_save_policy[r] == 'A') ||
          ((m._register_save_policy[r] == 'E') &&
           (op == Op_CallRuntime     ||
            op == Op_CallNative      ||
            op == Op_CallInterpreter ||
            op == Op_CallLeaf)) ) { 
        proj->_rout.Insert(r);
      }
    }
  }

  return node_cnt;
}
예제 #3
0
//------------------------------compute_separating_interferences---------------
// Factored code from copy_copy that computes extra interferences from
// lengthening a live range by double-coalescing.
uint PhaseConservativeCoalesce::compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint reg_degree, uint rm_size, uint lr1, uint lr2 ) {

  assert(!lrgs(lr1)._fat_proj, "cannot coalesce fat_proj");
  assert(!lrgs(lr2)._fat_proj, "cannot coalesce fat_proj");
  Node *prev_copy = dst_copy->in(dst_copy->is_Copy());
  Block *b2 = b;
  uint bindex2 = bindex;
  while( 1 ) {
    // Find previous instruction
    bindex2--;                  // Chain backwards 1 instruction
    while( bindex2 == 0 ) {     // At block start, find prior block
      assert( b2->num_preds() == 2, "cannot double coalesce across c-flow" );
      b2 = _phc._cfg._bbs[b2->pred(1)->_idx];
      bindex2 = b2->end_idx()-1;
    }
    // Get prior instruction
    assert(bindex2 < b2->_nodes.size(), "index out of bounds");
    Node *x = b2->_nodes[bindex2];
    if( x == prev_copy ) {      // Previous copy in copy chain?
      if( prev_copy == src_copy)// Found end of chain and all interferences
        break;                  // So break out of loop
      // Else work back one in copy chain
      prev_copy = prev_copy->in(prev_copy->is_Copy());
    } else {                    // Else collect interferences
      uint lidx = _phc.Find(x);
      // Found another def of live-range being stretched?
      if( lidx == lr1 ) return max_juint;
      if( lidx == lr2 ) return max_juint;

      // If we attempt to coalesce across a bound def
      if( lrgs(lidx).is_bound() ) {
        // Do not let the coalesced LRG expect to get the bound color
        rm.SUBTRACT( lrgs(lidx).mask() );
        // Recompute rm_size
        rm_size = rm.Size();
        //if( rm._flags ) rm_size += 1000000;
        if( reg_degree >= rm_size ) return max_juint;
      }
      if( rm.overlap(lrgs(lidx).mask()) ) {
        // Insert lidx into union LRG; returns TRUE if actually inserted
        if( _ulr.insert(lidx) ) {
          // Infinite-stack neighbors do not alter colorability, as they
          // can always color to some other color.
          if( !lrgs(lidx).mask().is_AllStack() ) {
            // If this coalesce will make any new neighbor uncolorable,
            // do not coalesce.
            if( lrgs(lidx).just_lo_degree() )
              return max_juint;
            // Bump our degree
            if( ++reg_degree >= rm_size )
              return max_juint;
          } // End of if not infinite-stack neighbor
        } // End of if actually inserted
      } // End of if live range overlaps
    } // End of else collect interferences for 1 node
  } // End of while forever, scan back for interferences
  return reg_degree;
}
예제 #4
0
파일: lcm.cpp 프로젝트: dain/graal
//------------------------------add_call_kills-------------------------------------
// helper function that adds caller save registers to MachProjNode
static void add_call_kills(MachProjNode *proj, RegMask& regs, const char* save_policy, bool exclude_soe) {
  // Fill in the kill mask for the call
  for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) {
    if( !regs.Member(r) ) {     // Not already defined by the call
      // Save-on-call register?
      if ((save_policy[r] == 'C') ||
          (save_policy[r] == 'A') ||
          ((save_policy[r] == 'E') && exclude_soe)) {
        proj->_rout.Insert(r);
      }
    }
  }
}
예제 #5
0
파일: lcm.cpp 프로젝트: dain/graal
//------------------------------schedule_local---------------------------------
// Topological sort within a block.  Someday become a real scheduler.
bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call) {
  // Already "sorted" are the block start Node (as the first entry), and
  // the block-ending Node and any trailing control projections.  We leave
  // these alone.  PhiNodes and ParmNodes are made to follow the block start
  // Node.  Everything else gets topo-sorted.

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print_cr("# --- schedule_local B%d, before: ---", block->_pre_order);
      for (uint i = 0;i < block->number_of_nodes(); i++) {
        tty->print("# ");
        block->get_node(i)->fast_dump();
      }
      tty->print_cr("#");
    }
#endif

  // RootNode is already sorted
  if (block->number_of_nodes() == 1) {
    return true;
  }

  // Move PhiNodes and ParmNodes from 1 to cnt up to the start
  uint node_cnt = block->end_idx();
  uint phi_cnt = 1;
  uint i;
  for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
    Node *n = block->get_node(i);
    if( n->is_Phi() ||          // Found a PhiNode or ParmNode
        (n->is_Proj()  && n->in(0) == block->head()) ) {
      // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
      block->map_node(block->get_node(phi_cnt), i);
      block->map_node(n, phi_cnt++);  // swap Phi/Parm up front
    } else {                    // All others
      // Count block-local inputs to 'n'
      uint cnt = n->len();      // Input count
      uint local = 0;
      for( uint j=0; j<cnt; j++ ) {
        Node *m = n->in(j);
        if( m && get_block_for_node(m) == block && !m->is_top() )
          local++;              // One more block-local input
      }
      ready_cnt.at_put(n->_idx, local); // Count em up

#ifdef ASSERT
      if( UseConcMarkSweepGC || UseG1GC ) {
        if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
          // Check the precedence edges
          for (uint prec = n->req(); prec < n->len(); prec++) {
            Node* oop_store = n->in(prec);
            if (oop_store != NULL) {
              assert(get_block_for_node(oop_store)->_dom_depth <= block->_dom_depth, "oop_store must dominate card-mark");
            }
          }
        }
      }
#endif

      // A few node types require changing a required edge to a precedence edge
      // before allocation.
      if( n->is_Mach() && n->req() > TypeFunc::Parms &&
          (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire ||
           n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) {
        // MemBarAcquire could be created without Precedent edge.
        // del_req() replaces the specified edge with the last input edge
        // and then removes the last edge. If the specified edge > number of
        // edges the last edge will be moved outside of the input edges array
        // and the edge will be lost. This is why this code should be
        // executed only when Precedent (== TypeFunc::Parms) edge is present.
        Node *x = n->in(TypeFunc::Parms);
        n->del_req(TypeFunc::Parms);
        n->add_prec(x);
      }
    }
  }
  for(uint i2=i; i2< block->number_of_nodes(); i2++ ) // Trailing guys get zapped count
    ready_cnt.at_put(block->get_node(i2)->_idx, 0);

  // All the prescheduled guys do not hold back internal nodes
  uint i3;
  for(i3 = 0; i3<phi_cnt; i3++ ) {  // For all pre-scheduled
    Node *n = block->get_node(i3);       // Get pre-scheduled
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j);
      if (get_block_for_node(m) == block) { // Local-block user
        int m_cnt = ready_cnt.at(m->_idx)-1;
        ready_cnt.at_put(m->_idx, m_cnt);   // Fix ready count
      }
    }
  }

  Node_List delay;
  // Make a worklist
  Node_List worklist;
  for(uint i4=i3; i4<node_cnt; i4++ ) {    // Put ready guys on worklist
    Node *m = block->get_node(i4);
    if( !ready_cnt.at(m->_idx) ) {   // Zero ready count?
      if (m->is_iteratively_computed()) {
        // Push induction variable increments last to allow other uses
        // of the phi to be scheduled first. The select() method breaks
        // ties in scheduling by worklist order.
        delay.push(m);
      } else if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_CreateEx) {
        // Force the CreateEx to the top of the list so it's processed
        // first and ends up at the start of the block.
        worklist.insert(0, m);
      } else {
        worklist.push(m);         // Then on to worklist!
      }
    }
  }
  while (delay.size()) {
    Node* d = delay.pop();
    worklist.push(d);
  }

  // Warm up the 'next_call' heuristic bits
  needed_for_next_call(block, block->head(), next_call);

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      for (uint j=0; j< block->number_of_nodes(); j++) {
        Node     *n = block->get_node(j);
        int     idx = n->_idx;
        tty->print("#   ready cnt:%3d  ", ready_cnt.at(idx));
        tty->print("latency:%3d  ", get_latency_for_node(n));
        tty->print("%4d: %s\n", idx, n->Name());
      }
    }
#endif

  uint max_idx = (uint)ready_cnt.length();
  // Pull from worklist and schedule
  while( worklist.size() ) {    // Worklist is not ready

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("#   ready list:");
      for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
        Node *n = worklist[i];      // Get Node on worklist
        tty->print(" %d", n->_idx);
      }
      tty->cr();
    }
#endif

    // Select and pop a ready guy from worklist
    Node* n = select(block, worklist, ready_cnt, next_call, phi_cnt);
    block->map_node(n, phi_cnt++);    // Schedule him next

#ifndef PRODUCT
    if (trace_opto_pipelining()) {
      tty->print("#    select %d: %s", n->_idx, n->Name());
      tty->print(", latency:%d", get_latency_for_node(n));
      n->dump();
      if (Verbose) {
        tty->print("#   ready list:");
        for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
          Node *n = worklist[i];      // Get Node on worklist
          tty->print(" %d", n->_idx);
        }
        tty->cr();
      }
    }

#endif
    if( n->is_MachCall() ) {
      MachCallNode *mcall = n->as_MachCall();
      phi_cnt = sched_call(block, phi_cnt, worklist, ready_cnt, mcall, next_call);
      continue;
    }

    if (n->is_Mach() && n->as_Mach()->has_call()) {
      RegMask regs;
      regs.Insert(_matcher.c_frame_pointer());
      regs.OR(n->out_RegMask());

      MachProjNode *proj = new (C) MachProjNode( n, 1, RegMask::Empty, MachProjNode::fat_proj );
      map_node_to_block(proj, block);
      block->insert_node(proj, phi_cnt++);

      add_call_kills(proj, regs, _matcher._c_reg_save_policy, false);
    }

    // Children are now all ready
    for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
      Node* m = n->fast_out(i5); // Get user
      if (get_block_for_node(m) != block) {
        continue;
      }
      if( m->is_Phi() ) continue;
      if (m->_idx >= max_idx) { // new node, skip it
        assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
        continue;
      }
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }
  }

  if( phi_cnt != block->end_idx() ) {
    // did not schedule all.  Retry, Bailout, or Die
    if (C->subsume_loads() == true && !C->failing()) {
      // Retry with subsume_loads == false
      // If this is the first failure, the sentinel string will "stick"
      // to the Compile object, and the C2Compiler will see it and retry.
      C->record_failure(C2Compiler::retry_no_subsuming_loads());
    }
    // assert( phi_cnt == end_idx(), "did not schedule all" );
    return false;
  }

#ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print_cr("#");
    tty->print_cr("# after schedule_local");
    for (uint i = 0;i < block->number_of_nodes();i++) {
      tty->print("# ");
      block->get_node(i)->fast_dump();
    }
    tty->cr();
  }
#endif


  return true;
}
예제 #6
0
파일: lcm.cpp 프로젝트: dain/graal
//------------------------------sched_call-------------------------------------
uint PhaseCFG::sched_call(Block* block, uint node_cnt, Node_List& worklist, GrowableArray<int>& ready_cnt, MachCallNode* mcall, VectorSet& next_call) {
  RegMask regs;

  // Schedule all the users of the call right now.  All the users are
  // projection Nodes, so they must be scheduled next to the call.
  // Collect all the defined registers.
  for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
    Node* n = mcall->fast_out(i);
    assert( n->is_MachProj(), "" );
    int n_cnt = ready_cnt.at(n->_idx)-1;
    ready_cnt.at_put(n->_idx, n_cnt);
    assert( n_cnt == 0, "" );
    // Schedule next to call
    block->map_node(n, node_cnt++);
    // Collect defined registers
    regs.OR(n->out_RegMask());
    // Check for scheduling the next control-definer
    if( n->bottom_type() == Type::CONTROL )
      // Warm up next pile of heuristic bits
      needed_for_next_call(block, n, next_call);

    // Children of projections are now all ready
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j); // Get user
      if(get_block_for_node(m) != block) {
        continue;
      }
      if( m->is_Phi() ) continue;
      int m_cnt = ready_cnt.at(m->_idx)-1;
      ready_cnt.at_put(m->_idx, m_cnt);
      if( m_cnt == 0 )
        worklist.push(m);
    }

  }

  // Act as if the call defines the Frame Pointer.
  // Certainly the FP is alive and well after the call.
  regs.Insert(_matcher.c_frame_pointer());

  // Set all registers killed and not already defined by the call.
  uint r_cnt = mcall->tf()->range()->cnt();
  int op = mcall->ideal_Opcode();
  MachProjNode *proj = new (C) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
  map_node_to_block(proj, block);
  block->insert_node(proj, node_cnt++);

  // Select the right register save policy.
  const char * save_policy;
  switch (op) {
    case Op_CallRuntime:
    case Op_CallLeaf:
    case Op_CallLeafNoFP:
      // Calling C code so use C calling convention
      save_policy = _matcher._c_reg_save_policy;
      break;

    case Op_CallStaticJava:
    case Op_CallDynamicJava:
      // Calling Java code so use Java calling convention
      save_policy = _matcher._register_save_policy;
      break;

    default:
      ShouldNotReachHere();
  }

  // When using CallRuntime mark SOE registers as killed by the call
  // so values that could show up in the RegisterMap aren't live in a
  // callee saved register since the register wouldn't know where to
  // find them.  CallLeaf and CallLeafNoFP are ok because they can't
  // have debug info on them.  Strictly speaking this only needs to be
  // done for oops since idealreg2debugmask takes care of debug info
  // references but there no way to handle oops differently than other
  // pointers as far as the kill mask goes.
  bool exclude_soe = op == Op_CallRuntime;

  // If the call is a MethodHandle invoke, we need to exclude the
  // register which is used to save the SP value over MH invokes from
  // the mask.  Otherwise this register could be used for
  // deoptimization information.
  if (op == Op_CallStaticJava) {
    MachCallStaticJavaNode* mcallstaticjava = (MachCallStaticJavaNode*) mcall;
    if (mcallstaticjava->_method_handle_invoke)
      proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask());
  }

  add_call_kills(proj, regs, save_policy, exclude_soe);

  return node_cnt;
}
예제 #7
0
uint IndexSet::lrg_union(uint lr1, uint lr2,
                         const uint fail_degree,
                         const PhaseIFG *ifg,
                         const RegMask &mask ) {
  IndexSet *one = ifg->neighbors(lr1);
  IndexSet *two = ifg->neighbors(lr2);
  LRG &lrg1 = ifg->lrgs(lr1);
  LRG &lrg2 = ifg->lrgs(lr2);
#ifdef ASSERT
  assert(_max_elements == one->_max_elements, "max element mismatch");
  check_watch("union destination");
  one->check_watch("union source");
  two->check_watch("union source");
#endif

  // Compute the degree of the combined live-range.  The combined
  // live-range has the union of the original live-ranges' neighbors set as
  // well as the neighbors of all intermediate copies, minus those neighbors
  // that can not use the intersected allowed-register-set.

  // Copy the larger set.  Insert the smaller set into the larger.
  if (two->count() > one->count()) {
    IndexSet *temp = one;
    one = two;
    two = temp;
  }

  clear();

  // Used to compute degree of register-only interferences.  Infinite-stack
  // neighbors do not alter colorability, as they can always color to some
  // other color.  (A variant of the Briggs assertion)
  uint reg_degree = 0;

  uint element;
  // Load up the combined interference set with the neighbors of one
  IndexSetIterator elements(one);
  while ((element = elements.next()) != 0) {
    LRG &lrg = ifg->lrgs(element);
    if (mask.overlap(lrg.mask())) {
      insert(element);
      if( !lrg.mask().is_AllStack() ) {
        reg_degree += lrg1.compute_degree(lrg);
        if( reg_degree >= fail_degree ) return reg_degree;
      } else {
        // !!!!! Danger!  No update to reg_degree despite having a neighbor.
        // A variant of the Briggs assertion.
        // Not needed if I simplify during coalesce, ala George/Appel.
        assert( lrg.lo_degree(), "" );
      }
    }
  }
  // Add neighbors of two as well
  IndexSetIterator elements2(two);
  while ((element = elements2.next()) != 0) {
    LRG &lrg = ifg->lrgs(element);
    if (mask.overlap(lrg.mask())) {
      if (insert(element)) {
        if( !lrg.mask().is_AllStack() ) {
          reg_degree += lrg2.compute_degree(lrg);
          if( reg_degree >= fail_degree ) return reg_degree;
        } else {
          // !!!!! Danger!  No update to reg_degree despite having a neighbor.
          // A variant of the Briggs assertion.
          // Not needed if I simplify during coalesce, ala George/Appel.
          assert( lrg.lo_degree(), "" );
        }
      }
    }
  }

  return reg_degree;
}
예제 #8
0
//------------------------------copy_copy--------------------------------------
// See if I can coalesce a series of multiple copies together.  I need the
// final dest copy and the original src copy.  They can be the same Node.
// Compute the compatible register masks.
bool PhaseConservativeCoalesce::copy_copy( Node *dst_copy, Node *src_copy, Block *b, uint bindex ) {

  if( !dst_copy->is_SpillCopy() ) return false;
  if( !src_copy->is_SpillCopy() ) return false;
  Node *src_def = src_copy->in(src_copy->is_Copy());
  uint lr1 = _phc.Find(dst_copy);
  uint lr2 = _phc.Find(src_def );

  // Same live ranges already?
  if( lr1 == lr2 ) return false;

  // Interfere?
  if( _phc._ifg->test_edge_sq( lr1, lr2 ) ) return false;

  // Not an oop->int cast; oop->oop, int->int, AND int->oop are OK.
  if( !lrgs(lr1)._is_oop && lrgs(lr2)._is_oop ) // not an oop->int cast
    return false;

  // Coalescing between an aligned live range and a mis-aligned live range?
  // No, no!  Alignment changes how we count degree.
  if( lrgs(lr1)._fat_proj != lrgs(lr2)._fat_proj )
    return false;

  // Sort; use smaller live-range number
  Node *lr1_node = dst_copy;
  Node *lr2_node = src_def;
  if( lr1 > lr2 ) {
    uint tmp = lr1; lr1 = lr2; lr2 = tmp;
    lr1_node = src_def;  lr2_node = dst_copy;
  }

  // Check for compatibility of the 2 live ranges by
  // intersecting their allowed register sets.
  RegMask rm = lrgs(lr1).mask();
  rm.AND(lrgs(lr2).mask());
  // Number of bits free
  uint rm_size = rm.Size();

  if (UseFPUForSpilling && rm.is_AllStack() ) {
    // Don't coalesce when frequency difference is large
    Block *dst_b = _phc._cfg._bbs[dst_copy->_idx];
    Block *src_def_b = _phc._cfg._bbs[src_def->_idx];
    if (src_def_b->_freq > 10*dst_b->_freq )
      return false;
  }

  // If we can use any stack slot, then effective size is infinite
  if( rm.is_AllStack() ) rm_size += 1000000;
  // Incompatible masks, no way to coalesce
  if( rm_size == 0 ) return false;

  // Another early bail-out test is when we are double-coalescing and the
  // 2 copies are separated by some control flow.
  if( dst_copy != src_copy ) {
    Block *src_b = _phc._cfg._bbs[src_copy->_idx];
    Block *b2 = b;
    while( b2 != src_b ) {
      if( b2->num_preds() > 2 ){// Found merge-point
        _phc._lost_opp_cflow_coalesce++;
        // extra record_bias commented out because Chris believes it is not
        // productive.  Since we can record only 1 bias, we want to choose one
        // that stands a chance of working and this one probably does not.
        //record_bias( _phc._lrgs, lr1, lr2 );
        return false;           // To hard to find all interferences
      }
      b2 = _phc._cfg._bbs[b2->pred(1)->_idx];
    }
  }

  // Union the two interference sets together into '_ulr'
  uint reg_degree = _ulr.lrg_union( lr1, lr2, rm_size, _phc._ifg, rm );

  if( reg_degree >= rm_size ) {
    record_bias( _phc._ifg, lr1, lr2 );
    return false;
  }

  // Now I need to compute all the interferences between dst_copy and
  // src_copy.  I'm not willing visit the entire interference graph, so
  // I limit my search to things in dst_copy's block or in a straight
  // line of previous blocks.  I give up at merge points or when I get
  // more interferences than my degree.  I can stop when I find src_copy.
  if( dst_copy != src_copy ) {
    reg_degree = compute_separating_interferences(dst_copy, src_copy, b, bindex, rm, rm_size, reg_degree, lr1, lr2 );
    if( reg_degree == max_juint ) {
      record_bias( _phc._ifg, lr1, lr2 );
      return false;
    }
  } // End of if dst_copy & src_copy are different


  // ---- THE COMBINED LRG IS COLORABLE ----

  // YEAH - Now coalesce this copy away
  assert( lrgs(lr1).num_regs() == lrgs(lr2).num_regs(),   "" );

  IndexSet *n_lr1 = _phc._ifg->neighbors(lr1);
  IndexSet *n_lr2 = _phc._ifg->neighbors(lr2);

  // Update the interference graph
  update_ifg(lr1, lr2, n_lr1, n_lr2);

  _ulr.remove(lr1);

  // Uncomment the following code to trace Coalescing in great detail.
  //
  //if (false) {
  //  tty->cr();
  //  tty->print_cr("#######################################");
  //  tty->print_cr("union %d and %d", lr1, lr2);
  //  n_lr1->dump();
  //  n_lr2->dump();
  //  tty->print_cr("resulting set is");
  //  _ulr.dump();
  //}

  // Replace n_lr1 with the new combined live range.  _ulr will use
  // n_lr1's old memory on the next iteration.  n_lr2 is cleared to
  // send its internal memory to the free list.
  _ulr.swap(n_lr1);
  _ulr.clear();
  n_lr2->clear();

  lrgs(lr1).set_degree( _phc._ifg->effective_degree(lr1) );
  lrgs(lr2).set_degree( 0 );

  // Join live ranges.  Merge larger into smaller.  Union lr2 into lr1 in the
  // union-find tree
  union_helper( lr1_node, lr2_node, lr1, lr2, src_def, dst_copy, src_copy, b, bindex );
  // Combine register restrictions
  lrgs(lr1).set_mask(rm);
  lrgs(lr1).compute_set_mask_size();
  lrgs(lr1)._cost += lrgs(lr2)._cost;
  lrgs(lr1)._area += lrgs(lr2)._area;

  // While its uncommon to successfully coalesce live ranges that started out
  // being not-lo-degree, it can happen.  In any case the combined coalesced
  // live range better Simplify nicely.
  lrgs(lr1)._was_lo = 1;

  // kinda expensive to do all the time
  //tty->print_cr("warning: slow verify happening");
  //_phc._ifg->verify( &_phc );
  return true;
}
예제 #9
0
파일: lcm.cpp 프로젝트: AllenWeb/openjdk-1
//------------------------------sched_call-------------------------------------
uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call ) {
  RegMask regs;

  // Schedule all the users of the call right now.  All the users are
  // projection Nodes, so they must be scheduled next to the call.
  // Collect all the defined registers.
  for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
    Node* n = mcall->fast_out(i);
    assert( n->Opcode()==Op_MachProj, "" );
    --ready_cnt[n->_idx];
    assert( !ready_cnt[n->_idx], "" );
    // Schedule next to call
    _nodes.map(node_cnt++, n);
    // Collect defined registers
    regs.OR(n->out_RegMask());
    // Check for scheduling the next control-definer
    if( n->bottom_type() == Type::CONTROL )
      // Warm up next pile of heuristic bits
      needed_for_next_call(n, next_call, bbs);

    // Children of projections are now all ready
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
      Node* m = n->fast_out(j); // Get user
      if( bbs[m->_idx] != this ) continue;
      if( m->is_Phi() ) continue;
      if( !--ready_cnt[m->_idx] )
        worklist.push(m);
    }

  }

  // Act as if the call defines the Frame Pointer.
  // Certainly the FP is alive and well after the call.
  regs.Insert(matcher.c_frame_pointer());

  // Set all registers killed and not already defined by the call.
  uint r_cnt = mcall->tf()->range()->cnt();
  int op = mcall->ideal_Opcode();
  MachProjNode *proj = new (matcher.C, 1) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
  bbs.map(proj->_idx,this);
  _nodes.insert(node_cnt++, proj);

  // Select the right register save policy.
  const char * save_policy;
  switch (op) {
    case Op_CallRuntime:
    case Op_CallLeaf:
    case Op_CallLeafNoFP:
      // Calling C code so use C calling convention
      save_policy = matcher._c_reg_save_policy;
      break;

    case Op_CallStaticJava:
    case Op_CallDynamicJava:
      // Calling Java code so use Java calling convention
      save_policy = matcher._register_save_policy;
      break;

    default:
      ShouldNotReachHere();
  }

  // When using CallRuntime mark SOE registers as killed by the call
  // so values that could show up in the RegisterMap aren't live in a
  // callee saved register since the register wouldn't know where to
  // find them.  CallLeaf and CallLeafNoFP are ok because they can't
  // have debug info on them.  Strictly speaking this only needs to be
  // done for oops since idealreg2debugmask takes care of debug info
  // references but there no way to handle oops differently than other
  // pointers as far as the kill mask goes.
  bool exclude_soe = op == Op_CallRuntime;

  // If the call is a MethodHandle invoke, we need to exclude the
  // register which is used to save the SP value over MH invokes from
  // the mask.  Otherwise this register could be used for
  // deoptimization information.
  if (op == Op_CallStaticJava) {
    MachCallStaticJavaNode* mcallstaticjava = (MachCallStaticJavaNode*) mcall;
    if (mcallstaticjava->_method_handle_invoke)
      proj->_rout.OR(Matcher::method_handle_invoke_SP_save_mask());
  }

  // Fill in the kill mask for the call
  for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) {
    if( !regs.Member(r) ) {     // Not already defined by the call
      // Save-on-call register?
      if ((save_policy[r] == 'C') ||
          (save_policy[r] == 'A') ||
          ((save_policy[r] == 'E') && exclude_soe)) {
        proj->_rout.Insert(r);
      }
    }
  }

  return node_cnt;
}