Example #1
0
static int
rank (rtx insn1, rtx insn2)
{
  basic_block bb1 = BLOCK_FOR_INSN (insn1);
  basic_block bb2 = BLOCK_FOR_INSN (insn2);

  if (bb1->count > bb2->count
      || bb1->frequency > bb2->frequency)
    return -1;
  if (bb1->count < bb2->count
      || bb1->frequency < bb2->frequency)
    return 1;
  return 0;
}
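
The comparator above follows the usual three-way contract (a negative return sorts its first argument earlier), so hotter blocks sort first; note it only yields a consistent ordering when count and frequency agree on the comparison's direction. Below is a minimal standalone sketch, with mock types rather than GCC's rtx/basic_block, of the same contract driving qsort:

#include <stdio.h>
#include <stdlib.h>

struct mock_bb { long count; int frequency; };

static int
mock_rank (const void *p1, const void *p2)
{
  const struct mock_bb *bb1 = (const struct mock_bb *) p1;
  const struct mock_bb *bb2 = (const struct mock_bb *) p2;

  if (bb1->count > bb2->count || bb1->frequency > bb2->frequency)
    return -1;
  if (bb1->count < bb2->count || bb1->frequency < bb2->frequency)
    return 1;
  return 0;
}

int
main (void)
{
  struct mock_bb bbs[] = { { 10, 1 }, { 30, 3 }, { 20, 2 } };

  qsort (bbs, 3, sizeof bbs[0], mock_rank);
  /* Prints "30 20 10": hottest block first.  */
  printf ("%ld %ld %ld\n", bbs[0].count, bbs[1].count, bbs[2].count);
  return 0;
}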
Example #2
0
static void
split_pattern_seq (void)
{
  rtx insn;
  basic_block bb;
  rtx retlabel, retjmp, saveinsn;
  int i;
  seq_block sb;

  insn = pattern_seqs->insn;
  bb = BLOCK_FOR_INSN (insn);

  /* Get the label after the sequence. This will be the return address. The
     label will be referenced using a symbol_ref so protect it from
     deletion.  */
  retlabel = block_label_after (insn);
  LABEL_PRESERVE_P (retlabel) = 1;

  /* Emit an indirect jump via the link register after the sequence acting
     as the return insn.  Also emit a barrier and update the basic block.  */
  if (!find_reg_note (BB_END (bb), REG_NORETURN, NULL))
    retjmp = emit_jump_insn_after (gen_indirect_jump (pattern_seqs->link_reg),
                                   BB_END (bb));
  emit_barrier_after (BB_END (bb));

  /* Replace all outgoing edges with a new one to the block of RETLABEL.  */
  while (EDGE_COUNT (bb->succs) != 0)
    remove_edge (EDGE_SUCC (bb, 0));
  make_edge (bb, BLOCK_FOR_INSN (retlabel), EDGE_ABNORMAL);

  /* Split the sequence according to SEQ_BLOCKS and cache the label of the
     resulting basic blocks.  */
  i = 0;
  for (sb = seq_blocks; sb; sb = sb->next_seq_block)
    {
      for (; i < sb->length; i++)
        insn = prev_insn_in_block (insn);

      sb->label = block_label (split_block_and_df_analyze (bb, insn));
    }

  /* Emit an insn saving the return address to the link register before the
     sequence.  */
  saveinsn = emit_insn_after (gen_move_insn (pattern_seqs->link_reg,
                              gen_symbol_ref_rtx_for_label
                              (retlabel)), BB_END (bb));
  /* Update liveness info.  */
  SET_REGNO_REG_SET (df_get_live_out (bb),
                     REGNO (pattern_seqs->link_reg));
}
Example #3
0
/* The function is used to sort moves according to their execution
   frequencies.	 */
static int
move_freq_compare_func (const void *v1p, const void *v2p)
{
  rtx mv1 = *(const rtx *) v1p;
  rtx mv2 = *(const rtx *) v2p;
  int pri1, pri2;

  pri1 = BLOCK_FOR_INSN (mv1)->frequency;
  pri2 = BLOCK_FOR_INSN (mv2)->frequency;
  if (pri2 - pri1)
    return pri2 - pri1;

  /* If frequencies are equal, sort by moves, so that the results of
     qsort leave nothing to chance.  */
  return (int) INSN_UID (mv1) - (int) INSN_UID (mv2);
}
Example #4
0
/* Dump insn INSN honoring FLAGS.  */
void
dump_insn_rtx_1 (rtx insn, int flags)
{
  int all;

  /* flags == -1 also means dumping all.  */
  all = (flags & 1);
  if (all)
    flags |= DUMP_INSN_RTX_ALL;

  sel_print ("(");

  if (flags & DUMP_INSN_RTX_UID)
    sel_print ("%d;", INSN_UID (insn));

  if (flags & DUMP_INSN_RTX_PATTERN)
    {
      char buf[2048];

      print_insn (buf, insn, 0);
      sel_print ("%s;", buf);
    }

  if (flags & DUMP_INSN_RTX_BBN)
    {
      basic_block bb = BLOCK_FOR_INSN (insn);

      sel_print ("bb:%d;", bb != NULL ? bb->index : -1);
    }

  sel_print (")");
}
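
The dump is driven by single-bit flags, with the low bit promoting to DUMP_INSN_RTX_ALL. A minimal standalone sketch of the same convention; the MOCK_DUMP_* values here are illustrative stand-ins, not GCC's actual DUMP_INSN_RTX_* constants:

#include <stdio.h>

#define MOCK_DUMP_UID (1 << 1)
#define MOCK_DUMP_BBN (1 << 2)
#define MOCK_DUMP_ALL (MOCK_DUMP_UID | MOCK_DUMP_BBN)

static void
mock_dump (int uid, int bbn, int flags)
{
  /* The low bit requests everything, as with flags == -1 above.  */
  if (flags & 1)
    flags |= MOCK_DUMP_ALL;

  printf ("(");
  if (flags & MOCK_DUMP_UID)
    printf ("%d;", uid);
  if (flags & MOCK_DUMP_BBN)
    printf ("bb:%d;", bbn);
  printf (")\n");
}

int
main (void)
{
  mock_dump (42, 3, MOCK_DUMP_UID);  /* Prints "(42;)".  */
  mock_dump (42, 3, 1);              /* Prints "(42;bb:3;)".  */
  return 0;
}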
Example #5
0
static void
split_blocks_after_seqs (void)
{
  seq_block sb;
  matching_seq mseq;

  block_label_after (pattern_seqs->insn);
  for (sb = seq_blocks; sb; sb = sb->next_seq_block)
    {
      for (mseq = sb->matching_seqs; mseq; mseq = mseq->next_matching_seq)
        {
          block_label_after (mseq->insn);
          IOR_REG_SET (df_get_live_out (BLOCK_FOR_INSN (pattern_seqs->insn)),
                       df_get_live_out (BLOCK_FOR_INSN (mseq->insn)));
        }
    }
}
Example #6
0
static rtx
block_label_after (rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  if ((insn == BB_END (bb)) && (bb->next_bb != EXIT_BLOCK_PTR))
    return block_label (bb->next_bb);
  else
    return block_label (split_block_and_df_analyze (bb, insn));
}
Example #7
0
static void
erase_matching_seqs (void)
{
  seq_block sb;
  matching_seq mseq;
  rtx insn;
  basic_block bb;
  rtx retlabel, saveinsn, callinsn;
  int i;

  for (sb = seq_blocks; sb; sb = sb->next_seq_block)
    {
      for (mseq = sb->matching_seqs; mseq; mseq = mseq->next_matching_seq)
        {
          insn = mseq->insn;
          bb = BLOCK_FOR_INSN (insn);

          /* Get the label after the sequence. This will be the return
             address. The label will be referenced using a symbol_ref so
             protect it from deletion.  */
          retlabel = block_label_after (insn);
          LABEL_PRESERVE_P (retlabel) = 1;

          /* Delete the insns of the sequence.  */
          for (i = 0; i < sb->length; i++)
            insn = prev_insn_in_block (insn);
          delete_basic_block (split_block_and_df_analyze (bb, insn));

          /* Emit an insn saving the return address to the link register
             before the deleted sequence.  */
          saveinsn = emit_insn_after (gen_move_insn (pattern_seqs->link_reg,
                                      gen_symbol_ref_rtx_for_label
                                      (retlabel)),
                                      BB_END (bb));
          BLOCK_FOR_INSN (saveinsn) = bb;

          /* Emit a jump to the appropriate part of the pattern sequence
             after the save insn. Also update the basic block.  */
          callinsn = emit_jump_insn_after (gen_jump (sb->label), saveinsn);
          JUMP_LABEL (callinsn) = sb->label;
          LABEL_NUSES (sb->label)++;
          BLOCK_FOR_INSN (callinsn) = bb;
          BB_END (bb) = callinsn;

          /* Maintain control flow and liveness information.  */
          SET_REGNO_REG_SET (df_get_live_out (bb),
                             REGNO (pattern_seqs->link_reg));
          emit_barrier_after (BB_END (bb));
          make_single_succ_edge (bb, BLOCK_FOR_INSN (sb->label), 0);
          IOR_REG_SET (df_get_live_out (bb),
		       df_get_live_in (BLOCK_FOR_INSN (sb->label)));

          make_edge (BLOCK_FOR_INSN (seq_blocks->label),
                     BLOCK_FOR_INSN (retlabel), EDGE_ABNORMAL);
        }
    }
}
Example #8
0
void
ebb_compute_jump_reg_dependencies (rtx insn, regset used)
{
  basic_block b = BLOCK_FOR_INSN (insn);
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, b->succs)
    if ((e->flags & EDGE_FALLTHRU) == 0)
      bitmap_ior_into (used, df_get_live_in (e->dest));
}
Example #9
0
static rtx
get_next_ref (int regno, basic_block bb, rtx *next_array)
{
  rtx insn = next_array[regno];

  /* Lazy about cleaning out the next_arrays.  */
  if (insn && BLOCK_FOR_INSN (insn) != bb)
    {
      next_array[regno] = NULL;
      insn = NULL;
    }

  return insn;
}
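
A standalone sketch of the lazy-invalidation idea above: an entry recorded while scanning a previous basic block is detected as stale and dropped at lookup time, instead of being cleaned eagerly on every block transition. The mock_insn type and block numbers are illustrative stand-ins:

#include <stdio.h>

#define NREGS 4

struct mock_insn { int bb_index; };

static struct mock_insn *next_array[NREGS];

static struct mock_insn *
mock_get_next_ref (int regno, int bb_index)
{
  struct mock_insn *insn = next_array[regno];

  /* Lazy about cleaning out next_array.  */
  if (insn && insn->bb_index != bb_index)
    {
      next_array[regno] = NULL;
      insn = NULL;
    }
  return insn;
}

int
main (void)
{
  struct mock_insn i1 = { 1 };

  next_array[0] = &i1;
  printf ("%s\n", mock_get_next_ref (0, 1) ? "hit" : "miss");  /* hit */
  printf ("%s\n", mock_get_next_ref (0, 2) ? "hit" : "miss");  /* miss: stale */
  return 0;
}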
Example #10
0
static bool
rtl_value_profile_transformations (void)
{
  rtx insn, next;
  int changed = false;

  for (insn = get_insns (); insn; insn = next)
    {
      next = NEXT_INSN (insn);

      if (!INSN_P (insn))
	continue;

      /* Scan for insn carrying a histogram.  */
      if (!find_reg_note (insn, REG_VALUE_PROFILE, 0))
	continue;

      /* Ignore cold areas -- we are enlarging the code.  */
      if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
	continue;

      if (dump_file)
	{
	  fprintf (dump_file, "Trying transformations on insn %d\n",
		   INSN_UID (insn));
	  print_rtl_single (dump_file, insn);
	}

      /* Transformations:  */
      if (flag_value_profile_transformations
	  && (mod_subtract_transform (insn)
	      || divmod_fixed_value_transform (insn)
	      || mod_pow2_value_transform (insn)))
	changed = true;
#ifdef HAVE_prefetch
      if (flag_speculative_prefetching
	  && speculative_prefetching_transform (insn))
	changed = true;
#endif
    }

  if (changed)
    {
      commit_edge_insertions ();
      allocate_reg_info (max_reg_num (), FALSE, FALSE);
    }

  return changed;
}
Example #11
0
static rtx
prev_insn_in_block (rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);

  if (!bb)
    return NULL_RTX;

  while (insn != BB_HEAD (bb))
    {
      insn = PREV_INSN (insn);
      if (INSN_P (insn))
        return insn;
    }
  return NULL_RTX;
}
Example #12
0
void
ebb_compute_jump_reg_dependencies (rtx insn, regset cond_set, regset used,
				   regset set)
{
  basic_block b = BLOCK_FOR_INSN (insn);
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, b->succs)
    if (e->flags & EDGE_FALLTHRU)
      /* The jump may be a by-product of a branch that has been merged
	 in the main codepath after being conditionalized.  Therefore
	 it may guard the fallthrough block from using a value that has
	 conditionally overwritten that of the main codepath.  So we
	 consider that it restores the value of the main codepath.  */
      bitmap_and (set, df_get_live_in (e->dest), cond_set);
    else
      bitmap_ior_into (used, df_get_live_in (e->dest));
}
Example #13
0
static void
clear_regs_live_in_seq (HARD_REG_SET * regs, rtx insn, int length)
{
  basic_block bb;
  regset_head live;
  HARD_REG_SET hlive;
  rtx x;
  int i;

  /* Initialize liveness propagation.  */
  bb = BLOCK_FOR_INSN (insn);
  INIT_REG_SET (&live);
  bitmap_copy (&live, DF_LR_OUT (bb));
  df_simulate_initialize_backwards (bb, &live);

  /* Propagate until INSN is found.  */
  for (x = BB_END (bb); x != insn; x = PREV_INSN (x))
    df_simulate_one_insn_backwards (bb, x, &live);

  /* Clear registers live after INSN.  */
  renumbered_reg_set_to_hard_reg_set (&hlive, &live);
  AND_COMPL_HARD_REG_SET (*regs, hlive);

  /* Clear registers live in and before the sequence.  */
  for (i = 0; i < length;)
    {
      rtx prev = PREV_INSN (x);
      df_simulate_one_insn_backwards (bb, x, &live);

      if (INSN_P (x))
        {
          renumbered_reg_set_to_hard_reg_set (&hlive, &live);
          AND_COMPL_HARD_REG_SET (*regs, hlive);
          i++;
        }

      x = prev;
    }

  /* Free unused data.  */
  CLEAR_REG_SET (&live);
}
Example #14
0
static basic_block
earliest_block_with_similiar_load (basic_block last_block, rtx load_insn)
{
  sd_iterator_def back_sd_it;
  dep_t back_dep;
  basic_block bb, earliest_block = NULL;

  FOR_EACH_DEP (load_insn, SD_LIST_BACK, back_sd_it, back_dep)
    {
      rtx insn1 = DEP_PRO (back_dep);

      if (DEP_TYPE (back_dep) == REG_DEP_TRUE)
	/* Found a DEF-USE dependence (insn1, load_insn).  */
	{
	  sd_iterator_def fore_sd_it;
	  dep_t fore_dep;

	  FOR_EACH_DEP (insn1, SD_LIST_FORW, fore_sd_it, fore_dep)
	    {
	      rtx insn2 = DEP_CON (fore_dep);
	      basic_block insn2_block = BLOCK_FOR_INSN (insn2);

	      if (DEP_TYPE (fore_dep) == REG_DEP_TRUE)
		{
		  if (earliest_block != NULL
		      && earliest_block->index < insn2_block->index)
		    continue;

		  /* Found a DEF-USE dependence (insn1, insn2).  */
		  if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
		    /* insn2 not guaranteed to be a 1 base reg load.  */
		    continue;

		  for (bb = last_block; bb; bb = (basic_block) bb->aux)
		    if (insn2_block == bb)
		      break;

		  if (!bb)
		    /* insn2 is the similar load.  */
		    earliest_block = insn2_block;
		}
	    }
	}
    }

  return earliest_block;
}
Example #15
0
/* Do transform 1) on INSN if applicable.  */
static bool
divmod_fixed_value_transform (rtx insn)
{
  rtx set, set_src, set_dest, op1, op2, value, histogram;
  enum rtx_code code;
  enum machine_mode mode;
  gcov_type val, count, all;
  edge e;
  int prob;

  set = single_set (insn);
  if (!set)
    return false;

  set_src = SET_SRC (set);
  set_dest = SET_DEST (set);
  code = GET_CODE (set_src);
  mode = GET_MODE (set_dest);
  
  if (code != DIV && code != MOD && code != UDIV && code != UMOD)
    return false;
  op1 = XEXP (set_src, 0);
  op2 = XEXP (set_src, 1);

  for (histogram = REG_NOTES (insn);
       histogram;
       histogram = XEXP (histogram, 1))
    if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
	&& XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE))
      break;

  if (!histogram)
    return false;

  histogram = XEXP (XEXP (histogram, 0), 1);
  value = XEXP (histogram, 0);
  histogram = XEXP (histogram, 1);
  val = INTVAL (XEXP (histogram, 0));
  histogram = XEXP (histogram, 1);
  count = INTVAL (XEXP (histogram, 0));
  histogram = XEXP (histogram, 1);
  all = INTVAL (XEXP (histogram, 0));

  /* We require that count be at least half of all; this means
     that for the transformation to fire the value must be constant
     at least 50% of time (and 75% gives the guarantee of usage).  */
  if (!rtx_equal_p (op2, value) || 2 * count < all)
    return false;

  if (dump_file)
    fprintf (dump_file, "Div/mod by constant transformation on insn %d\n",
	     INSN_UID (insn));

  /* Compute probability of taking the optimal path.  */
  prob = (count * REG_BR_PROB_BASE + all / 2) / all;

  e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
  delete_insn (insn);
  
  insert_insn_on_edge (
	gen_divmod_fixed_value (mode, code, set_dest,
				op1, op2, val, prob), e);

  return true;
}
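
The probability computation scales count/all into REG_BR_PROB_BASE units with round-to-nearest (the "+ all / 2" term). A small worked example, assuming REG_BR_PROB_BASE is 10000 as in GCC of this vintage:

#include <stdio.h>

int
main (void)
{
  const long long REG_BR_PROB_BASE = 10000;  /* Assumed value.  */
  long long count = 2, all = 3;
  long long prob = (count * REG_BR_PROB_BASE + all / 2) / all;

  /* Prints 6667: two thirds rounded to the nearest unit of
     REG_BR_PROB_BASE; plain truncation would give 6666.  */
  printf ("prob = %lld\n", prob);
  return 0;
}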
Example #16
0
/* INSN is being scheduled after LAST.  Update counters.  */
static void
begin_schedule_ready (rtx insn, rtx last)
{
  sched_rgn_n_insns++;

  if (BLOCK_FOR_INSN (insn) == last_bb
      /* INSN is a jump in the last block, ...  */
      && control_flow_insn_p (insn)
      /* that is going to be moved over some instructions.  */
      && last != PREV_INSN (insn))
    {
      edge e;
      edge_iterator ei;
      basic_block bb;

      /* An obscure special case, where we have a partially dead
	 instruction scheduled after the last control flow instruction.
	 In this case we can create a new basic block.  There is
	 always exactly one such basic block, last in the sequence.  */
      
      FOR_EACH_EDGE (e, ei, last_bb->succs)
	if (e->flags & EDGE_FALLTHRU)
	  break;

#ifdef ENABLE_CHECKING
      gcc_assert (!e || !(e->flags & EDGE_COMPLEX));	    

      gcc_assert (BLOCK_FOR_INSN (insn) == last_bb
		  && !IS_SPECULATION_CHECK_P (insn)
		  && BB_HEAD (last_bb) != insn
		  && BB_END (last_bb) == insn);

      {
	rtx x;

	x = NEXT_INSN (insn);
	if (e)
	  gcc_assert (NOTE_P (x) || LABEL_P (x));
	else
	  gcc_assert (BARRIER_P (x));
      }
#endif

      if (e)
	{
	  bb = split_edge (e);
	  gcc_assert (NOTE_INSN_BASIC_BLOCK_P (BB_END (bb)));
	}
      else
	/* Create an empty unreachable block after the INSN.  */
	bb = create_basic_block (NEXT_INSN (insn), NULL_RTX, last_bb);
      
      /* split_edge () creates BB before E->DEST.  Keep in mind that
	 this operation extends the scheduling region to the end of BB.
	 Hence, we need to shift NEXT_TAIL so haifa-sched.c won't go
	 outside the scheduling region.  */
      current_sched_info->next_tail = NEXT_INSN (BB_END (bb));
      gcc_assert (current_sched_info->next_tail);

      /* Append new basic block to the end of the ebb.  */
      sched_init_only_bb (bb, last_bb);
      gcc_assert (last_bb == bb);
    }
}
Example #17
0
static bool
find_inc (bool first_try)
{
  rtx insn;
  basic_block bb = BLOCK_FOR_INSN (mem_insn.insn);
  rtx other_insn;
  df_ref *def_rec;

  /* Make sure this reg appears only once in this insn.  */
  if (count_occurrences (PATTERN (mem_insn.insn), mem_insn.reg0, 1) != 1)
    {
      if (dump_file)
	fprintf (dump_file, "mem count failure\n");
      return false;
    }

  if (dump_file)
    dump_mem_insn (dump_file);

  /* Find the next use that is an inc.  */
  insn = get_next_ref (REGNO (mem_insn.reg0),
		       BLOCK_FOR_INSN (mem_insn.insn),
		       reg_next_inc_use);
  if (!insn)
    return false;

  /* Even though we know the next use is an add or inc because it came
     from the reg_next_inc_use, we must still reparse.  */
  if (!parse_add_or_inc (insn, false))
    {
      /* Next use was not an add.  Look for one extra case. It could be
	 that we have:

	 *(a + b)
	 ...= a;
	 ...= b + a

	 if we reverse the operands in the mem ref we would
	 find this.  Only try it once though.  */
      if (first_try && !mem_insn.reg1_is_const)
	{
	  reverse_mem ();
	  return find_inc (false);
	}
      else
	return false;
    }

  /* Need to ensure that none of the operands of the inc instruction are
     assigned to by the mem insn.  */
  for (def_rec = DF_INSN_DEFS (mem_insn.insn); *def_rec; def_rec++)
    {
      df_ref def = *def_rec;
      unsigned int regno = DF_REF_REGNO (def);
      if ((regno == REGNO (inc_insn.reg0))
	  || (regno == REGNO (inc_insn.reg_res)))
	{
	  if (dump_file)
	    fprintf (dump_file, "inc conflicts with store failure.\n");
	  return false;
	}
      if (!inc_insn.reg1_is_const && (regno == REGNO (inc_insn.reg1)))
	{
	  if (dump_file)
	    fprintf (dump_file, "inc conflicts with store failure.\n");
	  return false;
	}
    }

  if (dump_file)
    dump_inc_insn (dump_file);

  if (inc_insn.form == FORM_POST_ADD)
    {
      /* Make sure that there is no insn that assigns to inc_insn.res
	 between the mem_insn and the inc_insn.  */
      rtx other_insn = get_next_ref (REGNO (inc_insn.reg_res),
				     BLOCK_FOR_INSN (mem_insn.insn),
				     reg_next_def);
      if (other_insn != inc_insn.insn)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "result of add is assigned to between mem and inc insns.\n");
	  return false;
	}

      other_insn = get_next_ref (REGNO (inc_insn.reg_res),
				 BLOCK_FOR_INSN (mem_insn.insn),
				 reg_next_use);
      if (other_insn
	  && (other_insn != inc_insn.insn)
	  && (DF_INSN_LUID (inc_insn.insn) > DF_INSN_LUID (other_insn)))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "result of add is used between mem and inc insns.\n");
	  return false;
	}

      /* For the post_add to work, the result_reg of the inc must not be
	 used in the mem insn since this will become the new index
	 register.  */
      if (reg_overlap_mentioned_p (inc_insn.reg_res, PATTERN (mem_insn.insn)))
	{
	  if (dump_file)
	    fprintf (dump_file, "base reg replacement failure.\n");
	  return false;
	}
    }

  if (mem_insn.reg1_is_const)
    {
      if (mem_insn.reg1_val == 0)
	{
	  if (!inc_insn.reg1_is_const)
	    {
	      /* The mem looks like *r0 and the rhs of the add has two
		 registers.  */
	      int luid = DF_INSN_LUID (inc_insn.insn);
	      if (inc_insn.form == FORM_POST_ADD)
		{
		  /* The trick is that we are not going to increment r0,
		     we are going to increment the result of the add insn.
		     For this trick to be correct, the result reg of
		     the inc must be a valid addressing reg.  */
		  addr_space_t as = MEM_ADDR_SPACE (*mem_insn.mem_loc);
		  if (GET_MODE (inc_insn.reg_res)
		      != targetm.addr_space.address_mode (as))
		    {
		      if (dump_file)
			fprintf (dump_file, "base reg mode failure.\n");
		      return false;
		    }

		  /* We also need to make sure that the next use of
		     inc result is after the inc.  */
		  other_insn
		    = get_next_ref (REGNO (inc_insn.reg1), bb, reg_next_use);
		  if (other_insn && luid > DF_INSN_LUID (other_insn))
		    return false;

		  if (!rtx_equal_p (mem_insn.reg0, inc_insn.reg0))
		    reverse_inc ();
		}

	      other_insn
		= get_next_ref (REGNO (inc_insn.reg1), bb, reg_next_def);
	      if (other_insn && luid > DF_INSN_LUID (other_insn))
		return false;
	    }
	}
      /* Both the inc/add and the mem have a constant.  Need to check
	 that the constants are ok. */
      else if ((mem_insn.reg1_val != inc_insn.reg1_val)
	       && (mem_insn.reg1_val != -inc_insn.reg1_val))
	return false;
    }
  else
    {
      /* The mem insn is of the form *(a + b) where a and b are both
	 regs.  It may be that in order to match the add or inc we
	 need to treat it as if it was *(b + a).  It may also be that
	 the add is of the form a + c where c does not match b and
	 then we just abandon this.  */

      int luid = DF_INSN_LUID (inc_insn.insn);
      rtx other_insn;

      /* Make sure this reg appears only once in this insn.  */
      if (count_occurrences (PATTERN (mem_insn.insn), mem_insn.reg1, 1) != 1)
	return false;

      if (inc_insn.form == FORM_POST_ADD)
	{
	  /* For this trick to be correct, the result reg of the inc
	     must be a valid addressing reg.  */
	  addr_space_t as = MEM_ADDR_SPACE (*mem_insn.mem_loc);
	  if (GET_MODE (inc_insn.reg_res)
	      != targetm.addr_space.address_mode (as))
	    {
	      if (dump_file)
		fprintf (dump_file, "base reg mode failure.\n");
	      return false;
	    }

	  if (rtx_equal_p (mem_insn.reg0, inc_insn.reg0))
	    {
	      if (!rtx_equal_p (mem_insn.reg1, inc_insn.reg1))
		{
		  /* See comment above on find_inc (false) call.  */
		  if (first_try)
		    {
		      reverse_mem ();
		      return find_inc (false);
		    }
		  else
		    return false;
		}

	      /* Need to check that there are no assignments to b
		 before the add insn.  */
	      other_insn
		= get_next_ref (REGNO (inc_insn.reg1), bb, reg_next_def);
	      if (other_insn && luid > DF_INSN_LUID (other_insn))
		return false;
	      /* All ok for the next step.  */
	    }
	  else
	    {
	      /* We know that mem_insn.reg0 must equal inc_insn.reg1
		 or else we would not have found the inc insn.  */
	      reverse_mem ();
	      if (!rtx_equal_p (mem_insn.reg0, inc_insn.reg0))
		{
		  /* See comment above on find_inc (false) call.  */
		  if (first_try)
		    return find_inc (false);
		  else
		    return false;
		}
	      /* To have gotten here, we know that:
	       *(b + a)

	       ... = (b + a)

	       We also know that the lhs of the inc is not b or a.  We
	       need to make sure that there are no assignments to b
	       between the mem ref and the inc.  */

	      other_insn
		= get_next_ref (REGNO (inc_insn.reg0), bb, reg_next_def);
	      if (other_insn && luid > DF_INSN_LUID (other_insn))
		return false;
	    }

	  /* Need to check that the next use of the add result is later than
	     add insn since this will be the reg incremented.  */
	  other_insn
	    = get_next_ref (REGNO (inc_insn.reg_res), bb, reg_next_use);
	  if (other_insn && luid > DF_INSN_LUID (other_insn))
	    return false;
	}
      else /* FORM_POST_INC.  There is less to check here because we
	      know that operands must line up.  */
	{
	  if (!rtx_equal_p (mem_insn.reg1, inc_insn.reg1))
	    /* See comment above on find_inc (false) call.  */
	    {
	      if (first_try)
		{
		  reverse_mem ();
		  return find_inc (false);
		}
	      else
		return false;
	    }

	  /* To have gotten here, we know that:
	   *(a + b)

	   ... = (a + b)

	   We also know that the lhs of the inc is not b.  We need to make
	   sure that there are no assignments to b between the mem ref and
	   the inc.  */
	  other_insn
	    = get_next_ref (REGNO (inc_insn.reg1), bb, reg_next_def);
	  if (other_insn && luid > DF_INSN_LUID (other_insn))
	    return false;
	}
    }

  if (inc_insn.form == FORM_POST_INC)
    {
      other_insn
	= get_next_ref (REGNO (inc_insn.reg0), bb, reg_next_use);
      /* When we found inc_insn, we were looking for the
	 next add or inc, not the next insn that used the
	 reg.  Because we are going to increment the reg
	 in this form, we need to make sure that there
	 were no intervening uses of reg.  */
      if (inc_insn.insn != other_insn)
	return false;
    }

  return try_merge ();
}
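
For intuition only, a source-level analogue of the rewrite the pass is driving at: a memory use followed by a separate increment of the same register becomes one access through an auto-incremented (POST_INC) address. This illustrates the effect, not the RTL transformation itself:

#include <stdio.h>

int
main (void)
{
  int buf[4] = { 10, 20, 30, 40 };
  int *a = buf;
  int x, y;

  /* Before: separate memory use and add insn.  */
  x = *a;
  a += 1;

  /* After: one access with a post-increment side effect.  */
  y = *a++;

  printf ("%d %d %d\n", x, y, (int) (a - buf));  /* 10 20 2 */
  return 0;
}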
Example #18
0
/* The major function for aggressive coalescing of moves, applied only
   when both pseudos were spilled and neither is a special reload
   pseudo.  */
bool
lra_coalesce (void)
{
  basic_block bb;
  rtx mv, set, insn, next, *sorted_moves;
  int i, mv_num, sregno, dregno;
  int coalesced_moves;
  int max_regno = max_reg_num ();
  bitmap_head involved_insns_bitmap;

  timevar_push (TV_LRA_COALESCE);

  if (lra_dump_file != NULL)
    fprintf (lra_dump_file,
	     "\n********** Pseudos coalescing #%d: **********\n\n",
	     ++lra_coalesce_iter);
  first_coalesced_pseudo = XNEWVEC (int, max_regno);
  next_coalesced_pseudo = XNEWVEC (int, max_regno);
  for (i = 0; i < max_regno; i++)
    first_coalesced_pseudo[i] = next_coalesced_pseudo[i] = i;
  sorted_moves = XNEWVEC (rtx, get_max_uid ());
  mv_num = 0;
  /* Collect moves.  */
  coalesced_moves = 0;
  FOR_EACH_BB (bb)
    {
      FOR_BB_INSNS_SAFE (bb, insn, next)
	if (INSN_P (insn)
	    && (set = single_set (insn)) != NULL_RTX
	    && REG_P (SET_DEST (set)) && REG_P (SET_SRC (set))
	    && (sregno = REGNO (SET_SRC (set))) >= FIRST_PSEUDO_REGISTER
	    && (dregno = REGNO (SET_DEST (set))) >= FIRST_PSEUDO_REGISTER
	    && mem_move_p (sregno, dregno)
	    && coalescable_pseudo_p (sregno) && coalescable_pseudo_p (dregno)
	    && ! side_effects_p (set)
	    && !(lra_intersected_live_ranges_p
		 (lra_reg_info[sregno].live_ranges,
		  lra_reg_info[dregno].live_ranges)))
	  sorted_moves[mv_num++] = insn;
    }
  qsort (sorted_moves, mv_num, sizeof (rtx), move_freq_compare_func);
  /* Coalesced copies, most frequently executed first.	*/
  bitmap_initialize (&coalesced_pseudos_bitmap, &reg_obstack);
  bitmap_initialize (&involved_insns_bitmap, &reg_obstack);
  for (i = 0; i < mv_num; i++)
    {
      mv = sorted_moves[i];
      set = single_set (mv);
      lra_assert (set != NULL && REG_P (SET_SRC (set))
		  && REG_P (SET_DEST (set)));
      sregno = REGNO (SET_SRC (set));
      dregno = REGNO (SET_DEST (set));
      if (first_coalesced_pseudo[sregno] == first_coalesced_pseudo[dregno])
	{
	  coalesced_moves++;
	  if (lra_dump_file != NULL)
	    fprintf
	      (lra_dump_file, "      Coalescing move %i:r%d-r%d (freq=%d)\n",
	       INSN_UID (mv), sregno, dregno,
	       BLOCK_FOR_INSN (mv)->frequency);
	  /* We updated involved_insns_bitmap when doing the merge.  */
	}
      else if (!(lra_intersected_live_ranges_p
		 (lra_reg_info[first_coalesced_pseudo[sregno]].live_ranges,
		  lra_reg_info[first_coalesced_pseudo[dregno]].live_ranges)))
	{
	  coalesced_moves++;
	  if (lra_dump_file != NULL)
	    fprintf
	      (lra_dump_file,
	       "  Coalescing move %i:r%d(%d)-r%d(%d) (freq=%d)\n",
	       INSN_UID (mv), sregno, ORIGINAL_REGNO (SET_SRC (set)),
	       dregno, ORIGINAL_REGNO (SET_DEST (set)),
	       BLOCK_FOR_INSN (mv)->frequency);
	  bitmap_ior_into (&involved_insns_bitmap,
			   &lra_reg_info[sregno].insn_bitmap);
	  bitmap_ior_into (&involved_insns_bitmap,
			   &lra_reg_info[dregno].insn_bitmap);
	  merge_pseudos (sregno, dregno);
	}
    }
  bitmap_initialize (&used_pseudos_bitmap, &reg_obstack);
  FOR_EACH_BB (bb)
    {
      update_live_info (df_get_live_in (bb));
      update_live_info (df_get_live_out (bb));
      FOR_BB_INSNS_SAFE (bb, insn, next)
	if (INSN_P (insn)
	    && bitmap_bit_p (&involved_insns_bitmap, INSN_UID (insn)))
	  {
	    if (! substitute (&insn))
	      continue;
	    lra_update_insn_regno_info (insn);
	    if ((set = single_set (insn)) != NULL_RTX && set_noop_p (set))
	      {
		/* Coalesced move.  */
		if (lra_dump_file != NULL)
		  fprintf (lra_dump_file, "	 Removing move %i (freq=%d)\n",
			 INSN_UID (insn), BLOCK_FOR_INSN (insn)->frequency);
		lra_set_insn_deleted (insn);
	      }
	  }
    }
  bitmap_clear (&used_pseudos_bitmap);
  bitmap_clear (&involved_insns_bitmap);
  bitmap_clear (&coalesced_pseudos_bitmap);
  if (lra_dump_file != NULL && coalesced_moves != 0)
    fprintf (lra_dump_file, "Coalesced Moves = %d\n", coalesced_moves);
  free (sorted_moves);
  free (next_coalesced_pseudo);
  free (first_coalesced_pseudo);
  timevar_pop (TV_LRA_COALESCE);
  return coalesced_moves != 0;
}
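
merge_pseudos is not shown above; here is a minimal standalone sketch of the circular-list coalescing it is assumed to perform. Each pseudo sits on a ring threaded through next_coalesced_pseudo, with first_coalesced_pseudo naming the ring's representative, so merging two classes is pointer surgery proportional to one ring's size:

#include <stdio.h>

#define NREGS 8

static int first_coalesced_pseudo[NREGS];
static int next_coalesced_pseudo[NREGS];

static void
merge_rings (int r1, int r2)
{
  int first = first_coalesced_pseudo[r1];
  int regno, tmp;

  if (first_coalesced_pseudo[r2] == first)
    return;  /* Already in the same class.  */

  /* Repoint every member of R2's ring at R1's representative.  */
  regno = r2;
  do
    {
      first_coalesced_pseudo[regno] = first;
      regno = next_coalesced_pseudo[regno];
    }
  while (regno != r2);

  /* Splice the two rings into one.  */
  tmp = next_coalesced_pseudo[r1];
  next_coalesced_pseudo[r1] = next_coalesced_pseudo[r2];
  next_coalesced_pseudo[r2] = tmp;
}

int
main (void)
{
  int i;

  for (i = 0; i < NREGS; i++)
    first_coalesced_pseudo[i] = next_coalesced_pseudo[i] = i;
  merge_rings (1, 2);
  merge_rings (1, 3);
  for (i = 1; i <= 3; i++)
    printf ("r%d -> r%d\n", i, first_coalesced_pseudo[i]);  /* All r1.  */
  return 0;
}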
Example #19
0
static int
reload_cse_simplify_operands (rtx insn, rtx testreg)
{
  int i, j;

  /* For each operand, all registers that are equivalent to it.  */
  HARD_REG_SET equiv_regs[MAX_RECOG_OPERANDS];

  const char *constraints[MAX_RECOG_OPERANDS];

  /* Vector recording how bad an alternative is.  */
  int *alternative_reject;
  /* Vector recording how many registers can be introduced by choosing
     this alternative.  */
  int *alternative_nregs;
  /* Array of vectors recording, for each operand and each alternative,
     which hard register to substitute, or -1 if the operand should be
     left as it is.  */
  int *op_alt_regno[MAX_RECOG_OPERANDS];
  /* Array of alternatives, sorted in order of decreasing desirability.  */
  int *alternative_order;

  extract_insn (insn);

  if (recog_data.n_alternatives == 0 || recog_data.n_operands == 0)
    return 0;

  /* Figure out which alternative currently matches.  */
  if (! constrain_operands (1))
    fatal_insn_not_found (insn);

  alternative_reject = XALLOCAVEC (int, recog_data.n_alternatives);
  alternative_nregs = XALLOCAVEC (int, recog_data.n_alternatives);
  alternative_order = XALLOCAVEC (int, recog_data.n_alternatives);
  memset (alternative_reject, 0, recog_data.n_alternatives * sizeof (int));
  memset (alternative_nregs, 0, recog_data.n_alternatives * sizeof (int));

  /* For each operand, find out which regs are equivalent.  */
  for (i = 0; i < recog_data.n_operands; i++)
    {
      cselib_val *v;
      struct elt_loc_list *l;
      rtx op;
      enum machine_mode mode;

      CLEAR_HARD_REG_SET (equiv_regs[i]);

      /* cselib blows up on CODE_LABELs.  Trying to fix that doesn't seem
	 right, so avoid the problem here.  Likewise if we have a constant
         and the insn pattern doesn't tell us the mode we need.  */
      if (LABEL_P (recog_data.operand[i])
	  || (CONSTANT_P (recog_data.operand[i])
	      && recog_data.operand_mode[i] == VOIDmode))
	continue;

      op = recog_data.operand[i];
      mode = GET_MODE (op);
#ifdef LOAD_EXTEND_OP
      if (MEM_P (op)
	  && GET_MODE_BITSIZE (mode) < BITS_PER_WORD
	  && LOAD_EXTEND_OP (mode) != UNKNOWN)
	{
	  rtx set = single_set (insn);

	  /* We might have multiple sets, some of which do implicit
	     extension.  Punt on this for now.  */
	  if (! set)
	    continue;
	  /* If the destination is also a MEM or a STRICT_LOW_PART, no
	     extension applies.
	     Also, if there is an explicit extension, we don't have to
	     worry about an implicit one.  */
	  else if (MEM_P (SET_DEST (set))
		   || GET_CODE (SET_DEST (set)) == STRICT_LOW_PART
		   || GET_CODE (SET_SRC (set)) == ZERO_EXTEND
		   || GET_CODE (SET_SRC (set)) == SIGN_EXTEND)
	    ; /* Continue ordinary processing.  */
#ifdef CANNOT_CHANGE_MODE_CLASS
	  /* If the register cannot change mode to word_mode, it follows that
	     it cannot have been used in word_mode.  */
	  else if (REG_P (SET_DEST (set))
		   && CANNOT_CHANGE_MODE_CLASS (GET_MODE (SET_DEST (set)),
						word_mode,
						REGNO_REG_CLASS (REGNO (SET_DEST (set)))))
	    ; /* Continue ordinary processing.  */
#endif
	  /* If this is a straight load, make the extension explicit.  */
	  else if (REG_P (SET_DEST (set))
		   && recog_data.n_operands == 2
		   && SET_SRC (set) == op
		   && SET_DEST (set) == recog_data.operand[1-i])
	    {
	      validate_change (insn, recog_data.operand_loc[i],
			       gen_rtx_fmt_e (LOAD_EXTEND_OP (mode),
					      word_mode, op),
			       1);
	      validate_change (insn, recog_data.operand_loc[1-i],
			       gen_rtx_REG (word_mode, REGNO (SET_DEST (set))),
			       1);
	      if (! apply_change_group ())
		return 0;
	      return reload_cse_simplify_operands (insn, testreg);
	    }
	  else
	    /* ??? There might be arithmetic operations with memory that are
	       safe to optimize, but is it worth the trouble?  */
	    continue;
	}
#endif /* LOAD_EXTEND_OP */
      v = cselib_lookup (op, recog_data.operand_mode[i], 0);
      if (! v)
	continue;

      for (l = v->locs; l; l = l->next)
	if (REG_P (l->loc))
	  SET_HARD_REG_BIT (equiv_regs[i], REGNO (l->loc));
    }

  for (i = 0; i < recog_data.n_operands; i++)
    {
      enum machine_mode mode;
      int regno;
      const char *p;

      op_alt_regno[i] = XALLOCAVEC (int, recog_data.n_alternatives);
      for (j = 0; j < recog_data.n_alternatives; j++)
	op_alt_regno[i][j] = -1;

      p = constraints[i] = recog_data.constraints[i];
      mode = recog_data.operand_mode[i];

      /* Add the reject values for each alternative given by the constraints
	 for this operand.  */
      j = 0;
      while (*p != '\0')
	{
	  char c = *p++;
	  if (c == ',')
	    j++;
	  else if (c == '?')
	    alternative_reject[j] += 3;
	  else if (c == '!')
	    alternative_reject[j] += 300;
	}

      /* We won't change operands which are already registers.  We
	 also don't want to modify output operands.  */
      regno = true_regnum (recog_data.operand[i]);
      if (regno >= 0
	  || constraints[i][0] == '='
	  || constraints[i][0] == '+')
	continue;

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	{
	  int rclass = (int) NO_REGS;

	  if (! TEST_HARD_REG_BIT (equiv_regs[i], regno))
	    continue;

	  SET_REGNO (testreg, regno);
	  PUT_MODE (testreg, mode);

	  /* We found a register equal to this operand.  Now look for all
	     alternatives that can accept this register and have not been
	     assigned a register they can use yet.  */
	  j = 0;
	  p = constraints[i];
	  for (;;)
	    {
	      char c = *p;

	      switch (c)
		{
		case '=':  case '+':  case '?':
		case '#':  case '&':  case '!':
		case '*':  case '%':
		case '0':  case '1':  case '2':  case '3':  case '4':
		case '5':  case '6':  case '7':  case '8':  case '9':
		case '<':  case '>':  case 'V':  case 'o':
		case 'E':  case 'F':  case 'G':  case 'H':
		case 's':  case 'i':  case 'n':
		case 'I':  case 'J':  case 'K':  case 'L':
		case 'M':  case 'N':  case 'O':  case 'P':
		case 'p':  case 'X':  case TARGET_MEM_CONSTRAINT:
		  /* These don't say anything we care about.  */
		  break;

		case 'g': case 'r':
		  rclass = reg_class_subunion[(int) rclass][(int) GENERAL_REGS];
		  break;

		default:
		  rclass
		    = (reg_class_subunion
		       [(int) rclass]
		       [(int) REG_CLASS_FROM_CONSTRAINT ((unsigned char) c, p)]);
		  break;

		case ',': case '\0':
		  /* See if REGNO fits this alternative, and set it up as the
		     replacement register if we don't have one for this
		     alternative yet and the operand being replaced is not
		     a cheap CONST_INT.  */
		  if (op_alt_regno[i][j] == -1
		      && reg_fits_class_p (testreg, rclass, 0, mode)
		      && (GET_CODE (recog_data.operand[i]) != CONST_INT
			  || (rtx_cost (recog_data.operand[i], SET,
			  		optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)))
			      > rtx_cost (testreg, SET,
			  		optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn))))))
		    {
		      alternative_nregs[j]++;
		      op_alt_regno[i][j] = regno;
		    }
		  j++;
		  rclass = (int) NO_REGS;
		  break;
		}
	      p += CONSTRAINT_LEN (c, p);

	      if (c == '\0')
		break;
	    }
	}
    }

  /* Record all alternatives which are better or equal to the currently
     matching one in the alternative_order array.  */
  for (i = j = 0; i < recog_data.n_alternatives; i++)
    if (alternative_reject[i] <= alternative_reject[which_alternative])
      alternative_order[j++] = i;
  recog_data.n_alternatives = j;

  /* Sort it.  Given a small number of alternatives, a dumb algorithm
     won't hurt too much.  */
  for (i = 0; i < recog_data.n_alternatives - 1; i++)
    {
      int best = i;
      int best_reject = alternative_reject[alternative_order[i]];
      int best_nregs = alternative_nregs[alternative_order[i]];
      int tmp;

      for (j = i + 1; j < recog_data.n_alternatives; j++)
	{
	  int this_reject = alternative_reject[alternative_order[j]];
	  int this_nregs = alternative_nregs[alternative_order[j]];

	  if (this_reject < best_reject
	      || (this_reject == best_reject && this_nregs > best_nregs))
	    {
	      best = j;
	      best_reject = this_reject;
	      best_nregs = this_nregs;
	    }
	}

      tmp = alternative_order[best];
      alternative_order[best] = alternative_order[i];
      alternative_order[i] = tmp;
    }

  /* Substitute the operands as determined by op_alt_regno for the best
     alternative.  */
  j = alternative_order[0];

  for (i = 0; i < recog_data.n_operands; i++)
    {
      enum machine_mode mode = recog_data.operand_mode[i];
      if (op_alt_regno[i][j] == -1)
	continue;

      validate_change (insn, recog_data.operand_loc[i],
		       gen_rtx_REG (mode, op_alt_regno[i][j]), 1);
    }

  for (i = recog_data.n_dups - 1; i >= 0; i--)
    {
      int op = recog_data.dup_num[i];
      enum machine_mode mode = recog_data.operand_mode[op];

      if (op_alt_regno[op][j] == -1)
	continue;

      validate_change (insn, recog_data.dup_loc[i],
		       gen_rtx_REG (mode, op_alt_regno[op][j]), 1);
    }

  return apply_change_group ();
}
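
The reject accumulation walks each constraint string once: ',' advances to the next alternative, '?' adds a mild penalty of 3, and '!' a severe penalty of 300. A minimal standalone sketch using a hypothetical three-alternative constraint string:

#include <stdio.h>

int
main (void)
{
  const char *p = "r,?m,!i";  /* Hypothetical constraint string.  */
  int reject[3] = { 0, 0, 0 };
  int j = 0;

  while (*p != '\0')
    {
      char c = *p++;
      if (c == ',')
	j++;
      else if (c == '?')
	reject[j] += 3;
      else if (c == '!')
	reject[j] += 300;
    }
  printf ("%d %d %d\n", reject[0], reject[1], reject[2]);  /* 0 3 300 */
  return 0;
}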
Example #20
0
/* Do transforms 3) and 4) on INSN if applicable.  */
static bool
mod_subtract_transform (rtx insn)
{
  rtx set, set_src, set_dest, op1, op2, value, histogram;
  enum rtx_code code;
  enum machine_mode mode;
  gcov_type wrong_values, counts[2], count, all;
  edge e;
  int i, prob1, prob2;

  set = single_set (insn);
  if (!set)
    return false;

  set_src = SET_SRC (set);
  set_dest = SET_DEST (set);
  code = GET_CODE (set_src);
  mode = GET_MODE (set_dest);
  
  if (code != UMOD)
    return false;
  op1 = XEXP (set_src, 0);
  op2 = XEXP (set_src, 1);

  for (histogram = REG_NOTES (insn);
       histogram;
       histogram = XEXP (histogram, 1))
    if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
	&& XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL))
      break;

  if (!histogram)
    return false;

  histogram = XEXP (XEXP (histogram, 0), 1);
  value = XEXP (histogram, 0);
  histogram = XEXP (histogram, 1);

  all = 0;
  for (i = 0; i < 2; i++)
    {
      counts[i] = INTVAL (XEXP (histogram, 0));
      all += counts[i];
      histogram = XEXP (histogram, 1);
    }
  wrong_values = INTVAL (XEXP (histogram, 0));
  histogram = XEXP (histogram, 1);
  wrong_values += INTVAL (XEXP (histogram, 0));
  all += wrong_values;

  /* We require that we use just subtractions in at least 50% of all
     evaluations.  */
  count = 0;
  for (i = 0; i < 2; i++)
    {
      count += counts[i];
      if (count * 2 >= all)
	break;
    }
  
  if (i == 2)
    return false;

  if (dump_file)
    fprintf (dump_file, "Mod subtract transformation on insn %d\n",
	     INSN_UID (insn));

  /* Compute probability of taking the optimal path(s).  */
  prob1 = (counts[0] * REG_BR_PROB_BASE + all / 2) / all;
  prob2 = (counts[1] * REG_BR_PROB_BASE + all / 2) / all;

  e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
  delete_insn (insn);
  
  insert_insn_on_edge (
	gen_mod_subtract (mode, code, set_dest,
			  op1, op2, i, prob1, prob2), e);

  return true;
}
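
gen_mod_subtract is not shown; the emitted sequence is assumed to replace the division with at most i + 1 conditional subtractions plus a cold fallback to a real division. A standalone sketch of that idea for i == 1:

#include <stdio.h>

static unsigned
mod_subtract (unsigned a, unsigned b)
{
  if (a < b)
    return a;      /* No subtraction; probability prob1.  */
  if (a - b < b)
    return a - b;  /* One subtraction; probability prob2.  */
  return a % b;    /* Cold path: hardware division.  */
}

int
main (void)
{
  /* Prints "3 3 4".  */
  printf ("%u %u %u\n", mod_subtract (3, 10), mod_subtract (13, 10),
	  mod_subtract (1234, 10));
  return 0;
}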
Example #21
0
void
optimize_sibling_and_tail_recursive_calls (void)
{
  rtx insn, insns;
  basic_block alternate_exit = EXIT_BLOCK_PTR;
  bool no_sibcalls_this_function = false;
  bool successful_replacement = false;
  bool replaced_call_placeholder = false;
  edge e;

  insns = get_insns ();

  cleanup_cfg (CLEANUP_PRE_SIBCALL | CLEANUP_PRE_LOOP);

  /* If there are no basic blocks, then there is nothing to do.  */
  if (n_basic_blocks == 0)
    return;

  /* If we are using sjlj exceptions, we may need to add a call to
     _Unwind_SjLj_Unregister at exit of the function.  Which means
     that we cannot do any sibcall transformations.  */
  if (USING_SJLJ_EXCEPTIONS && current_function_has_exception_handlers ())
    no_sibcalls_this_function = true;

  return_value_pseudo = NULL_RTX;

  /* Find the exit block.

     It is possible that we have blocks which can reach the exit block
     directly.  However, most of the time a block will jump (or fall into)
     N_BASIC_BLOCKS - 1, which in turn falls into the exit block.  */
  for (e = EXIT_BLOCK_PTR->pred;
       e && alternate_exit == EXIT_BLOCK_PTR;
       e = e->pred_next)
    {
      rtx insn;

      if (e->dest != EXIT_BLOCK_PTR || e->succ_next != NULL)
	continue;

      /* Walk forwards through the last normal block and see if it
	 does nothing except fall into the exit block.  */
      for (insn = BB_HEAD (EXIT_BLOCK_PTR->prev_bb);
	   insn;
	   insn = NEXT_INSN (insn))
	{
	  rtx set;
	  /* This should only happen once, at the start of this block.  */
	  if (GET_CODE (insn) == CODE_LABEL)
	    continue;

	  if (GET_CODE (insn) == NOTE)
	    continue;

	  if (GET_CODE (insn) == INSN
	      && GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  /* Exit block also may contain copy from pseudo containing
	     return value to hard register.  */
	  if (GET_CODE (insn) == INSN
	      && (set = single_set (insn))
	      && SET_DEST (set) == current_function_return_rtx
	      && REG_P (SET_SRC (set))
	      && !return_value_pseudo)
	    {
	      return_value_pseudo = SET_SRC (set);
	      continue;
	    }

	  break;
	}

      /* If INSN is zero, then the search walked all the way through the
	 block without hitting anything interesting.  This block is a
	 valid alternate exit block.  */
      if (insn == NULL)
	alternate_exit = e->src;
      else
	return_value_pseudo = NULL;
    }

  /* If the function uses ADDRESSOF, we can't (easily) determine
     at this point if the value will end up on the stack.  */
  no_sibcalls_this_function |= sequence_uses_addressof (insns);

  /* Walk the insn chain and find any CALL_PLACEHOLDER insns.  We need to
     select one of the insn sequences attached to each CALL_PLACEHOLDER.

     The different sequences represent different ways to implement the call,
     i.e., tail recursion, sibling call, or normal call.

     Since we do not create nested CALL_PLACEHOLDERs, the scan
     continues with the insn that was after a replaced CALL_PLACEHOLDER;
     we don't rescan the replacement insns.  */
  for (insn = insns; insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == CALL_INSN
	  && GET_CODE (PATTERN (insn)) == CALL_PLACEHOLDER)
	{
	  int sibcall = (XEXP (PATTERN (insn), 1) != NULL_RTX);
	  int tailrecursion = (XEXP (PATTERN (insn), 2) != NULL_RTX);
	  basic_block call_block = BLOCK_FOR_INSN (insn);

	  /* alloca (until we have stack slot life analysis) inhibits
	     sibling call optimizations, but not tail recursion.
	     Similarly if we use varargs or stdarg since they implicitly
	     may take the address of an argument.  */
	  if (current_function_calls_alloca || current_function_stdarg)
	    sibcall = 0;

	  /* See if there are any reasons we can't perform either sibling or
	     tail call optimizations.  We must be careful with stack slots
	     which are live at potential optimization sites.  */
	  if (no_sibcalls_this_function
	      /* ??? Overly conservative.  */
	      || frame_offset
	      /* Any function that calls setjmp might have longjmp called from
		 any called function.  ??? We really should represent this
		 properly in the CFG so that this needn't be special cased.  */
	      || current_function_calls_setjmp
	      /* Can't if more than one successor or single successor is not
		 exit block.  These two tests prevent tail call optimization
		 in the presence of active exception handlers.  */
	      || call_block->succ == NULL
	      || call_block->succ->succ_next != NULL
	      || (call_block->succ->dest != EXIT_BLOCK_PTR
		  && call_block->succ->dest != alternate_exit)
	      /* If this call doesn't end the block, there are operations at
		 the end of the block which we must execute after returning.  */
	      || ! call_ends_block_p (insn, BB_END (call_block)))
	    sibcall = 0, tailrecursion = 0;

	  /* Select a set of insns to implement the call and emit them.
	     Tail recursion is the most efficient, so select it over
	     a tail/sibling call.  */

	  if (sibcall || tailrecursion)
	    successful_replacement = true;
	  replaced_call_placeholder = true;

	  replace_call_placeholder (insn,
				    tailrecursion != 0
				      ? sibcall_use_tail_recursion
				      : sibcall != 0
					 ? sibcall_use_sibcall
					 : sibcall_use_normal);
	}
    }

  if (successful_replacement)
    {
      rtx insn;
      tree arg;

      /* A sibling call sequence invalidates any REG_EQUIV notes made for
	 this function's incoming arguments.

	 At the start of RTL generation we know the only REG_EQUIV notes
	 in the rtl chain are those for incoming arguments, so we can safely
	 flush any REG_EQUIV note.

	 This is (slight) overkill.  We could keep track of the highest
	 argument we clobber and be more selective in removing notes, but it
	 does not seem to be worth the effort.  */
      purge_reg_equiv_notes ();

      /* A sibling call sequence also may invalidate RTX_UNCHANGING_P
	 flag of some incoming arguments MEM RTLs, because it can write into
	 those slots.  We clear all those bits now.

	 This is (slight) overkill, we could keep track of which arguments
	 we actually write into.  */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	{
	  if (INSN_P (insn))
	    purge_mem_unchanging_flag (PATTERN (insn));
	}

      /* Similarly, invalidate RTX_UNCHANGING_P for any incoming
	 arguments passed in registers.  */
      for (arg = DECL_ARGUMENTS (current_function_decl);
	   arg;
	   arg = TREE_CHAIN (arg))
	{
	  if (REG_P (DECL_RTL (arg)))
	    RTX_UNCHANGING_P (DECL_RTL (arg)) = false;
	}
    }

  /* There may have been NOTE_INSN_BLOCK_{BEGIN,END} notes in the
     CALL_PLACEHOLDER alternatives that we didn't emit.  Rebuild the
     lexical block tree to correspond to the notes that still exist.  */
  if (replaced_call_placeholder)
    reorder_blocks ();

  /* This information will be invalid after inline expansion.  Kill it now.  */
  free_basic_block_vars (0);
  free_EXPR_LIST_list (&tail_recursion_label_list);
}
Example #22
0
/* Do transform 2) on INSN if applicable.  */
static bool
mod_pow2_value_transform (rtx insn)
{
  rtx set, set_src, set_dest, op1, op2, value, histogram;
  enum rtx_code code;
  enum machine_mode mode;
  gcov_type wrong_values, count;
  edge e;
  int i, all, prob;

  set = single_set (insn);
  if (!set)
    return false;

  set_src = SET_SRC (set);
  set_dest = SET_DEST (set);
  code = GET_CODE (set_src);
  mode = GET_MODE (set_dest);
  
  if (code != UMOD)
    return false;
  op1 = XEXP (set_src, 0);
  op2 = XEXP (set_src, 1);

  for (histogram = REG_NOTES (insn);
       histogram;
       histogram = XEXP (histogram, 1))
    if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
	&& XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2))
      break;

  if (!histogram)
    return false;

  histogram = XEXP (XEXP (histogram, 0), 1);
  value = XEXP (histogram, 0);
  histogram = XEXP (histogram, 1);
  wrong_values = INTVAL (XEXP (histogram, 0));
  histogram = XEXP (histogram, 1);

  count = 0;
  for (i = 0; i < GET_MODE_BITSIZE (mode); i++)
    {
      count += INTVAL (XEXP (histogram, 0));
      histogram = XEXP (histogram, 1);
    }

  if (!rtx_equal_p (op2, value))
    return false;

  /* We require that we hit a power of two at least half of all evaluations.  */
  if (count < wrong_values)
    return false;

  if (dump_file)
    fprintf (dump_file, "Mod power of 2 transformation on insn %d\n",
	     INSN_UID (insn));

  /* Compute probability of taking the optimal path.  */
  all = count + wrong_values;
  prob = (count * REG_BR_PROB_BASE + all / 2) / all;

  e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
  delete_insn (insn);
  
  insert_insn_on_edge (
	gen_mod_pow2 (mode, code, set_dest, op1, op2, prob), e);

  return true;
}
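
Transform 2) rests on the identity x % n == x & (n - 1) for unsigned x when n is a power of two; the power-of-two test itself is a single and-compare. A standalone illustration:

#include <stdio.h>

int
main (void)
{
  unsigned x = 1234567, n = 4096;

  if (n != 0 && (n & (n - 1)) == 0)  /* n is a power of two.  */
    printf ("%u %% %u = %u = %u\n", x, n, x % n, x & (n - 1));
  return 0;
}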
Example #23
0
static bool
attempt_change (rtx new_addr, rtx inc_reg)
{
  /* There are four cases: For the two cases that involve an add
     instruction, we are going to have to delete the add and insert a
     mov.  We are going to assume that the mov is free.  This is
     fairly early in the backend and there are a lot of opportunities
     for removing that move later.  In particular, there is the case
     where the move may be dead; this is what dead code elimination
     passes are for.  The two cases where we have an inc insn are
     handled without a mov.  */

  basic_block bb = BLOCK_FOR_INSN (mem_insn.insn);
  rtx mov_insn = NULL;
  int regno;
  rtx mem = *mem_insn.mem_loc;
  enum machine_mode mode = GET_MODE (mem);
  rtx new_mem;
  int old_cost = 0;
  int new_cost = 0;
  bool speed = optimize_bb_for_speed_p (bb);

  PUT_MODE (mem_tmp, mode);
  XEXP (mem_tmp, 0) = new_addr;

  old_cost = (set_src_cost (mem, speed)
	      + set_rtx_cost (PATTERN (inc_insn.insn), speed));
  new_cost = set_src_cost (mem_tmp, speed);

  /* The first item of business is to see if this is profitable.  */
  if (old_cost < new_cost)
    {
      if (dump_file)
	fprintf (dump_file, "cost failure old=%d new=%d\n", old_cost, new_cost);
      return false;
    }

  /* Jump through a lot of hoops to keep the attributes up to date.  We
     do not want to call one of the change address variants that take
     an offset even though we know the offset in many cases.  These
     assume you are changing where the address is pointing by the
     offset.  */
  new_mem = replace_equiv_address_nv (mem, new_addr);
  if (! validate_change (mem_insn.insn, mem_insn.mem_loc, new_mem, 0))
    {
      if (dump_file)
	fprintf (dump_file, "validation failure\n");
      return false;
    }

  /* From here to the end of the function we are committed to the
     change, i.e. nothing fails.  Generate any necessary movs, move
     any regnotes, and fix up the reg_next_{use,inc_use,def}.  */
  switch (inc_insn.form)
    {
    case FORM_PRE_ADD:
      /* Replace the addition with a move.  Do it at the location of
	 the addition since the operand of the addition may change
	 before the memory reference.  */
      mov_insn = insert_move_insn_before (inc_insn.insn,
					  inc_insn.reg_res, inc_insn.reg0);
      move_dead_notes (mov_insn, inc_insn.insn, inc_insn.reg0);

      regno = REGNO (inc_insn.reg_res);
      reg_next_def[regno] = mov_insn;
      reg_next_use[regno] = NULL;
      regno = REGNO (inc_insn.reg0);
      reg_next_use[regno] = mov_insn;
      df_recompute_luids (bb);
      break;

    case FORM_POST_INC:
      regno = REGNO (inc_insn.reg_res);
      if (reg_next_use[regno] == reg_next_inc_use[regno])
	reg_next_inc_use[regno] = NULL;

      /* Fallthru.  */
    case FORM_PRE_INC:
      regno = REGNO (inc_insn.reg_res);
      reg_next_def[regno] = mem_insn.insn;
      reg_next_use[regno] = NULL;

      break;

    case FORM_POST_ADD:
      mov_insn = insert_move_insn_before (mem_insn.insn,
					  inc_insn.reg_res, inc_insn.reg0);
      move_dead_notes (mov_insn, inc_insn.insn, inc_insn.reg0);

      /* Do not move anything to the mov insn because the instruction
	 pointer for the main iteration has not yet hit that.  It is
	 still pointing to the mem insn. */
      regno = REGNO (inc_insn.reg_res);
      reg_next_def[regno] = mem_insn.insn;
      reg_next_use[regno] = NULL;

      regno = REGNO (inc_insn.reg0);
      reg_next_use[regno] = mem_insn.insn;
      if ((reg_next_use[regno] == reg_next_inc_use[regno])
	  || (reg_next_inc_use[regno] == inc_insn.insn))
	reg_next_inc_use[regno] = NULL;
      df_recompute_luids (bb);
      break;

    case FORM_last:
    default:
      gcc_unreachable ();
    }

  if (!inc_insn.reg1_is_const)
    {
      regno = REGNO (inc_insn.reg1);
      reg_next_use[regno] = mem_insn.insn;
      if ((reg_next_use[regno] == reg_next_inc_use[regno])
	  || (reg_next_inc_use[regno] == inc_insn.insn))
	reg_next_inc_use[regno] = NULL;
    }

  delete_insn (inc_insn.insn);

  if (dump_file && mov_insn)
    {
      fprintf (dump_file, "inserting mov ");
      dump_insn_slim (dump_file, mov_insn);
    }

  /* Record that this insn has an implicit side effect.  */
  add_reg_note (mem_insn.insn, REG_INC, inc_reg);

  if (dump_file)
    {
      fprintf (dump_file, "****success ");
      dump_insn_slim (dump_file, mem_insn.insn);
    }

  return true;
}
Example #24
0
static int
reload_cse_simplify_set (rtx set, rtx insn)
{
  int did_change = 0;
  int dreg;
  rtx src;
  enum reg_class dclass;
  int old_cost;
  cselib_val *val;
  struct elt_loc_list *l;
#ifdef LOAD_EXTEND_OP
  enum rtx_code extend_op = UNKNOWN;
#endif
  bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));

  dreg = true_regnum (SET_DEST (set));
  if (dreg < 0)
    return 0;

  src = SET_SRC (set);
  if (side_effects_p (src) || true_regnum (src) >= 0)
    return 0;

  dclass = REGNO_REG_CLASS (dreg);

#ifdef LOAD_EXTEND_OP
  /* When replacing a memory with a register, we need to honor assumptions
     that combine made wrt the contents of sign bits.  We'll do this by
     generating an extend instruction instead of a reg->reg copy.  Thus
     the destination must be a register that we can widen.  */
  if (MEM_P (src)
      && GET_MODE_BITSIZE (GET_MODE (src)) < BITS_PER_WORD
      && (extend_op = LOAD_EXTEND_OP (GET_MODE (src))) != UNKNOWN
      && !REG_P (SET_DEST (set)))
    return 0;
#endif

  val = cselib_lookup (src, GET_MODE (SET_DEST (set)), 0);
  if (! val)
    return 0;

  /* If memory loads are cheaper than register copies, don't change them.  */
  if (MEM_P (src))
    old_cost = MEMORY_MOVE_COST (GET_MODE (src), dclass, 1);
  else if (REG_P (src))
    old_cost = REGISTER_MOVE_COST (GET_MODE (src),
				   REGNO_REG_CLASS (REGNO (src)), dclass);
  else
    old_cost = rtx_cost (src, SET, speed);

  for (l = val->locs; l; l = l->next)
    {
      rtx this_rtx = l->loc;
      int this_cost;

      if (CONSTANT_P (this_rtx) && ! references_value_p (this_rtx, 0))
	{
#ifdef LOAD_EXTEND_OP
	  if (extend_op != UNKNOWN)
	    {
	      HOST_WIDE_INT this_val;

	      /* ??? I'm lazy and don't wish to handle CONST_DOUBLE.  Other
		 constants, such as SYMBOL_REF, cannot be extended.  */
	      if (GET_CODE (this_rtx) != CONST_INT)
		continue;

	      this_val = INTVAL (this_rtx);
	      switch (extend_op)
		{
		case ZERO_EXTEND:
		  this_val &= GET_MODE_MASK (GET_MODE (src));
		  break;
		case SIGN_EXTEND:
		  /* ??? In theory we're already extended.  */
		  if (this_val == trunc_int_for_mode (this_val, GET_MODE (src)))
		    break;
		default:
		  gcc_unreachable ();
		}
	      this_rtx = GEN_INT (this_val);
	    }
#endif
	  this_cost = rtx_cost (this_rtx, SET, speed);
	}
      else if (REG_P (this_rtx))
	{
#ifdef LOAD_EXTEND_OP
	  if (extend_op != UNKNOWN)
	    {
	      this_rtx = gen_rtx_fmt_e (extend_op, word_mode, this_rtx);
	      this_cost = rtx_cost (this_rtx, SET, speed);
	    }
	  else
#endif
	    this_cost = REGISTER_MOVE_COST (GET_MODE (this_rtx),
					    REGNO_REG_CLASS (REGNO (this_rtx)),
					    dclass);
	}
      else
	continue;

      /* If equal costs, prefer registers over anything else.  That
	 tends to lead to smaller instructions on some machines.  */
      if (this_cost < old_cost
	  || (this_cost == old_cost
	      && REG_P (this_rtx)
	      && !REG_P (SET_SRC (set))))
	{
#ifdef LOAD_EXTEND_OP
	  if (GET_MODE_BITSIZE (GET_MODE (SET_DEST (set))) < BITS_PER_WORD
	      && extend_op != UNKNOWN
#ifdef CANNOT_CHANGE_MODE_CLASS
	      && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SET_DEST (set)),
					    word_mode,
					    REGNO_REG_CLASS (REGNO (SET_DEST (set))))
#endif
	      )
	    {
	      rtx wide_dest = gen_rtx_REG (word_mode, REGNO (SET_DEST (set)));
	      ORIGINAL_REGNO (wide_dest) = ORIGINAL_REGNO (SET_DEST (set));
	      validate_change (insn, &SET_DEST (set), wide_dest, 1);
	    }
#endif

	  validate_unshare_change (insn, &SET_SRC (set), this_rtx, 1);
	  old_cost = this_cost, did_change = 1;
	}
    }

  return did_change;
}
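
In the constant case above, ZERO_EXTEND masks the value down to the source mode while SIGN_EXTEND keeps it only if it already round-trips through the narrow mode. A standalone sketch using a byte-wide source mode (mask 0xff) for illustration:

#include <stdio.h>

int
main (void)
{
  long this_val = -1;                 /* Constant loaded in a byte mode.  */
  long zext = this_val & 0xff;        /* ZERO_EXTEND: 255.  */
  long sext = (signed char) this_val; /* SIGN_EXTEND: -1.  */

  printf ("zero-extended: %ld, sign-extended: %ld\n", zext, sext);
  return 0;
}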
Example #25
0
static bool
speculative_prefetching_transform (rtx insn)
{
  rtx histogram, value;
  gcov_type val, count, all;
  edge e;
  rtx mem, address;
  int write;

  if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
    return false;

  if (!find_mem_reference (insn, &mem, &write))
    return false;

  address = XEXP (mem, 0);
  if (side_effects_p (address))
    return false;
      
  if (CONSTANT_P (address))
    return false;

  for (histogram = REG_NOTES (insn);
       histogram;
       histogram = XEXP (histogram, 1))
    if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
	&& XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_CONST_DELTA))
      break;

  if (!histogram)
    return false;

  histogram = XEXP (XEXP (histogram, 0), 1);
  value = XEXP (histogram, 0);
  histogram = XEXP (histogram, 1);
  /* Skip last value referenced.  */
  histogram = XEXP (histogram, 1);
  val = INTVAL (XEXP (histogram, 0));
  histogram = XEXP (histogram, 1);
  count = INTVAL (XEXP (histogram, 0));
  histogram = XEXP (histogram, 1);
  all = INTVAL (XEXP (histogram, 0));

  /* With that few executions we do not really have a reason to optimize the
     statement, and more importantly, the data about differences of addresses
     are spoiled by the first item that had no previous value to compare
     with.  */
  if (all < 4)
    return false;

  /* We require that count be at least half of all; this means
     that for the transformation to fire the value must be constant
     at least 50% of time (and 75% gives the guarantee of usage).  */
  if (!rtx_equal_p (address, value) || 2 * count < all)
    return false;

  /* If the difference is too small, it does not make too much sense to
     prefetch, as the memory is probably already in cache.  */
  if (val >= NOPREFETCH_RANGE_MIN && val <= NOPREFETCH_RANGE_MAX)
    return false;

  if (dump_file)
    fprintf (dump_file, "Speculative prefetching for insn %d\n",
	     INSN_UID (insn));

  e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
  
  insert_insn_on_edge (gen_speculative_prefetch (address, val, write), e);

  return true;
}