Exemple #1
0
static bool
insn_prefetch_values_to_profile (rtx insn, histogram_values *values)
{
  rtx mem, address;
  int write;
  histogram_value hist;

  /* It only makes sense to look for memory references in ordinary insns.  */
  if (GET_CODE (insn) != INSN)
    return false;

  if (!find_mem_reference (insn, &mem, &write))
    return false;

  address = XEXP (mem, 0);
  if (side_effects_p (address))
    return false;

  /* APPLE LOCAL begin should be in FSF, and has been submitted.  */
  if (GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;
  /* APPLE LOCAL end should be in FSF, and has been submitted.  */
      
  if (CONSTANT_P (address))
    return false;

  hist = ggc_alloc (sizeof (*hist));
  hist->value = address;
  hist->mode = GET_MODE (address);
  hist->seq = NULL_RTX;
  hist->insn = insn;
  hist->type = HIST_TYPE_CONST_DELTA;
  VEC_safe_push (histogram_value, *values, hist);

  return true;
}
Exemple #2
0
static bool
propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
{
  rtx x = *px, tem = NULL_RTX, op0, op1, op2;
  enum rtx_code code = GET_CODE (x);
  machine_mode mode = GET_MODE (x);
  machine_mode op_mode;
  bool can_appear = (flags & PR_CAN_APPEAR) != 0;
  bool valid_ops = true;

  if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x))
    {
      /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether
	 they have side effects or not).  */
      *px = (side_effects_p (x)
	     ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx)
	     : gen_rtx_SCRATCH (GET_MODE (x)));
      return false;
    }

  /* If X is OLD_RTX, return NEW_RTX.  But not if replacing only within an
     address, and we are *not* inside one.  */
  if (x == old_rtx)
    {
      *px = new_rtx;
      return can_appear;
    }

  /* If this is an expression, try recursive substitution.  */
  switch (GET_RTX_CLASS (code))
    {
    case RTX_UNARY:
      op0 = XEXP (x, 0);
      op_mode = GET_MODE (op0);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0))
	return true;
      tem = simplify_gen_unary (code, mode, op0, op_mode);
      break;

    case RTX_BIN_ARITH:
    case RTX_COMM_ARITH:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
	return true;
      tem = simplify_gen_binary (code, mode, op0, op1);
      break;

    case RTX_COMPARE:
    case RTX_COMM_COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
	return true;
      tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
      break;

    case RTX_TERNARY:
    case RTX_BITFIELD_OPS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      op2 = XEXP (x, 2);
      op_mode = GET_MODE (op0);
      valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
      valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
      if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
	return true;
      if (op_mode == VOIDmode)
	op_mode = GET_MODE (op0);
      tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
      break;

    case RTX_EXTRA:
      /* The only case we try to handle is a SUBREG.  */
      if (code == SUBREG)
	{
          op0 = XEXP (x, 0);
	  valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
          if (op0 == XEXP (x, 0))
	    return true;
	  tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
				     SUBREG_BYTE (x));
	}
      break;

    case RTX_OBJ:
      if (code == MEM && x != new_rtx)
	{
	  rtx new_op0;
	  op0 = XEXP (x, 0);

	  /* There are some addresses that we cannot work on.  */
	  if (!can_simplify_addr (op0))
	    return true;

	  op0 = new_op0 = targetm.delegitimize_address (op0);
	  valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
					flags | PR_CAN_APPEAR);

	  /* Dismiss transformation that we do not want to carry on.  */
	  if (!valid_ops
	      || new_op0 == op0
	      || !(GET_MODE (new_op0) == GET_MODE (op0)
		   || GET_MODE (new_op0) == VOIDmode))
	    return true;

	  canonicalize_address (new_op0);

	  /* Copy propagations are always ok.  Otherwise check the costs.  */
	  if (!(REG_P (old_rtx) && REG_P (new_rtx))
	      && !should_replace_address (op0, new_op0, GET_MODE (x),
					  MEM_ADDR_SPACE (x),
	      			 	  flags & PR_OPTIMIZE_FOR_SPEED))
	    return true;

	  tem = replace_equiv_address_nv (x, new_op0);
	}

      else if (code == LO_SUM)
	{
          op0 = XEXP (x, 0);
          op1 = XEXP (x, 1);

	  /* The only simplification we do attempts to remove references to op0
	     or make it constant -- in both cases, op0's invalidity will not
	     make the result invalid.  */
	  propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
	  valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
          if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
	    return true;

	  /* (lo_sum (high x) x) -> x  */
	  if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
	    tem = op1;
	  else
	    tem = gen_rtx_LO_SUM (mode, op0, op1);

	  /* OP1 is likely not a legitimate address, otherwise there would have
	     been no LO_SUM.  We want it to disappear if it is invalid, return
	     false in that case.  */
	  return memory_address_p (mode, tem);
	}

      else if (code == REG)
	{
	  if (rtx_equal_p (x, old_rtx))
	    {
              *px = new_rtx;
              return can_appear;
	    }
	}
      break;

    default:
      break;
    }

  /* No change, no trouble.  */
  if (tem == NULL_RTX)
    return true;

  *px = tem;

  /* Allow replacements that simplify operations on a vector or complex
     value to a component.  The most prominent case is
     (subreg ([vec_]concat ...)).   */
  if (REG_P (tem) && !HARD_REGISTER_P (tem)
      && (VECTOR_MODE_P (GET_MODE (new_rtx))
	  || COMPLEX_MODE_P (GET_MODE (new_rtx)))
      && GET_MODE (tem) == GET_MODE_INNER (GET_MODE (new_rtx)))
    return true;

  /* The replacement we made so far is valid, if all of the recursive
     replacements were valid, or we could simplify everything to
     a constant.  */
  return valid_ops || can_appear || CONSTANT_P (tem);
}
Exemple #3
0
/* The major function for aggressive pseudo coalescing of moves only
   if the both pseudos were spilled and not special reload pseudos.  */
bool
lra_coalesce (void)
{
  basic_block bb;
  rtx mv, set, insn, next, *sorted_moves;
  int i, mv_num, sregno, dregno;
  int coalesced_moves;
  int max_regno = max_reg_num ();
  bitmap_head involved_insns_bitmap;

  timevar_push (TV_LRA_COALESCE);

  if (lra_dump_file != NULL)
    fprintf (lra_dump_file,
	     "\n********** Pseudos coalescing #%d: **********\n\n",
	     ++lra_coalesce_iter);
  first_coalesced_pseudo = XNEWVEC (int, max_regno);
  next_coalesced_pseudo = XNEWVEC (int, max_regno);
  for (i = 0; i < max_regno; i++)
    first_coalesced_pseudo[i] = next_coalesced_pseudo[i] = i;
  sorted_moves = XNEWVEC (rtx, get_max_uid ());
  mv_num = 0;
  /* Collect moves.  */
  coalesced_moves = 0;
  FOR_EACH_BB (bb)
    {
      FOR_BB_INSNS_SAFE (bb, insn, next)
	if (INSN_P (insn)
	    && (set = single_set (insn)) != NULL_RTX
	    && REG_P (SET_DEST (set)) && REG_P (SET_SRC (set))
	    && (sregno = REGNO (SET_SRC (set))) >= FIRST_PSEUDO_REGISTER
	    && (dregno = REGNO (SET_DEST (set))) >= FIRST_PSEUDO_REGISTER
	    && mem_move_p (sregno, dregno)
	    && coalescable_pseudo_p (sregno) && coalescable_pseudo_p (dregno)
	    && ! side_effects_p (set)
	    && !(lra_intersected_live_ranges_p
		 (lra_reg_info[sregno].live_ranges,
		  lra_reg_info[dregno].live_ranges)))
	  sorted_moves[mv_num++] = insn;
    }
  qsort (sorted_moves, mv_num, sizeof (rtx), move_freq_compare_func);
  /* Coalesced copies, most frequently executed first.	*/
  bitmap_initialize (&coalesced_pseudos_bitmap, &reg_obstack);
  bitmap_initialize (&involved_insns_bitmap, &reg_obstack);
  for (i = 0; i < mv_num; i++)
    {
      mv = sorted_moves[i];
      set = single_set (mv);
      lra_assert (set != NULL && REG_P (SET_SRC (set))
		  && REG_P (SET_DEST (set)));
      sregno = REGNO (SET_SRC (set));
      dregno = REGNO (SET_DEST (set));
      if (first_coalesced_pseudo[sregno] == first_coalesced_pseudo[dregno])
	{
	  coalesced_moves++;
	  if (lra_dump_file != NULL)
	    fprintf
	      (lra_dump_file, "      Coalescing move %i:r%d-r%d (freq=%d)\n",
	       INSN_UID (mv), sregno, dregno,
	       BLOCK_FOR_INSN (mv)->frequency);
	  /* We updated involved_insns_bitmap when doing the merge.  */
	}
      else if (!(lra_intersected_live_ranges_p
		 (lra_reg_info[first_coalesced_pseudo[sregno]].live_ranges,
		  lra_reg_info[first_coalesced_pseudo[dregno]].live_ranges)))
	{
	  coalesced_moves++;
	  if (lra_dump_file != NULL)
	    fprintf
	      (lra_dump_file,
	       "  Coalescing move %i:r%d(%d)-r%d(%d) (freq=%d)\n",
	       INSN_UID (mv), sregno, ORIGINAL_REGNO (SET_SRC (set)),
	       dregno, ORIGINAL_REGNO (SET_DEST (set)),
	       BLOCK_FOR_INSN (mv)->frequency);
	  bitmap_ior_into (&involved_insns_bitmap,
			   &lra_reg_info[sregno].insn_bitmap);
	  bitmap_ior_into (&involved_insns_bitmap,
			   &lra_reg_info[dregno].insn_bitmap);
	  merge_pseudos (sregno, dregno);
	}
    }
  bitmap_initialize (&used_pseudos_bitmap, &reg_obstack);
  FOR_EACH_BB (bb)
    {
      update_live_info (df_get_live_in (bb));
      update_live_info (df_get_live_out (bb));
      FOR_BB_INSNS_SAFE (bb, insn, next)
	if (INSN_P (insn)
	    && bitmap_bit_p (&involved_insns_bitmap, INSN_UID (insn)))
	  {
	    if (! substitute (&insn))
	      continue;
	    lra_update_insn_regno_info (insn);
	    if ((set = single_set (insn)) != NULL_RTX && set_noop_p (set))
	      {
		/* Coalesced move.  */
		if (lra_dump_file != NULL)
		  fprintf (lra_dump_file, "	 Removing move %i (freq=%d)\n",
			 INSN_UID (insn), BLOCK_FOR_INSN (insn)->frequency);
		lra_set_insn_deleted (insn);
	      }
	  }
    }
  bitmap_clear (&used_pseudos_bitmap);
  bitmap_clear (&involved_insns_bitmap);
  bitmap_clear (&coalesced_pseudos_bitmap);
  if (lra_dump_file != NULL && coalesced_moves != 0)
    fprintf (lra_dump_file, "Coalesced Moves = %d\n", coalesced_moves);
  free (sorted_moves);
  free (next_coalesced_pseudo);
  free (first_coalesced_pseudo);
  timevar_pop (TV_LRA_COALESCE);
  return coalesced_moves != 0;
}
Exemple #4
0
static bool
speculative_prefetching_transform (rtx insn)
{
  rtx histogram, value;
  gcov_type val, count, all;
  edge e;
  rtx mem, address;
  int write;

  if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
    return false;

  if (!find_mem_reference (insn, &mem, &write))
    return false;

  address = XEXP (mem, 0);
  if (side_effects_p (address))
    return false;
      
  if (CONSTANT_P (address))
    return false;

  for (histogram = REG_NOTES (insn);
       histogram;
       histogram = XEXP (histogram, 1))
    if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
	&& XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_CONST_DELTA))
      break;

  if (!histogram)
    return false;

  histogram = XEXP (XEXP (histogram, 0), 1);
  value = XEXP (histogram, 0);
  histogram = XEXP (histogram, 1);
  /* Skip last value referenced.  */
  histogram = XEXP (histogram, 1);
  val = INTVAL (XEXP (histogram, 0));
  histogram = XEXP (histogram, 1);
  count = INTVAL (XEXP (histogram, 0));
  histogram = XEXP (histogram, 1);
  all = INTVAL (XEXP (histogram, 0));

  /* With that few executions we do not really have a reason to optimize the
     statement, and more importantly, the data about differences of addresses
     are spoiled by the first item that had no previous value to compare
     with.  */
  if (all < 4)
    return false;

  /* We require that count be at least half of all; this means
     that for the transformation to fire the value must be constant
     at least 50% of time (and 75% gives the guarantee of usage).  */
  if (!rtx_equal_p (address, value) || 2 * count < all)
    return false;

  /* If the difference is too small, it does not make too much sense to
     prefetch, as the memory is probably already in cache.  */
  if (val >= NOPREFETCH_RANGE_MIN && val <= NOPREFETCH_RANGE_MAX)
    return false;

  if (dump_file)
    fprintf (dump_file, "Speculative prefetching for insn %d\n",
	     INSN_UID (insn));

  e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
  
  insert_insn_on_edge (gen_speculative_prefetch (address, val, write), e);

  return true;
}
Exemple #5
0
/* Find values inside INSN for that we want to measure histograms for
   division/modulo optimization and stores them to VALUES.  */
static void
insn_divmod_values_to_profile (rtx insn, histogram_values *values)
{
  rtx set, set_src, op1, op2;
  enum machine_mode mode;
  histogram_value hist;

  if (!INSN_P (insn))
    return;

  set = single_set (insn);
  if (!set)
    return;

  mode = GET_MODE (SET_DEST (set));
  if (!INTEGRAL_MODE_P (mode))
    return;

  set_src = SET_SRC (set);
  switch (GET_CODE (set_src))
    {
    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      op1 = XEXP (set_src, 0);
      op2 = XEXP (set_src, 1);
      if (side_effects_p (op2))
	return;

      /* Check for a special case where the divisor is power of 2.  */
      if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2))
	{
	  hist = ggc_alloc (sizeof (*hist));
	  hist->value = op2;
	  hist->seq = NULL_RTX;
	  hist->mode = mode;
	  hist->insn = insn;
	  hist->type = HIST_TYPE_POW2;
	  hist->hdata.pow2.may_be_other = 1;
	  VEC_safe_push (histogram_value, *values, hist);
	}

      /* Check whether the divisor is not in fact a constant.  */
      if (!CONSTANT_P (op2))
	{
	  hist = ggc_alloc (sizeof (*hist));
	  hist->value = op2;
	  hist->mode = mode;
	  hist->seq = NULL_RTX;
	  hist->insn = insn;
	  hist->type = HIST_TYPE_SINGLE_VALUE;
	  VEC_safe_push (histogram_value, *values, hist);
	}

      /* For mod, check whether it is not often a noop (or replaceable by
	 a few subtractions).  */
      if (GET_CODE (set_src) == UMOD && !side_effects_p (op1))
	{
	  rtx tmp;

	  hist = ggc_alloc (sizeof (*hist));
	  start_sequence ();
	  tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2));
	  hist->value = force_operand (tmp, NULL_RTX);
	  hist->seq = get_insns ();
	  end_sequence ();
	  hist->mode = mode;
	  hist->insn = insn;
	  hist->type = HIST_TYPE_INTERVAL;
	  hist->hdata.intvl.int_start = 0;
	  hist->hdata.intvl.steps = 2;
	  hist->hdata.intvl.may_be_less = 1;
	  hist->hdata.intvl.may_be_more = 1;
	  VEC_safe_push (histogram_value, *values, hist);
	}
      return;

    default:
      return;
    }
}
Exemple #6
0
static int
reload_cse_simplify_set (rtx set, rtx insn)
{
  int did_change = 0;
  int dreg;
  rtx src;
  enum reg_class dclass;
  int old_cost;
  cselib_val *val;
  struct elt_loc_list *l;
#ifdef LOAD_EXTEND_OP
  enum rtx_code extend_op = UNKNOWN;
#endif
  bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));

  dreg = true_regnum (SET_DEST (set));
  if (dreg < 0)
    return 0;

  src = SET_SRC (set);
  if (side_effects_p (src) || true_regnum (src) >= 0)
    return 0;

  dclass = REGNO_REG_CLASS (dreg);

#ifdef LOAD_EXTEND_OP
  /* When replacing a memory with a register, we need to honor assumptions
     that combine made wrt the contents of sign bits.  We'll do this by
     generating an extend instruction instead of a reg->reg copy.  Thus
     the destination must be a register that we can widen.  */
  if (MEM_P (src)
      && GET_MODE_BITSIZE (GET_MODE (src)) < BITS_PER_WORD
      && (extend_op = LOAD_EXTEND_OP (GET_MODE (src))) != UNKNOWN
      && !REG_P (SET_DEST (set)))
    return 0;
#endif

  val = cselib_lookup (src, GET_MODE (SET_DEST (set)), 0);
  if (! val)
    return 0;

  /* If memory loads are cheaper than register copies, don't change them.  */
  if (MEM_P (src))
    old_cost = MEMORY_MOVE_COST (GET_MODE (src), dclass, 1);
  else if (REG_P (src))
    old_cost = REGISTER_MOVE_COST (GET_MODE (src),
				   REGNO_REG_CLASS (REGNO (src)), dclass);
  else
    old_cost = rtx_cost (src, SET, speed);

  for (l = val->locs; l; l = l->next)
    {
      rtx this_rtx = l->loc;
      int this_cost;

      if (CONSTANT_P (this_rtx) && ! references_value_p (this_rtx, 0))
	{
#ifdef LOAD_EXTEND_OP
	  if (extend_op != UNKNOWN)
	    {
	      HOST_WIDE_INT this_val;

	      /* ??? I'm lazy and don't wish to handle CONST_DOUBLE.  Other
		 constants, such as SYMBOL_REF, cannot be extended.  */
	      if (GET_CODE (this_rtx) != CONST_INT)
		continue;

	      this_val = INTVAL (this_rtx);
	      switch (extend_op)
		{
		case ZERO_EXTEND:
		  this_val &= GET_MODE_MASK (GET_MODE (src));
		  break;
		case SIGN_EXTEND:
		  /* ??? In theory we're already extended.  */
		  if (this_val == trunc_int_for_mode (this_val, GET_MODE (src)))
		    break;
		default:
		  gcc_unreachable ();
		}
	      this_rtx = GEN_INT (this_val);
	    }
#endif
	  this_cost = rtx_cost (this_rtx, SET, speed);
	}
      else if (REG_P (this_rtx))
	{
#ifdef LOAD_EXTEND_OP
	  if (extend_op != UNKNOWN)
	    {
	      this_rtx = gen_rtx_fmt_e (extend_op, word_mode, this_rtx);
	      this_cost = rtx_cost (this_rtx, SET, speed);
	    }
	  else
#endif
	    this_cost = REGISTER_MOVE_COST (GET_MODE (this_rtx),
					    REGNO_REG_CLASS (REGNO (this_rtx)),
					    dclass);
	}
      else
	continue;

      /* If equal costs, prefer registers over anything else.  That
	 tends to lead to smaller instructions on some machines.  */
      if (this_cost < old_cost
	  || (this_cost == old_cost
	      && REG_P (this_rtx)
	      && !REG_P (SET_SRC (set))))
	{
#ifdef LOAD_EXTEND_OP
	  if (GET_MODE_BITSIZE (GET_MODE (SET_DEST (set))) < BITS_PER_WORD
	      && extend_op != UNKNOWN
#ifdef CANNOT_CHANGE_MODE_CLASS
	      && !CANNOT_CHANGE_MODE_CLASS (GET_MODE (SET_DEST (set)),
					    word_mode,
					    REGNO_REG_CLASS (REGNO (SET_DEST (set))))
#endif
	      )
	    {
	      rtx wide_dest = gen_rtx_REG (word_mode, REGNO (SET_DEST (set)));
	      ORIGINAL_REGNO (wide_dest) = ORIGINAL_REGNO (SET_DEST (set));
	      validate_change (insn, &SET_DEST (set), wide_dest, 1);
	    }
#endif

	  validate_unshare_change (insn, &SET_SRC (set), this_rtx, 1);
	  old_cost = this_cost, did_change = 1;
	}
    }

  return did_change;
}