Beispiel #1
0
static bool
should_replace_address (rtx old_rtx, rtx new_rtx, machine_mode mode,
			addr_space_t as, bool speed)
{
  int gain;

  if (rtx_equal_p (old_rtx, new_rtx)
      || !memory_address_addr_space_p (mode, new_rtx, as))
    return false;

  /* Copy propagation is always ok.  */
  if (REG_P (old_rtx) && REG_P (new_rtx))
    return true;

  /* Prefer the new address if it is less expensive.  */
  gain = (address_cost (old_rtx, mode, as, speed)
	  - address_cost (new_rtx, mode, as, speed));

  /* If the addresses have equivalent cost, prefer the new address
     if it has the highest `set_src_cost'.  That has the potential of
     eliminating the most insns without additional costs, and it
     is the same that cse.c used to do.  */
  if (gain == 0)
    gain = (set_src_cost (new_rtx, VOIDmode, speed)
	    - set_src_cost (old_rtx, VOIDmode, speed));

  return (gain > 0);
}
/* FIXME: This is the function that we need rtl.h and optabs.h for.
   This function (and similar RTL-related cost code in e.g. IVOPTS) should
   be moved to some kind of interface file for GIMPLE/RTL interactions.  */
static bool
lshift_cheap_p (void)
{
  /* FIXME: This should be made target dependent via this "this_target"
     mechanism, similar to e.g. can_copy_init_p in gcse.c.  */
  static bool init[2] = {false, false};
  static bool cheap[2] = {true, true};
  bool speed_p;

  /* If the targer has no lshift in word_mode, the operation will most
     probably not be cheap.  ??? Does GCC even work for such targets?  */
  if (optab_handler (ashl_optab, word_mode) == CODE_FOR_nothing)
    return false;

  speed_p = optimize_insn_for_speed_p ();

  if (!init[speed_p])
    {
      rtx reg = gen_raw_REG (word_mode, 10000);
      int cost = set_src_cost (gen_rtx_ASHIFT (word_mode, const1_rtx, reg),
			       speed_p);
      cheap[speed_p] = cost < COSTS_N_INSNS (MAX_CASE_BIT_TESTS);
      init[speed_p] = true;
    }

  return cheap[speed_p];
}
Beispiel #3
0
static void
emit_case_bit_tests (gswitch *swtch, tree index_expr,
		     tree minval, tree range, tree maxval)
{
  struct case_bit_test test[MAX_CASE_BIT_TESTS];
  unsigned int i, j, k;
  unsigned int count;

  basic_block switch_bb = gimple_bb (swtch);
  basic_block default_bb, new_default_bb, new_bb;
  edge default_edge;
  bool update_dom = dom_info_available_p (CDI_DOMINATORS);

  vec<basic_block> bbs_to_fix_dom = vNULL;

  tree index_type = TREE_TYPE (index_expr);
  tree unsigned_index_type = unsigned_type_for (index_type);
  unsigned int branch_num = gimple_switch_num_labels (swtch);

  gimple_stmt_iterator gsi;
  gassign *shift_stmt;

  tree idx, tmp, csui;
  tree word_type_node = lang_hooks.types.type_for_mode (word_mode, 1);
  tree word_mode_zero = fold_convert (word_type_node, integer_zero_node);
  tree word_mode_one = fold_convert (word_type_node, integer_one_node);
  int prec = TYPE_PRECISION (word_type_node);
  wide_int wone = wi::one (prec);

  memset (&test, 0, sizeof (test));

  /* Get the edge for the default case.  */
  tmp = gimple_switch_default_label (swtch);
  default_bb = label_to_block (CASE_LABEL (tmp));
  default_edge = find_edge (switch_bb, default_bb);

  /* Go through all case labels, and collect the case labels, profile
     counts, and other information we need to build the branch tests.  */
  count = 0;
  for (i = 1; i < branch_num; i++)
    {
      unsigned int lo, hi;
      tree cs = gimple_switch_label (swtch, i);
      tree label = CASE_LABEL (cs);
      edge e = find_edge (switch_bb, label_to_block (label));
      for (k = 0; k < count; k++)
	if (e == test[k].target_edge)
	  break;

      if (k == count)
	{
	  gcc_checking_assert (count < MAX_CASE_BIT_TESTS);
	  test[k].mask = wi::zero (prec);
	  test[k].target_edge = e;
	  test[k].label = label;
	  test[k].bits = 1;
	  count++;
	}
      else
        test[k].bits++;

      lo = tree_to_uhwi (int_const_binop (MINUS_EXPR,
					  CASE_LOW (cs), minval));
      if (CASE_HIGH (cs) == NULL_TREE)
	hi = lo;
      else
	hi = tree_to_uhwi (int_const_binop (MINUS_EXPR,
					    CASE_HIGH (cs), minval));

      for (j = lo; j <= hi; j++)
	test[k].mask |= wi::lshift (wone, j);
    }

  qsort (test, count, sizeof (*test), case_bit_test_cmp);

  /* If all values are in the 0 .. BITS_PER_WORD-1 range, we can get rid of
     the minval subtractions, but it might make the mask constants more
     expensive.  So, compare the costs.  */
  if (compare_tree_int (minval, 0) > 0
      && compare_tree_int (maxval, GET_MODE_BITSIZE (word_mode)) < 0)
    {
      int cost_diff;
      HOST_WIDE_INT m = tree_to_uhwi (minval);
      rtx reg = gen_raw_REG (word_mode, 10000);
      bool speed_p = optimize_bb_for_speed_p (gimple_bb (swtch));
      cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
					      GEN_INT (-m)), speed_p);
      for (i = 0; i < count; i++)
	{
	  rtx r = immed_wide_int_const (test[i].mask, word_mode);
	  cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
				     word_mode, speed_p);
	  r = immed_wide_int_const (wi::lshift (test[i].mask, m), word_mode);
	  cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
				     word_mode, speed_p);
	}
      if (cost_diff > 0)
	{
	  for (i = 0; i < count; i++)
	    test[i].mask = wi::lshift (test[i].mask, m);
	  minval = build_zero_cst (TREE_TYPE (minval));
	  range = maxval;
	}
    }

  /* We generate two jumps to the default case label.
     Split the default edge, so that we don't have to do any PHI node
     updating.  */
  new_default_bb = split_edge (default_edge);

  if (update_dom)
    {
      bbs_to_fix_dom.create (10);
      bbs_to_fix_dom.quick_push (switch_bb);
      bbs_to_fix_dom.quick_push (default_bb);
      bbs_to_fix_dom.quick_push (new_default_bb);
    }

  /* Now build the test-and-branch code.  */

  gsi = gsi_last_bb (switch_bb);

  /* idx = (unsigned)x - minval.  */
  idx = fold_convert (unsigned_index_type, index_expr);
  idx = fold_build2 (MINUS_EXPR, unsigned_index_type, idx,
		     fold_convert (unsigned_index_type, minval));
  idx = force_gimple_operand_gsi (&gsi, idx,
				  /*simple=*/true, NULL_TREE,
				  /*before=*/true, GSI_SAME_STMT);

  /* if (idx > range) goto default */
  range = force_gimple_operand_gsi (&gsi,
				    fold_convert (unsigned_index_type, range),
				    /*simple=*/true, NULL_TREE,
				    /*before=*/true, GSI_SAME_STMT);
  tmp = fold_build2 (GT_EXPR, boolean_type_node, idx, range);
  new_bb = hoist_edge_and_branch_if_true (&gsi, tmp, default_edge, update_dom);
  if (update_dom)
    bbs_to_fix_dom.quick_push (new_bb);
  gcc_assert (gimple_bb (swtch) == new_bb);
  gsi = gsi_last_bb (new_bb);

  /* Any blocks dominated by the GIMPLE_SWITCH, but that are not successors
     of NEW_BB, are still immediately dominated by SWITCH_BB.  Make it so.  */
  if (update_dom)
    {
      vec<basic_block> dom_bbs;
      basic_block dom_son;

      dom_bbs = get_dominated_by (CDI_DOMINATORS, new_bb);
      FOR_EACH_VEC_ELT (dom_bbs, i, dom_son)
	{
	  edge e = find_edge (new_bb, dom_son);
	  if (e && single_pred_p (e->dest))
	    continue;
	  set_immediate_dominator (CDI_DOMINATORS, dom_son, switch_bb);
	  bbs_to_fix_dom.safe_push (dom_son);
	}
      dom_bbs.release ();
    }
Beispiel #4
0
static bool
attempt_change (rtx new_addr, rtx inc_reg)
{
  /* There are four cases: For the two cases that involve an add
     instruction, we are going to have to delete the add and insert a
     mov.  We are going to assume that the mov is free.  This is
     fairly early in the backend and there are a lot of opportunities
     for removing that move later.  In particular, there is the case
     where the move may be dead, this is what dead code elimination
     passes are for.  The two cases where we have an inc insn will be
     handled mov free.  */

  basic_block bb = BLOCK_FOR_INSN (mem_insn.insn);
  rtx mov_insn = NULL;
  int regno;
  rtx mem = *mem_insn.mem_loc;
  enum machine_mode mode = GET_MODE (mem);
  rtx new_mem;
  int old_cost = 0;
  int new_cost = 0;
  bool speed = optimize_bb_for_speed_p (bb);

  PUT_MODE (mem_tmp, mode);
  XEXP (mem_tmp, 0) = new_addr;

  old_cost = (set_src_cost (mem, speed)
	      + set_rtx_cost (PATTERN (inc_insn.insn), speed));
  new_cost = set_src_cost (mem_tmp, speed);

  /* The first item of business is to see if this is profitable.  */
  if (old_cost < new_cost)
    {
      if (dump_file)
	fprintf (dump_file, "cost failure old=%d new=%d\n", old_cost, new_cost);
      return false;
    }

  /* Jump through a lot of hoops to keep the attributes up to date.  We
     do not want to call one of the change address variants that take
     an offset even though we know the offset in many cases.  These
     assume you are changing where the address is pointing by the
     offset.  */
  new_mem = replace_equiv_address_nv (mem, new_addr);
  if (! validate_change (mem_insn.insn, mem_insn.mem_loc, new_mem, 0))
    {
      if (dump_file)
	fprintf (dump_file, "validation failure\n");
      return false;
    }

  /* From here to the end of the function we are committed to the
     change, i.e. nothing fails.  Generate any necessary movs, move
     any regnotes, and fix up the reg_next_{use,inc_use,def}.  */
  switch (inc_insn.form)
    {
    case FORM_PRE_ADD:
      /* Replace the addition with a move.  Do it at the location of
	 the addition since the operand of the addition may change
	 before the memory reference.  */
      mov_insn = insert_move_insn_before (inc_insn.insn,
					  inc_insn.reg_res, inc_insn.reg0);
      move_dead_notes (mov_insn, inc_insn.insn, inc_insn.reg0);

      regno = REGNO (inc_insn.reg_res);
      reg_next_def[regno] = mov_insn;
      reg_next_use[regno] = NULL;
      regno = REGNO (inc_insn.reg0);
      reg_next_use[regno] = mov_insn;
      df_recompute_luids (bb);
      break;

    case FORM_POST_INC:
      regno = REGNO (inc_insn.reg_res);
      if (reg_next_use[regno] == reg_next_inc_use[regno])
	reg_next_inc_use[regno] = NULL;

      /* Fallthru.  */
    case FORM_PRE_INC:
      regno = REGNO (inc_insn.reg_res);
      reg_next_def[regno] = mem_insn.insn;
      reg_next_use[regno] = NULL;

      break;

    case FORM_POST_ADD:
      mov_insn = insert_move_insn_before (mem_insn.insn,
					  inc_insn.reg_res, inc_insn.reg0);
      move_dead_notes (mov_insn, inc_insn.insn, inc_insn.reg0);

      /* Do not move anything to the mov insn because the instruction
	 pointer for the main iteration has not yet hit that.  It is
	 still pointing to the mem insn. */
      regno = REGNO (inc_insn.reg_res);
      reg_next_def[regno] = mem_insn.insn;
      reg_next_use[regno] = NULL;

      regno = REGNO (inc_insn.reg0);
      reg_next_use[regno] = mem_insn.insn;
      if ((reg_next_use[regno] == reg_next_inc_use[regno])
	  || (reg_next_inc_use[regno] == inc_insn.insn))
	reg_next_inc_use[regno] = NULL;
      df_recompute_luids (bb);
      break;

    case FORM_last:
    default:
      gcc_unreachable ();
    }

  if (!inc_insn.reg1_is_const)
    {
      regno = REGNO (inc_insn.reg1);
      reg_next_use[regno] = mem_insn.insn;
      if ((reg_next_use[regno] == reg_next_inc_use[regno])
	  || (reg_next_inc_use[regno] == inc_insn.insn))
	reg_next_inc_use[regno] = NULL;
    }

  delete_insn (inc_insn.insn);

  if (dump_file && mov_insn)
    {
      fprintf (dump_file, "inserting mov ");
      dump_insn_slim (dump_file, mov_insn);
    }

  /* Record that this insn has an implicit side effect.  */
  add_reg_note (mem_insn.insn, REG_INC, inc_reg);

  if (dump_file)
    {
      fprintf (dump_file, "****success ");
      dump_insn_slim (dump_file, mem_insn.insn);
    }

  return true;
}