Example #1
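/* Add a canonical induction variable to LOOP iterating NITER times, and
   rewrite the condition of the exit edge EXIT to test the new variable
   against zero.  */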
static void
create_canonical_iv (struct loop *loop, edge exit, tree niter)
{
  edge in;
  tree type, var;
  gcond *cond;
  gimple_stmt_iterator incr_at;
  enum tree_code cmp;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " iterations.\n");
    }

  cond = as_a <gcond *> (last_stmt (exit->src));
  in = EDGE_SUCC (exit->src, 0);
  if (in == exit)
    in = EDGE_SUCC (exit->src, 1);

  /* Note that we do not need to worry about overflows, since
     type of niter is always unsigned and all comparisons are
     just for equality/nonequality -- i.e. everything works
     with modulo arithmetic.  */

  type = TREE_TYPE (niter);
  niter = fold_build2 (PLUS_EXPR, type,
		       niter,
		       build_int_cst (type, 1));
  incr_at = gsi_last_bb (in->src);
  create_iv (niter,
	     build_int_cst (type, -1),
	     NULL_TREE, loop,
	     &incr_at, false, NULL, &var);

  cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
  gimple_cond_set_code (cond, cmp);
  gimple_cond_set_lhs (cond, var);
  gimple_cond_set_rhs (cond, build_int_cst (type, 0));
  update_stmt (cond);
}
Example #2
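/* Update profile information after ifcombine has merged the condition of
   OUTER_COND_BB into INNER_COND_BB: fold the count of the outer block's
   other successor edge into the inner block's edge leading to the same
   destination and recompute the edge probabilities.  */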
static void
update_profile_after_ifcombine (basic_block inner_cond_bb,
			        basic_block outer_cond_bb)
{
  edge outer_to_inner = find_edge (outer_cond_bb, inner_cond_bb);
  edge outer2 = (EDGE_SUCC (outer_cond_bb, 0) == outer_to_inner
		 ? EDGE_SUCC (outer_cond_bb, 1)
		 : EDGE_SUCC (outer_cond_bb, 0));
  edge inner_taken = EDGE_SUCC (inner_cond_bb, 0);
  edge inner_not_taken = EDGE_SUCC (inner_cond_bb, 1);
  
  if (inner_taken->dest != outer2->dest)
    std::swap (inner_taken, inner_not_taken);
  gcc_assert (inner_taken->dest == outer2->dest);

  /* In the following we assume that inner_cond_bb has a single predecessor.  */
  gcc_assert (single_pred_p (inner_cond_bb));

  /* Path outer_cond_bb->(outer2) needs to be merged into path
     outer_cond_bb->(outer_to_inner)->inner_cond_bb->(inner_taken)
     and probability of inner_not_taken updated.  */

  outer_to_inner->count = outer_cond_bb->count;
  inner_cond_bb->count = outer_cond_bb->count;
  inner_taken->count += outer2->count;
  outer2->count = 0;

  inner_taken->probability = outer2->probability
			     + RDIV (outer_to_inner->probability
				     * inner_taken->probability,
				     REG_BR_PROB_BASE);
  if (inner_taken->probability > REG_BR_PROB_BASE)
    inner_taken->probability = REG_BR_PROB_BASE;
  inner_not_taken->probability = REG_BR_PROB_BASE
				 - inner_taken->probability;

  outer_to_inner->probability = REG_BR_PROB_BASE;
  inner_cond_bb->frequency = outer_cond_bb->frequency;
  outer2->probability = 0;
}
Example #3
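/* Split off the matched pattern sequence starting at PATTERN_SEQS->insn:
   emit an indirect return jump through the link register after it, redirect
   the block's outgoing edges to the block of the return label, split the
   sequence into basic blocks according to SEQ_BLOCKS (caching their labels),
   and emit an insn before the sequence that loads the return address into
   the link register.  */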
static void
split_pattern_seq (void)
{
  rtx insn;
  basic_block bb;
  rtx retlabel, retjmp, saveinsn;
  int i;
  seq_block sb;

  insn = pattern_seqs->insn;
  bb = BLOCK_FOR_INSN (insn);

  /* Get the label after the sequence. This will be the return address. The
     label will be referenced using a symbol_ref so protect it from
     being deleted.  */
  retlabel = block_label_after (insn);
  LABEL_PRESERVE_P (retlabel) = 1;

  /* Emit an indirect jump via the link register after the sequence acting
     as the return insn.  Also emit a barrier and update the basic block.  */
  if (!find_reg_note (BB_END (bb), REG_NORETURN, NULL))
    retjmp = emit_jump_insn_after (gen_indirect_jump (pattern_seqs->link_reg),
                                   BB_END (bb));
  emit_barrier_after (BB_END (bb));

  /* Replace all outgoing edges with a new one to the block of RETLABEL.  */
  while (EDGE_COUNT (bb->succs) != 0)
    remove_edge (EDGE_SUCC (bb, 0));
  make_edge (bb, BLOCK_FOR_INSN (retlabel), EDGE_ABNORMAL);

  /* Split the sequence according to SEQ_BLOCKS and cache the label of the
     resulting basic blocks.  */
  i = 0;
  for (sb = seq_blocks; sb; sb = sb->next_seq_block)
    {
      for (; i < sb->length; i++)
        insn = prev_insn_in_block (insn);

      sb->label = block_label (split_block_and_df_analyze (bb, insn));
    }

  /* Emit an insn saving the return address to the link register before the
     sequence.  */
  saveinsn = emit_insn_after (gen_move_insn (pattern_seqs->link_reg,
                              gen_symbol_ref_rtx_for_label
                              (retlabel)), BB_END (bb));
  /* Update liveness info.  */
  SET_REGNO_REG_SET (df_get_live_out (bb),
                     REGNO (pattern_seqs->link_reg));
}
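
/* Check whether COND_BB ends an if-then-else whose THEN block is *THEN_BB
   and whose ELSE block is *ELSE_BB.  If either pointer is NULL, it is filled
   in from the corresponding successor edge.  Return true on success.  */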
static bool
recognize_if_then_else (basic_block cond_bb,
			basic_block *then_bb, basic_block *else_bb)
{
  edge t, e;

  if (EDGE_COUNT (cond_bb->succs) != 2)
    return false;

  /* Find the then/else edges.  */
  t = EDGE_SUCC (cond_bb, 0);
  e = EDGE_SUCC (cond_bb, 1);
  if (!(t->flags & EDGE_TRUE_VALUE))
    {
      edge tmp = t;
      t = e;
      e = tmp;
    }
  if (!(t->flags & EDGE_TRUE_VALUE)
      || !(e->flags & EDGE_FALSE_VALUE))
    return false;

  /* Check if the edge destinations point to the required block.  */
  if (*then_bb
      && t->dest != *then_bb)
    return false;
  if (*else_bb
      && e->dest != *else_bb)
    return false;

  if (!*then_bb)
    *then_bb = t->dest;
  if (!*else_bb)
    *else_bb = e->dest;

  return true;
}
Example #5
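/* Create a basic block containing the initialization insns emitted so far,
   place it right after the entry block, and reroute the single entry edge
   through it, emitting a jump when the original target is not the physically
   following block.  Return the new block.  */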
static basic_block
construct_init_block (void)
{
  basic_block init_block, first_block;
  edge e = NULL;
  int flags;

  /* Multiple entry points not supported yet.  */
  gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1);

  e = EDGE_SUCC (ENTRY_BLOCK_PTR, 0);

  /* When the entry edge points to the first basic block, we don't need a
     jump; otherwise we have to jump to the proper target.  */
  if (e && e->dest != ENTRY_BLOCK_PTR->next_bb)
    {
      tree label = tree_block_label (e->dest);

      emit_jump (label_rtx (label));
      flags = 0;
    }
  else
    flags = EDGE_FALLTHRU;

  init_block = create_basic_block (NEXT_INSN (get_insns ()),
				   get_last_insn (),
				   ENTRY_BLOCK_PTR);
  init_block->frequency = ENTRY_BLOCK_PTR->frequency;
  init_block->count = ENTRY_BLOCK_PTR->count;
  if (e)
    {
      first_block = e->dest;
      redirect_edge_succ (e, init_block);
      e = make_edge (init_block, first_block, flags);
    }
  else
    e = make_edge (init_block, EXIT_BLOCK_PTR, EDGE_FALLTHRU);
  e->probability = REG_BR_PROB_BASE;
  e->count = ENTRY_BLOCK_PTR->count;

  update_bb_for_insn (init_block);
  return init_block;
}
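
/* Unswitch LOOP on the invariant condition COND tested in block UNSWITCH_ON
   (CINSN is the compare insn when the condition uses a condition-code mode).
   The loop body is duplicated, a new block testing COND selects between the
   original body and the copy, and the branches that became unreachable in
   each copy are removed.  Return the new loop, or NULL if the duplication
   fails.  */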
static struct loop *
unswitch_loop (struct loop *loop, basic_block unswitch_on, rtx cond, rtx cinsn)
{
  edge entry, latch_edge, true_edge, false_edge, e;
  basic_block switch_bb, unswitch_on_alt;
  struct loop *nloop;
  int irred_flag, prob;
  rtx seq;

  /* Some sanity checking.  */
  gcc_assert (flow_bb_inside_loop_p (loop, unswitch_on));
  gcc_assert (EDGE_COUNT (unswitch_on->succs) == 2);
  gcc_assert (just_once_each_iteration_p (loop, unswitch_on));
  gcc_assert (!loop->inner);
  gcc_assert (flow_bb_inside_loop_p (loop, EDGE_SUCC (unswitch_on, 0)->dest));
  gcc_assert (flow_bb_inside_loop_p (loop, EDGE_SUCC (unswitch_on, 1)->dest));

  entry = loop_preheader_edge (loop);

  /* Make a copy.  */
  irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
  entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
  if (!duplicate_loop_to_header_edge (loop, entry, 1,
			      	      NULL, NULL, NULL, 0))
    return NULL;
  entry->flags |= irred_flag;

  /* Record the block with condition we unswitch on.  */
  unswitch_on_alt = get_bb_copy (unswitch_on);
  true_edge = BRANCH_EDGE (unswitch_on_alt);
  false_edge = FALLTHRU_EDGE (unswitch_on);
  latch_edge = single_succ_edge (get_bb_copy (loop->latch));

  /* Create a block with the condition.  */
  prob = true_edge->probability;
  switch_bb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
  seq = compare_and_jump_seq (XEXP (cond, 0), XEXP (cond, 1), GET_CODE (cond),
			      block_label (true_edge->dest),
			      prob, cinsn);
  emit_insn_after (seq, BB_END (switch_bb));
  e = make_edge (switch_bb, true_edge->dest, 0);
  e->probability = prob;
  e->count = latch_edge->count * prob / REG_BR_PROB_BASE;
  e = make_edge (switch_bb, FALLTHRU_EDGE (unswitch_on)->dest, EDGE_FALLTHRU);
  e->probability = false_edge->probability;
  e->count = latch_edge->count * (false_edge->probability) / REG_BR_PROB_BASE;

  if (irred_flag)
    {
      switch_bb->flags |= BB_IRREDUCIBLE_LOOP;
      EDGE_SUCC (switch_bb, 0)->flags |= EDGE_IRREDUCIBLE_LOOP;
      EDGE_SUCC (switch_bb, 1)->flags |= EDGE_IRREDUCIBLE_LOOP;
    }
  else
    {
      switch_bb->flags &= ~BB_IRREDUCIBLE_LOOP;
      EDGE_SUCC (switch_bb, 0)->flags &= ~EDGE_IRREDUCIBLE_LOOP;
      EDGE_SUCC (switch_bb, 1)->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }

  /* Loopify from the copy of LOOP body, constructing the new loop.  */
  nloop = loopify (latch_edge,
		   single_pred_edge (get_bb_copy (loop->header)), switch_bb,
		   BRANCH_EDGE (switch_bb), FALLTHRU_EDGE (switch_bb), true,
		   prob, REG_BR_PROB_BASE - prob);

  copy_loop_info (loop, nloop);
  /* Remove branches that are now unreachable in new loops.  */
  remove_path (true_edge);
  remove_path (false_edge);

  /* Preserve the simple loop preheaders.  */
  split_edge (loop_preheader_edge (loop));
  split_edge (loop_preheader_edge (nloop));

  return nloop;
}
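
/* Check whether we may unswitch LOOP on the condition ending block BB: the
   block must end in a simple conditional jump with both targets inside the
   loop, be executed exactly once per iteration, and the condition must be
   loop invariant.  Return the condition to unswitch on (storing the compare
   insn in *CINSN when the comparison is in a condition-code mode), or
   NULL_RTX if unswitching on BB is not possible.  */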
static rtx
may_unswitch_on (basic_block bb, struct loop *loop, rtx *cinsn)
{
  rtx test, at, op[2], stest;
  struct rtx_iv iv;
  unsigned i;
  enum machine_mode mode;

  /* BB must end in a simple conditional jump.  */
  if (EDGE_COUNT (bb->succs) != 2)
    return NULL_RTX;
  if (!any_condjump_p (BB_END (bb)))
    return NULL_RTX;

  /* Both branch targets must be inside the loop.  */
  if (!flow_bb_inside_loop_p (loop, EDGE_SUCC (bb, 0)->dest)
      || !flow_bb_inside_loop_p (loop, EDGE_SUCC (bb, 1)->dest))
    return NULL_RTX;

  /* It must be executed just once each iteration (because otherwise we
     are unable to update dominator/irreducible loop information correctly).  */
  if (!just_once_each_iteration_p (loop, bb))
    return NULL_RTX;

  /* Condition must be invariant.  */
  test = get_condition (BB_END (bb), &at, true, false);
  if (!test)
    return NULL_RTX;

  for (i = 0; i < 2; i++)
    {
      op[i] = XEXP (test, i);

      if (CONSTANT_P (op[i]))
	continue;

      if (!iv_analyze (at, op[i], &iv))
	return NULL_RTX;
      if (iv.step != const0_rtx
	  || iv.first_special)
	return NULL_RTX;

      op[i] = get_iv_value (&iv, const0_rtx);
    }

  mode = GET_MODE (op[0]);
  if (mode == VOIDmode)
    mode = GET_MODE (op[1]);
  if (GET_MODE_CLASS (mode) == MODE_CC)
    {
      if (at != BB_END (bb))
	return NULL_RTX;

      if (!rtx_equal_p (op[0], XEXP (test, 0))
	  || !rtx_equal_p (op[1], XEXP (test, 1)))
	return NULL_RTX;

      *cinsn = BB_END (bb);
      return test;
    }

  stest = simplify_gen_relational (GET_CODE (test), SImode,
				   mode, op[0], op[1]);
  if (stest == const0_rtx
      || stest == const_true_rtx)
    return stest;

  return canon_condition (gen_rtx_fmt_ee (GET_CODE (test), SImode,
					  op[0], op[1]));
}
Example #8
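/* Main pass driver: for each loop optimized for speed, look for the join
   block of an if-then-else just before the loop latch that can profitably be
   duplicated into one of its predecessors, thereby splitting the paths
   leading to the latch.  Return true if the CFG changed.  */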
static bool
split_paths ()
{
    bool changed = false;
    loop_p loop;

    loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
    initialize_original_copy_tables ();
    calculate_dominance_info (CDI_DOMINATORS);

    FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
        /* Only split paths if we are optimizing this loop for speed.  */
        if (!optimize_loop_for_speed_p (loop))
            continue;

        /* See if there is a block that we can duplicate to split the
        path to the loop latch.  */
        basic_block bb
            = find_block_to_duplicate_for_splitting_paths (loop->latch);

        /* BB is the merge point for an IF-THEN-ELSE we want to transform.

        Essentially we want to create a duplicate of BB and redirect the
         first predecessor of BB to the duplicate (leaving the second
         predecessor as is).  This will split the path leading to the latch,
         re-using BB to avoid useless copying.  */
        if (bb && is_feasible_trace (bb))
        {
            if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file,
                         "Duplicating join block %d into predecessor paths\n",
                         bb->index);
            basic_block pred0 = EDGE_PRED (bb, 0)->src;
            transform_duplicate (pred0, bb);
            changed = true;

            /* If BB has an outgoing edge marked as IRREDUCIBLE, then
               duplicating BB may result in an irreducible region turning
               into a natural loop.

               Long term we might want to hook this into the block
               duplication code, but as we've seen with similar changes
               for edge removal, that can be somewhat risky.  */
            if (EDGE_SUCC (bb, 0)->flags & EDGE_IRREDUCIBLE_LOOP
                    || EDGE_SUCC (bb, 1)->flags & EDGE_IRREDUCIBLE_LOOP)
            {
                if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file,
                             "Join block %d has EDGE_IRREDUCIBLE_LOOP set.  "
                             "Scheduling loop fixups.\n",
                             bb->index);
                loops_state_set (LOOPS_NEED_FIXUP);
            }
        }
    }

    loop_optimizer_finalize ();
    free_original_copy_tables ();
    return changed;
}
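
/* Try to replace the PHI node PHI with a MIN_EXPR or MAX_EXPR computation.
   COND_BB contains the ordered comparison, MIDDLE_BB is the (possibly empty)
   block between the condition and the PHI, and E0 and E1 are the edges into
   the join block carrying the PHI arguments ARG0 and ARG1.  Return true if
   the replacement was performed.  */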
static bool
minmax_replacement (basic_block cond_bb, basic_block middle_bb,
                    edge e0, edge e1, gimple phi,
                    tree arg0, tree arg1)
{
    tree result, type;
    gimple cond, new_stmt;
    edge true_edge, false_edge;
    enum tree_code cmp, minmax, ass_code;
    tree smaller, larger, arg_true, arg_false;
    gimple_stmt_iterator gsi, gsi_from;

    type = TREE_TYPE (PHI_RESULT (phi));

    /* The optimization may be unsafe due to NaNs.  */
    if (HONOR_NANS (TYPE_MODE (type)))
        return false;

    cond = last_stmt (cond_bb);
    cmp = gimple_cond_code (cond);
    result = PHI_RESULT (phi);

    /* This transformation is only valid for order comparisons.  Record which
       operand is smaller/larger if the result of the comparison is true.  */
    if (cmp == LT_EXPR || cmp == LE_EXPR)
    {
        smaller = gimple_cond_lhs (cond);
        larger = gimple_cond_rhs (cond);
    }
    else if (cmp == GT_EXPR || cmp == GE_EXPR)
    {
        smaller = gimple_cond_rhs (cond);
        larger = gimple_cond_lhs (cond);
    }
    else
        return false;

    /* We need to know which is the true edge and which is the false
        edge so that we know if we have abs or negative abs.  */
    extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

    /* Forward the edges over the middle basic block.  */
    if (true_edge->dest == middle_bb)
        true_edge = EDGE_SUCC (true_edge->dest, 0);
    if (false_edge->dest == middle_bb)
        false_edge = EDGE_SUCC (false_edge->dest, 0);

    if (true_edge == e0)
    {
        gcc_assert (false_edge == e1);
        arg_true = arg0;
        arg_false = arg1;
    }
    else
    {
        gcc_assert (false_edge == e0);
        gcc_assert (true_edge == e1);
        arg_true = arg1;
        arg_false = arg0;
    }

    if (empty_block_p (middle_bb))
    {
        if (operand_equal_for_phi_arg_p (arg_true, smaller)
                && operand_equal_for_phi_arg_p (arg_false, larger))
        {
            /* Case

               if (smaller < larger)
               rslt = smaller;
               else
               rslt = larger;  */
            minmax = MIN_EXPR;
        }
        else if (operand_equal_for_phi_arg_p (arg_false, smaller)
                 && operand_equal_for_phi_arg_p (arg_true, larger))
            minmax = MAX_EXPR;
        else
            return false;
    }
    else
    {
        /* Recognize the following case, assuming d <= u:

        if (a <= u)
           b = MAX (a, d);
         x = PHI <b, u>

         This is equivalent to

         b = MAX (a, d);
         x = MIN (b, u);  */

        gimple assign = last_and_only_stmt (middle_bb);
        tree lhs, op0, op1, bound;

        if (!assign
                || gimple_code (assign) != GIMPLE_ASSIGN)
            return false;

        lhs = gimple_assign_lhs (assign);
        ass_code = gimple_assign_rhs_code (assign);
        if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
            return false;
        op0 = gimple_assign_rhs1 (assign);
        op1 = gimple_assign_rhs2 (assign);

        if (true_edge->src == middle_bb)
        {
            /* We got here if the condition is true, i.e., SMALLER < LARGER.  */
            if (!operand_equal_for_phi_arg_p (lhs, arg_true))
                return false;

            if (operand_equal_for_phi_arg_p (arg_false, larger))
            {
                /* Case

                if (smaller < larger)
                   {
                     r' = MAX_EXPR (smaller, bound)
                   }
                 r = PHI <r', larger>  --> to be turned to MIN_EXPR.  */
                if (ass_code != MAX_EXPR)
                    return false;

                minmax = MIN_EXPR;
                if (operand_equal_for_phi_arg_p (op0, smaller))
                    bound = op1;
                else if (operand_equal_for_phi_arg_p (op1, smaller))
                    bound = op0;
                else
                    return false;

                /* We need BOUND <= LARGER.  */
                if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
                                                    bound, larger)))
                    return false;
            }
            else if (operand_equal_for_phi_arg_p (arg_false, smaller))
            {
                /* Case

                if (smaller < larger)
                   {
                     r' = MIN_EXPR (larger, bound)
                   }
                 r = PHI <r', smaller>  --> to be turned to MAX_EXPR.  */
                if (ass_code != MIN_EXPR)
                    return false;

                minmax = MAX_EXPR;
                if (operand_equal_for_phi_arg_p (op0, larger))
                    bound = op1;
                else if (operand_equal_for_phi_arg_p (op1, larger))
                    bound = op0;
                else
                    return false;

                /* We need BOUND >= SMALLER.  */
                if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
                                                    bound, smaller)))
                    return false;
            }
            else
                return false;
        }
        else
        {
            /* We got here if the condition is false, i.e., SMALLER > LARGER.  */
            if (!operand_equal_for_phi_arg_p (lhs, arg_false))
                return false;

            if (operand_equal_for_phi_arg_p (arg_true, larger))
            {
                /* Case

                if (smaller > larger)
                   {
                     r' = MIN_EXPR (smaller, bound)
                   }
                 r = PHI <r', larger>  --> to be turned to MAX_EXPR.  */
                if (ass_code != MIN_EXPR)
                    return false;

                minmax = MAX_EXPR;
                if (operand_equal_for_phi_arg_p (op0, smaller))
                    bound = op1;
                else if (operand_equal_for_phi_arg_p (op1, smaller))
                    bound = op0;
                else
                    return false;

                /* We need BOUND >= LARGER.  */
                if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
                                                    bound, larger)))
                    return false;
            }
            else if (operand_equal_for_phi_arg_p (arg_true, smaller))
            {
                /* Case

                if (smaller > larger)
                   {
                     r' = MAX_EXPR (larger, bound)
                   }
                 r = PHI <r', smaller>  --> to be turned to MIN_EXPR.  */
                if (ass_code != MAX_EXPR)
                    return false;

                minmax = MIN_EXPR;
                if (operand_equal_for_phi_arg_p (op0, larger))
                    bound = op1;
                else if (operand_equal_for_phi_arg_p (op1, larger))
                    bound = op0;
                else
                    return false;

                /* We need BOUND <= SMALLER.  */
                if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
                                                    bound, smaller)))
                    return false;
            }
            else
                return false;
        }

        /* Move the statement from the middle block.  */
        gsi = gsi_last_bb (cond_bb);
        gsi_from = gsi_last_bb (middle_bb);
        gsi_move_before (&gsi_from, &gsi);
    }

    /* Emit the statement to compute min/max.  */
    result = duplicate_ssa_name (PHI_RESULT (phi), NULL);
    new_stmt = gimple_build_assign_with_ops (minmax, result, arg0, arg1);
    gsi = gsi_last_bb (cond_bb);
    gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

    replace_phi_edge_with_variable (cond_bb, e1, phi, result);
    return true;
}
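
/* Replace the argument of PHI arriving over edge E with NEW_TREE, turn the
   direct edge from COND_BLOCK to the PHI's block into a fallthrough, delete
   the basic block forming the other arm of the diamond, and remove the
   COND_EXPR at the end of COND_BLOCK.  */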
static void
replace_phi_edge_with_variable (basic_block cond_block,
                                edge e, gimple phi, tree new_tree)
{
    basic_block bb = gimple_bb (phi);
    basic_block block_to_remove;
    gimple_stmt_iterator gsi;

    /* Change the PHI argument to NEW_TREE.  */
    SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);

    /* Remove the empty basic block.  */
    if (EDGE_SUCC (cond_block, 0)->dest == bb)
    {
        EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
        EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
        EDGE_SUCC (cond_block, 0)->probability = REG_BR_PROB_BASE;
        EDGE_SUCC (cond_block, 0)->count += EDGE_SUCC (cond_block, 1)->count;

        block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
    }
    else
    {
        EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
        EDGE_SUCC (cond_block, 1)->flags
        &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
        EDGE_SUCC (cond_block, 1)->probability = REG_BR_PROB_BASE;
        EDGE_SUCC (cond_block, 1)->count += EDGE_SUCC (cond_block, 0)->count;

        block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
    }
    delete_basic_block (block_to_remove);

    /* Eliminate the COND_EXPR at the end of COND_BLOCK.  */
    gsi = gsi_last_bb (cond_block);
    gsi_remove (&gsi, true);

    if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "COND_EXPR in block %d and PHI in block %d converted to straightline code.\n",
                 cond_block->index,
                 bb->index);
}
Example #11
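/* Check whether the loop header block HEADER of LOOP should be duplicated as
   part of loop header copying.  *LIMIT is decremented by the size of the
   block; return false when the limit is exhausted, when HEADER does not end
   in a conditional, when it contains an expensive call, or when both of its
   successors stay inside the loop.  */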
static bool
should_duplicate_loop_header_p (basic_block header, struct loop *loop,
				int *limit)
{
  gimple_stmt_iterator bsi;
  gimple *last;

  gcc_assert (!header->aux);

  /* Loop header copying usually increases size of the code.  This used not to
     be true, since quite often it is possible to verify that the condition is
     satisfied in the first iteration and therefore to eliminate it.  Jump
     threading handles these cases now.  */
  if (optimize_loop_for_size_p (loop))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Not duplicating bb %i: optimizing for size.\n",
		 header->index);
      return false;
    }

  gcc_assert (EDGE_COUNT (header->succs) > 0);
  if (single_succ_p (header))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Not duplicating bb %i: it is single succ.\n",
		 header->index);
      return false;
    }

  if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)
      && flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 1)->dest))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Not duplicating bb %i: both sucessors are in loop.\n",
		 loop->num);
      return false;
    }

  /* If this is not the original loop header, we want it to have just
     one predecessor in order to match the && pattern.  */
  if (header != loop->header && !single_pred_p (header))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Not duplicating bb %i: it has mutiple predecestors.\n",
		 header->index);
      return false;
    }

  last = last_stmt (header);
  if (gimple_code (last) != GIMPLE_COND)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Not duplicating bb %i: it does not end by conditional.\n",
		 header->index);
      return false;
    }

  /* Count number of instructions and punt on calls.  */
  for (bsi = gsi_start_bb (header); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      last = gsi_stmt (bsi);

      if (gimple_code (last) == GIMPLE_LABEL)
	continue;

      if (is_gimple_debug (last))
	continue;

      if (gimple_code (last) == GIMPLE_CALL
	  && !gimple_inexpensive_call_p (as_a <gcall *> (last)))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "  Not duplicating bb %i: it contains call.\n",
		     header->index);
	  return false;
	}

      *limit -= estimate_num_insns (last, &eni_size_weights);
      if (*limit < 0)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "  Not duplicating bb %i contains too many insns.\n",
		     header->index);
	  return false;
	}
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "    Will duplicate bb %i\n", header->index); 
  return true;
}
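
/* For each loop that is not already in do-while form, copy the chain of
   header blocks ending in the exit condition in front of the loop, so that
   the loop body starts with the exit test.  */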
static void
copy_loop_headers (void)
{
  struct loops *loops;
  unsigned i;
  struct loop *loop;
  basic_block header;
  edge exit;
  basic_block *bbs;
  unsigned n_bbs;

  loops = loop_optimizer_init (dump_file);
  if (!loops)
    return;
  rewrite_into_loop_closed_ssa ();
  
  /* We do not try to keep the information about irreducible regions
     up-to-date.  */
  loops->state &= ~LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS;

#ifdef ENABLE_CHECKING
  verify_loop_structure (loops);
#endif

  bbs = xmalloc (sizeof (basic_block) * n_basic_blocks);

  for (i = 1; i < loops->num; i++)
    {
      /* Copy at most 20 insns.  */
      int limit = 20;

      loop = loops->parray[i];
      if (!loop)
	continue;
      header = loop->header;

      /* If the loop is already a do-while style one (either because it was
	 written as such, or because jump threading transformed it into one),
	 we might be in fact peeling the first iteration of the loop.  This
	 in general is not a good idea.  */
      if (do_while_loop_p (loop))
	continue;

      /* Iterate the header copying up to limit; this takes care of the cases
	 like while (a && b) {...}, where we want to have both of the conditions
	 copied.  TODO -- handle while (a || b) - like cases, by not requiring
	 the header to have just a single successor and copying up to
	 postdominator.  */

      exit = NULL;
      n_bbs = 0;
      while (should_duplicate_loop_header_p (header, loop, &limit))
	{
	  /* Find a successor of header that is inside a loop; i.e. the new
	     header after the condition is copied.  */
	  if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest))
	    exit = EDGE_SUCC (header, 0);
	  else
	    exit = EDGE_SUCC (header, 1);
	  bbs[n_bbs++] = header;
	  header = exit->dest;
	}

      if (!exit)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Duplicating header of the loop %d up to edge %d->%d.\n",
		 loop->num, exit->src->index, exit->dest->index);

      /* Ensure that the header will have just the latch as a predecessor
	 inside the loop.  */
      if (EDGE_COUNT (exit->dest->preds) > 1)
	exit = EDGE_SUCC (loop_split_edge_with (exit, NULL), 0);

      if (!tree_duplicate_sese_region (loop_preheader_edge (loop), exit,
				       bbs, n_bbs, NULL))
	{
	  fprintf (dump_file, "Duplication failed.\n");
	  continue;
	}

      /* Ensure that the latch and the preheader are simple (we know that they
	 are not now, since there was the loop exit condition).  */
      loop_split_edge_with (loop_preheader_edge (loop), NULL);
      loop_split_edge_with (loop_latch_edge (loop), NULL);
    }

  free (bbs);

#ifdef ENABLE_CHECKING
  verify_loop_closed_ssa ();
#endif

  loop_optimizer_finalize (loops, NULL);
}
Example #13
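/* Modify LOOP to use a low-overhead "doloop" looping instruction.  DESC
   describes the number of iterations, DOLOOP_SEQ is the looping pattern to
   emit at the end of the loop, CONDITION is the termination test that the
   pattern performs on the count register, and COUNT is the expression
   computing the number of iterations.  The count register is initialized in
   the preheader, DESC->noloop_assumptions are tested there (resetting the
   counter when they do not hold), the doloop pattern is emitted and the exit
   edges are fixed up.  */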
static void
doloop_modify (struct loop *loop, struct niter_desc *desc,
               rtx doloop_seq, rtx condition, rtx count)
{
    rtx counter_reg;
    rtx tmp, noloop = NULL_RTX;
    rtx sequence;
    rtx jump_insn;
    rtx jump_label;
    int nonneg = 0, irr;
    bool increment_count;
    basic_block loop_end = desc->out_edge->src;
    enum machine_mode mode;

    jump_insn = BB_END (loop_end);

    if (dump_file)
    {
        fprintf (dump_file, "Doloop: Inserting doloop pattern (");
        if (desc->const_iter)
            fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
        else
            fputs ("runtime", dump_file);
        fputs (" iterations).\n", dump_file);
    }

    /* Discard original jump to continue loop.  The original compare
       result may still be live, so it cannot be discarded explicitly.  */
    delete_insn (jump_insn);

    counter_reg = XEXP (condition, 0);
    if (GET_CODE (counter_reg) == PLUS)
        counter_reg = XEXP (counter_reg, 0);
    mode = GET_MODE (counter_reg);

    increment_count = false;
    switch (GET_CODE (condition))
    {
    case NE:
        /* Currently only NE tests against zero and one are supported.  */
        if (XEXP (condition, 1) == const1_rtx)
        {
            increment_count = true;
            noloop = const1_rtx;
        }
        else if (XEXP (condition, 1) == const0_rtx)
            noloop = const0_rtx;
        else
            abort ();
        break;

    case GE:
        /* Currently only GE tests against zero are supported.  */
        if (XEXP (condition, 1) != const0_rtx)
            abort ();

        noloop = constm1_rtx;

        /* The iteration count does not need incrementing for a GE test.  */
        increment_count = false;

        /* Determine if the iteration counter will be non-negative.
        Note that the maximum value loaded is iterations_max - 1.  */
        if (desc->niter_max
                <= ((unsigned HOST_WIDEST_INT) 1
                    << (GET_MODE_BITSIZE (mode) - 1)))
            nonneg = 1;
        break;

    /* Abort if an invalid doloop pattern has been generated.  */
    default:
        abort ();
    }

    if (increment_count)
        count = simplify_gen_binary (PLUS, mode, count, const1_rtx);

    /* Insert initialization of the count register into the loop header.  */
    start_sequence ();
    tmp = force_operand (count, counter_reg);
    convert_move (counter_reg, tmp, 1);
    sequence = get_insns ();
    end_sequence ();
    emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));

    if (desc->noloop_assumptions)
    {
        rtx ass = copy_rtx (desc->noloop_assumptions);
        basic_block preheader = loop_preheader_edge (loop)->src;
        basic_block set_zero
            = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
        basic_block new_preheader
            = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
        basic_block bb;
        edge te;
        gcov_type cnt;

        /* Expand the condition testing the assumptions and if it does not pass,
        reset the count register to 0.  */
        add_test (XEXP (ass, 0), preheader, set_zero);
        EDGE_SUCC (preheader, 0)->flags &= ~EDGE_FALLTHRU;
        cnt = EDGE_SUCC (preheader, 0)->count;
        EDGE_SUCC (preheader, 0)->probability = 0;
        EDGE_SUCC (preheader, 0)->count = 0;
        irr = EDGE_SUCC (preheader, 0)->flags & EDGE_IRREDUCIBLE_LOOP;
        te = make_edge (preheader, new_preheader, EDGE_FALLTHRU | irr);
        te->probability = REG_BR_PROB_BASE;
        te->count = cnt;
        set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);

        set_zero->count = 0;
        set_zero->frequency = 0;

        for (ass = XEXP (ass, 1); ass; ass = XEXP (ass, 1))
        {
            bb = loop_split_edge_with (te, NULL_RTX);
            te = EDGE_SUCC (bb, 0);
            add_test (XEXP (ass, 0), bb, set_zero);
            make_edge (bb, set_zero, irr);
        }

        start_sequence ();
        convert_move (counter_reg, noloop, 0);
        sequence = get_insns ();
        end_sequence ();
        emit_insn_after (sequence, BB_END (set_zero));
    }

    /* Some targets (eg, C4x) need to initialize special looping
       registers.  */
#ifdef HAVE_doloop_begin
    {
        rtx init;
        unsigned level = get_loop_level (loop) + 1;
        init = gen_doloop_begin (counter_reg,
                                 desc->const_iter ? desc->niter_expr : const0_rtx,
                                 desc->niter_max,
                                 GEN_INT (level));
        if (init)
        {
            start_sequence ();
            emit_insn (init);
            sequence = get_insns ();
            end_sequence ();
            emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
        }
    }
#endif

    /* Insert the new low-overhead looping insn.  */
    emit_jump_insn_after (doloop_seq, BB_END (loop_end));
    jump_insn = BB_END (loop_end);
    jump_label = block_label (desc->in_edge->dest);
    JUMP_LABEL (jump_insn) = jump_label;
    LABEL_NUSES (jump_label)++;

    /* Ensure the right fallthru edge is marked, in case we have reversed
       the condition.  */
    desc->in_edge->flags &= ~EDGE_FALLTHRU;
    desc->out_edge->flags |= EDGE_FALLTHRU;

    /* Add a REG_NONNEG note if the actual or estimated maximum number
       of iterations is non-negative.  */
    if (nonneg)
    {
        REG_NOTES (jump_insn)
            = gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn));
    }
}
Example #14
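/* Fix up the insn chain after basic block reordering in cfglayout mode:
   relink the insns to follow the block order recorded in the bb->aux chain,
   add or invert jumps so that every block transfers control to the correct
   successor, and rebuild the basic_block_info array in the new order.  */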
static void
fixup_reorder_chain (void)
{
  basic_block bb, prev_bb;
  int index;
  rtx insn = NULL;

  if (cfg_layout_function_header)
    {
      set_first_insn (cfg_layout_function_header);
      insn = cfg_layout_function_header;
      while (NEXT_INSN (insn))
	insn = NEXT_INSN (insn);
    }

  /* First do the bulk reordering -- rechain the blocks without regard to
     the needed changes to jumps and labels.  */

  for (bb = ENTRY_BLOCK_PTR->next_bb, index = NUM_FIXED_BLOCKS;
       bb != 0;
       bb = bb->aux, index++)
    {
      if (bb->il.rtl->header)
	{
	  if (insn)
	    NEXT_INSN (insn) = bb->il.rtl->header;
	  else
	    set_first_insn (bb->il.rtl->header);
	  PREV_INSN (bb->il.rtl->header) = insn;
	  insn = bb->il.rtl->header;
	  while (NEXT_INSN (insn))
	    insn = NEXT_INSN (insn);
	}
      if (insn)
	NEXT_INSN (insn) = BB_HEAD (bb);
      else
	set_first_insn (BB_HEAD (bb));
      PREV_INSN (BB_HEAD (bb)) = insn;
      insn = BB_END (bb);
      if (bb->il.rtl->footer)
	{
	  NEXT_INSN (insn) = bb->il.rtl->footer;
	  PREV_INSN (bb->il.rtl->footer) = insn;
	  while (NEXT_INSN (insn))
	    insn = NEXT_INSN (insn);
	}
    }

  gcc_assert (index == n_basic_blocks);

  NEXT_INSN (insn) = cfg_layout_function_footer;
  if (cfg_layout_function_footer)
    PREV_INSN (cfg_layout_function_footer) = insn;

  while (NEXT_INSN (insn))
    insn = NEXT_INSN (insn);

  set_last_insn (insn);
#ifdef ENABLE_CHECKING
  verify_insn_chain ();
#endif
  delete_dead_jumptables ();

  /* Now add jumps and labels as needed to match the blocks' new
     outgoing edges.  */

  for (bb = ENTRY_BLOCK_PTR->next_bb; bb ; bb = bb->aux)
    {
      edge e_fall, e_taken, e;
      rtx bb_end_insn;
      basic_block nb;
      edge_iterator ei;

      if (EDGE_COUNT (bb->succs) == 0)
	continue;

      /* Find the old fallthru edge, and another non-EH edge for
	 a taken jump.  */
      e_taken = e_fall = NULL;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->flags & EDGE_FALLTHRU)
	  e_fall = e;
	else if (! (e->flags & EDGE_EH))
	  e_taken = e;

      bb_end_insn = BB_END (bb);
      if (JUMP_P (bb_end_insn))
	{
	  if (any_condjump_p (bb_end_insn))
	    {
	      /* If the old fallthru is still next, nothing to do.  */
	      if (bb->aux == e_fall->dest
		  || e_fall->dest == EXIT_BLOCK_PTR)
		continue;

	      /* The degenerate case of a conditional jump jumping to the next
		 instruction can happen for jumps with side effects.  We need
		 to construct a forwarder block and this will be done just
		 fine by force_nonfallthru below.  */
	      if (!e_taken)
		;

	      /* There is another special case: if *neither* block is next,
		 such as happens at the very end of a function, then we'll
		 need to add a new unconditional jump.  Choose the taken
		 edge based on known or assumed probability.  */
	      else if (bb->aux != e_taken->dest)
		{
		  rtx note = find_reg_note (bb_end_insn, REG_BR_PROB, 0);

		  if (note
		      && INTVAL (XEXP (note, 0)) < REG_BR_PROB_BASE / 2
		      && invert_jump (bb_end_insn,
				      (e_fall->dest == EXIT_BLOCK_PTR
				       ? NULL_RTX
				       : label_for_bb (e_fall->dest)), 0))
		    {
		      e_fall->flags &= ~EDGE_FALLTHRU;
#ifdef ENABLE_CHECKING
		      gcc_assert (could_fall_through
				  (e_taken->src, e_taken->dest));
#endif
		      e_taken->flags |= EDGE_FALLTHRU;
		      update_br_prob_note (bb);
		      e = e_fall, e_fall = e_taken, e_taken = e;
		    }
		}

	      /* If the "jumping" edge is a crossing edge, and the fall
		 through edge is non-crossing, leave things as they are.  */
	      else if ((e_taken->flags & EDGE_CROSSING)
		       && !(e_fall->flags & EDGE_CROSSING))
		continue;

	      /* Otherwise we can try to invert the jump.  This will
		 basically never fail, however, keep up the pretense.  */
	      else if (invert_jump (bb_end_insn,
				    (e_fall->dest == EXIT_BLOCK_PTR
				     ? NULL_RTX
				     : label_for_bb (e_fall->dest)), 0))
		{
		  e_fall->flags &= ~EDGE_FALLTHRU;
#ifdef ENABLE_CHECKING
		  gcc_assert (could_fall_through
			      (e_taken->src, e_taken->dest));
#endif
		  e_taken->flags |= EDGE_FALLTHRU;
		  update_br_prob_note (bb);
		  continue;
		}
	    }
	  else
	    {
	      /* Otherwise we have some return, switch or computed
		 jump.  In the 99% case, there should not have been a
		 fallthru edge.  */
	      gcc_assert (returnjump_p (bb_end_insn) || !e_fall);
	      continue;
	    }
	}
      else
	{
	  /* No fallthru implies a noreturn function with EH edges, or
	     something similarly bizarre.  In any case, we don't need to
	     do anything.  */
	  if (! e_fall)
	    continue;

	  /* If the fallthru block is still next, nothing to do.  */
	  if (bb->aux == e_fall->dest)
	    continue;

	  /* A fallthru to exit block.  */
	  if (e_fall->dest == EXIT_BLOCK_PTR)
	    continue;
	}

      /* We got here if we need to add a new jump insn.  */
      nb = force_nonfallthru (e_fall);
      if (nb)
	{
	  nb->il.rtl->visited = 1;
	  nb->aux = bb->aux;
	  bb->aux = nb;
	  /* Don't process this new block.  */
	  bb = nb;

	  /* Make sure new bb is tagged for correct section (same as
	     fall-thru source, since you cannot fall through across
	     section boundaries).  */
	  BB_COPY_PARTITION (e_fall->src, single_pred (bb));
	  if (flag_reorder_blocks_and_partition
	      && targetm.have_named_sections
	      && JUMP_P (BB_END (bb))
	      && !any_condjump_p (BB_END (bb))
	      && (EDGE_SUCC (bb, 0)->flags & EDGE_CROSSING))
	    REG_NOTES (BB_END (bb)) = gen_rtx_EXPR_LIST
	      (REG_CROSSING_JUMP, NULL_RTX, REG_NOTES (BB_END (bb)));
	}
    }

  /* Put basic_block_info in the new order.  */

  if (dump_file)
    {
      fprintf (dump_file, "Reordered sequence:\n");
      for (bb = ENTRY_BLOCK_PTR->next_bb, index = NUM_FIXED_BLOCKS;
	   bb;
	   bb = bb->aux, index++)
	{
	  fprintf (dump_file, " %i ", index);
	  if (get_bb_original (bb))
	    fprintf (dump_file, "duplicate of %i ",
		     get_bb_original (bb)->index);
	  else if (forwarder_block_p (bb)
		   && !LABEL_P (BB_HEAD (bb)))
	    fprintf (dump_file, "compensation ");
	  else
	    fprintf (dump_file, "bb %i ", bb->index);
	  fprintf (dump_file, " [%i]\n", bb->frequency);
	}
    }

  prev_bb = ENTRY_BLOCK_PTR;
  bb = ENTRY_BLOCK_PTR->next_bb;
  index = NUM_FIXED_BLOCKS;

  for (; bb; prev_bb = bb, bb = bb->aux, index ++)
    {
      bb->index = index;
      SET_BASIC_BLOCK (index, bb);

      bb->prev_bb = prev_bb;
      prev_bb->next_bb = bb;
    }
  prev_bb->next_bb = EXIT_BLOCK_PTR;
  EXIT_BLOCK_PTR->prev_bb = prev_bb;

  /* Annoying special case - jump around dead jumptables left in the code.  */
  FOR_EACH_BB (bb)
    {
      edge e;
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->flags & EDGE_FALLTHRU)
	  break;

      if (e && !can_fallthru (e->src, e->dest))
	force_nonfallthru (e);
    }
}
Example #15
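/* For each loop that is not already in do-while form, duplicate the header
   blocks containing the exit conditions in front of the loop so that the
   loop body starts with the exit test.  Returns 0; no additional TODO flags
   are requested.  */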
static unsigned int
copy_loop_headers (void)
{
  struct loops *loops;
  unsigned i;
  struct loop *loop;
  basic_block header;
  edge exit, entry;
  basic_block *bbs, *copied_bbs;
  unsigned n_bbs;
  unsigned bbs_size;

  loops = loop_optimizer_init (LOOPS_HAVE_PREHEADERS
			       | LOOPS_HAVE_SIMPLE_LATCHES);
  if (!loops)
    return 0;

#ifdef ENABLE_CHECKING
  verify_loop_structure (loops);
#endif

  bbs = XNEWVEC (basic_block, n_basic_blocks);
  copied_bbs = XNEWVEC (basic_block, n_basic_blocks);
  bbs_size = n_basic_blocks;

  for (i = 1; i < loops->num; i++)
    {
      /* Copy at most 20 insns.  */
      int limit = 20;

      loop = loops->parray[i];
      if (!loop)
	continue;
      header = loop->header;

      /* If the loop is already a do-while style one (either because it was
	 written as such, or because jump threading transformed it into one),
	 we might be in fact peeling the first iteration of the loop.  This
	 in general is not a good idea.  */
      if (do_while_loop_p (loop))
	continue;

      /* Iterate the header copying up to limit; this takes care of the cases
	 like while (a && b) {...}, where we want to have both of the conditions
	 copied.  TODO -- handle while (a || b) - like cases, by not requiring
	 the header to have just a single successor and copying up to
	 postdominator.  */

      exit = NULL;
      n_bbs = 0;
      while (should_duplicate_loop_header_p (header, loop, &limit))
	{
	  /* Find a successor of header that is inside a loop; i.e. the new
	     header after the condition is copied.  */
	  if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest))
	    exit = EDGE_SUCC (header, 0);
	  else
	    exit = EDGE_SUCC (header, 1);
	  bbs[n_bbs++] = header;
	  gcc_assert (bbs_size > n_bbs);
	  header = exit->dest;
	}

      if (!exit)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Duplicating header of the loop %d up to edge %d->%d.\n",
		 loop->num, exit->src->index, exit->dest->index);

      /* Ensure that the header will have just the latch as a predecessor
	 inside the loop.  */
      if (!single_pred_p (exit->dest))
	exit = single_pred_edge (loop_split_edge_with (exit, NULL));

      entry = loop_preheader_edge (loop);

      if (!tree_duplicate_sese_region (entry, exit, bbs, n_bbs, copied_bbs))
	{
	  fprintf (dump_file, "Duplication failed.\n");
	  continue;
	}

      /* If the loop has the form "for (i = j; i < j + 10; i++)" then
	 this copying can introduce a case where we rely on undefined
	 signed overflow to eliminate the preheader condition, because
	 we assume that "j < j + 10" is true.  We don't want to warn
	 about that case for -Wstrict-overflow, because in general we
	 don't warn about overflow involving loops.  Prevent the
	 warning by setting TREE_NO_WARNING.  */
      if (warn_strict_overflow > 0)
	{
	  unsigned int i;

	  for (i = 0; i < n_bbs; ++i)
	    {
	      tree last;

	      last = last_stmt (copied_bbs[i]);
	      if (TREE_CODE (last) == COND_EXPR)
		TREE_NO_WARNING (last) = 1;
	    }
	}

      /* Ensure that the latch and the preheader are simple (we know that they
	 are not now, since there was the loop exit condition).  */
      loop_split_edge_with (loop_preheader_edge (loop), NULL);
      loop_split_edge_with (loop_latch_edge (loop), NULL);
    }

  free (bbs);
  free (copied_bbs);

  loop_optimizer_finalize (loops);
  return 0;
}
Example #16
/* The core routine of conditional store replacement and normal
   phi optimizations.  Both share much of the infrastructure in how
   to match applicable basic block patterns.  DO_STORE_ELIM is true
   when we want to do conditional store replacement, false otherwise.
   DO_HOIST_LOADS is true when we want to hoist adjacent loads out
   of diamond control flow patterns, false otherwise.  */
static unsigned int
tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
{
  basic_block bb;
  basic_block *bb_order;
  unsigned n, i;
  bool cfgchanged = false;
  hash_set<tree> *nontrap = 0;

  if (do_store_elim)
    /* Calculate the set of non-trapping memory accesses.  */
    nontrap = get_non_trapping ();

  /* Search every basic block for COND_EXPR we may be able to optimize.

     We walk the blocks in an order that guarantees that a block with
     a single predecessor is processed before the predecessor.
     This ensures that we collapse inner ifs before visiting the
     outer ones, and also that we do not try to visit a removed
     block.  */
  bb_order = single_pred_before_succ_order ();
  n = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  for (i = 0; i < n; i++)
    {
      gimple cond_stmt;
      gphi *phi;
      basic_block bb1, bb2;
      edge e1, e2;
      tree arg0, arg1;

      bb = bb_order[i];

      cond_stmt = last_stmt (bb);
      /* Check to see if the last statement is a GIMPLE_COND.  */
      if (!cond_stmt
          || gimple_code (cond_stmt) != GIMPLE_COND)
        continue;

      e1 = EDGE_SUCC (bb, 0);
      bb1 = e1->dest;
      e2 = EDGE_SUCC (bb, 1);
      bb2 = e2->dest;

      /* We cannot do the optimization on abnormal edges.  */
      if ((e1->flags & EDGE_ABNORMAL) != 0
          || (e2->flags & EDGE_ABNORMAL) != 0)
       continue;

      /* Skip if bb1 or bb2 has no successors, or if bb2 does not exist.  */
      if (EDGE_COUNT (bb1->succs) == 0
          || bb2 == NULL
	  || EDGE_COUNT (bb2->succs) == 0)
        continue;

      /* Find the bb which is the fall through to the other.  */
      if (EDGE_SUCC (bb1, 0)->dest == bb2)
        ;
      else if (EDGE_SUCC (bb2, 0)->dest == bb1)
        {
	  std::swap (bb1, bb2);
	  std::swap (e1, e2);
	}
      else if (do_store_elim
	       && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
	{
	  basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

	  if (!single_succ_p (bb1)
	      || (EDGE_SUCC (bb1, 0)->flags & EDGE_FALLTHRU) == 0
	      || !single_succ_p (bb2)
	      || (EDGE_SUCC (bb2, 0)->flags & EDGE_FALLTHRU) == 0
	      || EDGE_COUNT (bb3->preds) != 2)
	    continue;
	  if (cond_if_else_store_replacement (bb1, bb2, bb3))
	    cfgchanged = true;
	  continue;
	}
      else if (do_hoist_loads
		 && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
	{
	  basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

	  if (!FLOAT_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond_stmt)))
	      && single_succ_p (bb1)
	      && single_succ_p (bb2)
	      && single_pred_p (bb1)
	      && single_pred_p (bb2)
	      && EDGE_COUNT (bb->succs) == 2
	      && EDGE_COUNT (bb3->preds) == 2
	      /* If one edge or the other is dominant, a conditional move
		 is likely to perform worse than the well-predicted branch.  */
	      && !predictable_edge_p (EDGE_SUCC (bb, 0))
	      && !predictable_edge_p (EDGE_SUCC (bb, 1)))
	    hoist_adjacent_loads (bb, bb1, bb2, bb3);
	  continue;
	}
      else
	continue;

      e1 = EDGE_SUCC (bb1, 0);

      /* Make sure that bb1 is just a fall through.  */
      if (!single_succ_p (bb1)
	  || (e1->flags & EDGE_FALLTHRU) == 0)
        continue;

      /* Also make sure that bb1 has only one predecessor and that it
	 is bb.  */
      if (!single_pred_p (bb1)
          || single_pred (bb1) != bb)
	continue;

      if (do_store_elim)
	{
	  /* bb1 is the middle block, bb2 the join block, bb the split block,
	     e1 the fallthrough edge from bb1 to bb2.  We can't do the
	     optimization if the join block has more than two predecessors.  */
	  if (EDGE_COUNT (bb2->preds) > 2)
	    continue;
	  if (cond_store_replacement (bb1, bb2, e1, e2, nontrap))
	    cfgchanged = true;
	}
      else
	{
	  gimple_seq phis = phi_nodes (bb2);
	  gimple_stmt_iterator gsi;
	  bool candorest = true;

	  /* Value replacement can work with more than one PHI
	     so try that first. */
	  for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      phi = as_a <gphi *> (gsi_stmt (gsi));
	      arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
	      arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
	      if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1) == 2)
		{
		  candorest = false;
	          cfgchanged = true;
		  break;
		}
	    }

	  if (!candorest)
	    continue;

	  phi = single_non_singleton_phi_for_edges (phis, e1, e2);
	  if (!phi)
	    continue;

	  arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
	  arg1 = gimple_phi_arg_def (phi, e2->dest_idx);

	  /* Something is wrong if we cannot find the arguments in the PHI
	     node.  */
	  gcc_assert (arg0 != NULL && arg1 != NULL);

	  if (factor_out_conditional_conversion (e1, e2, phi, arg0, arg1))
	    {
	      /* factor_out_conditional_conversion may create a new PHI in
		 BB2 and eliminate an existing PHI in BB2.  Recompute values
		 that may be affected by that change.  */
	      phis = phi_nodes (bb2);
	      phi = single_non_singleton_phi_for_edges (phis, e1, e2);
	      gcc_assert (phi);
	      arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
	      arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
	      gcc_assert (arg0 != NULL && arg1 != NULL);
	    }

	  /* Do the replacement of conditional if it can be done.  */
	  if (conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	  else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	  else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	}
    }

  free (bb_order);

  if (do_store_elim)
    delete nontrap;
  /* If the CFG has changed, we should cleanup the CFG.  */
  if (cfgchanged && do_store_elim)
    {
      /* In cond-store replacement we have added some loads on edges
         and new VOPS (as we moved the store, and created a load).  */
      gsi_commit_edge_inserts ();
      return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
    }
  else if (cfgchanged)
    return TODO_cleanup_cfg;
  return 0;
}
Example #17
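/* Entry point of the PHI optimization pass: search every basic block ending
   in a COND_EXPR for an if-then-else whose join block has exactly one PHI
   node, and try conditional, value, abs and min/max replacement on it.
   Return TODO_cleanup_cfg if the CFG changed, 0 otherwise.  */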
static unsigned int
tree_ssa_phiopt (void)
{
  basic_block bb;
  basic_block *bb_order;
  unsigned n, i;
  bool cfgchanged = false;

  /* Search every basic block for COND_EXPR we may be able to optimize.

     We walk the blocks in an order that guarantees that a block with
     a single predecessor is processed before the predecessor.
     This ensures that we collapse inner ifs before visiting the
     outer ones, and also that we do not try to visit a removed
     block.  */
  bb_order = blocks_in_phiopt_order ();
  n = n_basic_blocks - NUM_FIXED_BLOCKS;

  for (i = 0; i < n; i++) 
    {
      tree cond_expr;
      tree phi;
      basic_block bb1, bb2;
      edge e1, e2;
      tree arg0, arg1;

      bb = bb_order[i];

      cond_expr = last_stmt (bb);
      /* Check to see if the last statement is a COND_EXPR.  */
      if (!cond_expr
          || TREE_CODE (cond_expr) != COND_EXPR)
        continue;

      e1 = EDGE_SUCC (bb, 0);
      bb1 = e1->dest;
      e2 = EDGE_SUCC (bb, 1);
      bb2 = e2->dest;

      /* We cannot do the optimization on abnormal edges.  */
      if ((e1->flags & EDGE_ABNORMAL) != 0
          || (e2->flags & EDGE_ABNORMAL) != 0)
       continue;

      /* Skip if bb1 or bb2 has no successors, or if bb2 does not exist.  */
      if (EDGE_COUNT (bb1->succs) == 0
          || bb2 == NULL
          || EDGE_COUNT (bb2->succs) == 0)
        continue;

      /* Find the bb which is the fall through to the other.  */
      if (EDGE_SUCC (bb1, 0)->dest == bb2)
        ;
      else if (EDGE_SUCC (bb2, 0)->dest == bb1)
        {
          basic_block bb_tmp = bb1;
          edge e_tmp = e1;
          bb1 = bb2;
          bb2 = bb_tmp;
          e1 = e2;
          e2 = e_tmp;
        }
      else
        continue;

      e1 = EDGE_SUCC (bb1, 0);

      /* Make sure that bb1 is just a fall through.  */
      if (!single_succ_p (bb1)
          || (e1->flags & EDGE_FALLTHRU) == 0)
        continue;

      /* Also make sure that bb1 has only one predecessor and that it
         is bb.  */
      if (!single_pred_p (bb1)
          || single_pred (bb1) != bb)
        continue;

      phi = phi_nodes (bb2);

      /* Check to make sure that there is only one PHI node.
         TODO: we could do it with more than one iff the other PHI nodes
         have the same elements for these two edges.  */
      if (!phi || PHI_CHAIN (phi) != NULL)
        continue;

      arg0 = PHI_ARG_DEF_TREE (phi, e1->dest_idx);
      arg1 = PHI_ARG_DEF_TREE (phi, e2->dest_idx);

      /* Something is wrong if we cannot find the arguments in the PHI
         node.  */
      gcc_assert (arg0 != NULL && arg1 != NULL);

      /* Do the replacement of conditional if it can be done.  */
      if (conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
        cfgchanged = true;
      else if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
        cfgchanged = true;
      else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
        cfgchanged = true;
      else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
        cfgchanged = true;
    }

  free (bb_order);
  
  /* If the CFG has changed, we should cleanup the CFG. */
  return cfgchanged ? TODO_cleanup_cfg : 0;
}
Example #18
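/* Try to unroll LOOP completely, i.e. NITER times, when NITER is a known
   constant.  EXIT is the exit edge that is removed in all but the last
   iteration, MAXITER is an upper bound derived from recorded loop bounds
   (or -1 if none is known), UL limits the allowed code growth and LOCUS is
   the location used for optimization reports.  */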
static bool
try_unroll_loop_completely (struct loop *loop,
			    edge exit, tree niter,
			    enum unroll_level ul,
			    HOST_WIDE_INT maxiter,
			    location_t locus)
{
  unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
  struct loop_size size;
  bool n_unroll_found = false;
  edge edge_to_cancel = NULL;
  int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;

  /* See if we proved the number of iterations to be a low constant.

     EXIT is an edge that will be removed in all but last iteration of 
     the loop.

     EDGE_TO_CANCEL is an edge that will be removed from the last iteration
     of the unrolled sequence and is expected to make the final loop not
     rolling. 

     If the number of executions of the loop is determined by a standard induction
     variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
     from the iv test.  */
  if (tree_fits_uhwi_p (niter))
    {
      n_unroll = tree_to_uhwi (niter);
      n_unroll_found = true;
      edge_to_cancel = EDGE_SUCC (exit->src, 0);
      if (edge_to_cancel == exit)
	edge_to_cancel = EDGE_SUCC (exit->src, 1);
    }
  /* We do not know the number of iterations and thus we can not eliminate
     the EXIT edge.  */
  else
    exit = NULL;

  /* See if we can improve our estimate by using recorded loop bounds.  */
  if (maxiter >= 0
      && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
    {
      n_unroll = maxiter;
      n_unroll_found = true;
      /* Loop terminates before the IV variable test, so we can not
	 remove it in the last iteration.  */
      edge_to_cancel = NULL;
    }

  if (!n_unroll_found)
    return false;

  if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Not unrolling loop %d "
		 "(--param max-completely-peeled-times limit reached).\n",
		 loop->num);
      return false;
    }

  if (!edge_to_cancel)
    edge_to_cancel = loop_edge_to_cancel (loop);

  if (n_unroll)
    {
      sbitmap wont_exit;
      edge e;
      unsigned i;
      bool large;
      vec<edge> to_remove = vNULL;
      if (ul == UL_SINGLE_ITER)
	return false;

      large = tree_estimate_loop_size
		 (loop, exit, edge_to_cancel, &size,
		  PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
      ninsns = size.overall;
      if (large)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
		     loop->num);
	  return false;
	}

      unr_insns = estimated_unrolled_size (&size, n_unroll);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
		   (int) unr_insns);
	}

      /* If the code is going to shrink, we don't need to be extra cautious
	 on guessing if the unrolling is going to be profitable.  */
      if (unr_insns
	  /* If there is an IV variable that will become constant, we save
	     one instruction in the loop prologue that is not otherwise
	     accounted for.  */
	  <= ninsns + (size.constant_iv != false))
	;
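      /* Hypothetical illustration of a constant IV: once

	     for (i = 0; i < 3; i++)
	       a[i] = 0;

	 is fully peeled, each copy sees a constant "i", so the increment and
	 the exit test fold away and later passes see only the stores; the
	 extra instruction allowed above credits that expected saving.  */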
      /* We unroll only inner loops, because we do not consider it profitable
	 otherwise.  We can still cancel the loopback edge of a loop that does
	 not roll; this is always a good idea.  */
      else if (ul == UL_NO_GROWTH)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
		     loop->num);
	  return false;
	}
      /* Outer loops tend to be less interesting candidates for complete
	 unrolling unless we can do a lot of propagation into the inner loop
	 body.  For now we disable outer loop unrolling when the code would
	 grow.  */
      else if (loop->inner)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "it is not innermost and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* If there is a call on a hot path through the loop, then
	 there is most probably not much to optimize.  */
      else if (size.num_non_pure_calls_on_hot_path)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "contains call and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* If there is a pure/const call in the function, then we
	 can still optimize the unrolled loop body if it contains
	 some other interesting code than the calls and the code
	 storing or accumulating the return value.  */
      else if (size.num_pure_calls_on_hot_path
	       /* One IV increment, one test, one ivtmp store
		  and one useful stmt.  That is about the minimal loop
		  doing a pure call.  */
	       && (size.non_call_stmts_on_hot_path
		   <= 3 + size.num_pure_calls_on_hot_path))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "contains just pure calls and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* Complete unrolling is a major win when control flow is removed and
	 one big basic block is created.  If the loop contains control flow,
	 the optimization may still be a win because it eliminates the loop
	 overhead, but it may also blow up the branch predictor tables.
	 Limit the number of branches on the hot path through the peeled
	 sequence.  */
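      /* Rough worked example with hypothetical numbers: a body with 3
	 conditional branches on its hot path peeled 16 times lays down
	 48 branches back to back, which the check below measures against
	 --param max-peel-branches.  */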
      else if (size.num_branches_on_hot_path * (int)n_unroll
	       > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     " number of branches on hot path in the unrolled sequence"
		     " reach --param max-peel-branches limit.\n",
		     loop->num);
	  return false;
	}
      else if (unr_insns
	       > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "(--param max-completely-peeled-insns limit reached).\n",
		     loop->num);
	  return false;
	}
      dump_printf_loc (report_flags, locus,
                       "loop turned into non-loop; it never loops.\n");

      initialize_original_copy_tables ();
      wont_exit = sbitmap_alloc (n_unroll + 1);
      bitmap_ones (wont_exit);
      bitmap_clear_bit (wont_exit, 0);
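      /* Presumably a set bit in WONT_EXIT marks a duplicate known not to
	 leave the loop through EXIT, letting its exit test be dropped; the
	 one cleared bit keeps the test in the corresponding copy.  */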

      if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
						 n_unroll, wont_exit,
						 exit, &to_remove,
						 DLTHE_FLAG_UPDATE_FREQ
						 | DLTHE_FLAG_COMPLETTE_PEEL))
	{
          free_original_copy_tables ();
	  free (wont_exit);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Failed to duplicate the loop\n");
	  return false;
	}

      FOR_EACH_VEC_ELT (to_remove, i, e)
	{
	  bool ok = remove_path (e);
	  gcc_assert (ok);
	}

      to_remove.release ();
      free (wont_exit);
      free_original_copy_tables ();
    }
/* The core routine of conditional store replacement and normal
   phi optimizations.  Both share much of the infrastructure in how
   to match applicable basic block patterns.  DO_STORE_ELIM is true
   when we want to do conditional store replacement, false otherwise.  */
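/* A hypothetical sketch of the diamond this worker looks for:

       bb:  if (a < b) goto bb1; else goto bb2;
       bb1: <empty>                    (single fallthrough edge to bb2)
       bb2: x = PHI <a (bb1), b (bb)>  (the join block)

   where, for instance, minmax_replacement could turn the PHI into
   x = MIN_EXPR <a, b>.  The names a, b and x are purely illustrative.  */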
static unsigned int
tree_ssa_phiopt_worker (bool do_store_elim)
{
    basic_block bb;
    basic_block *bb_order;
    unsigned n, i;
    bool cfgchanged = false;
    struct pointer_set_t *nontrap = 0;

    if (do_store_elim)
    {
        condstoretemp = NULL_TREE;
        /* Calculate the set of non-trapping memory accesses.  */
        nontrap = get_non_trapping ();
    }

    /* Search every basic block for COND_EXPR we may be able to optimize.

       We walk the blocks in an order that guarantees that a block with
       a single predecessor is processed before its predecessor.
       This ensures that we collapse inner ifs before visiting the
       outer ones, and also that we do not try to visit a removed
       block.  */
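    /* For instance, with a hypothetical nest

           if (a) { if (b) x = 1; }

       the inner COND block is visited before the outer one, so its diamond
       is already collapsed by the time the outer test is examined.  */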
    bb_order = blocks_in_phiopt_order ();
    n = n_basic_blocks - NUM_FIXED_BLOCKS;

    for (i = 0; i < n; i++)
    {
        gimple cond_stmt, phi;
        basic_block bb1, bb2;
        edge e1, e2;
        tree arg0, arg1;

        bb = bb_order[i];

        cond_stmt = last_stmt (bb);
        /* Check to see if the last statement is a GIMPLE_COND.  */
        if (!cond_stmt
                || gimple_code (cond_stmt) != GIMPLE_COND)
            continue;

        e1 = EDGE_SUCC (bb, 0);
        bb1 = e1->dest;
        e2 = EDGE_SUCC (bb, 1);
        bb2 = e2->dest;

        /* We cannot do the optimization on abnormal edges.  */
        if ((e1->flags & EDGE_ABNORMAL) != 0
                || (e2->flags & EDGE_ABNORMAL) != 0)
            continue;

        /* Skip if bb1 has no successors, bb2 is NULL, or bb2 has no
           successors.  */
        if (EDGE_COUNT (bb1->succs) == 0
                || bb2 == NULL
                || EDGE_COUNT (bb2->succs) == 0)
            continue;

        /* Find the bb which is the fall through to the other.  */
        if (EDGE_SUCC (bb1, 0)->dest == bb2)
            ;
        else if (EDGE_SUCC (bb2, 0)->dest == bb1)
        {
            basic_block bb_tmp = bb1;
            edge e_tmp = e1;
            bb1 = bb2;
            bb2 = bb_tmp;
            e1 = e2;
            e2 = e_tmp;
        }
        else
            continue;

        e1 = EDGE_SUCC (bb1, 0);

        /* Make sure that bb1 is just a fall through.  */
        if (!single_succ_p (bb1)
                || (e1->flags & EDGE_FALLTHRU) == 0)
            continue;

        /* Also make sure that bb1 has only one predecessor and that it
           is bb.  */
        if (!single_pred_p (bb1)
                || single_pred (bb1) != bb)
            continue;

        if (do_store_elim)
        {
            /* bb1 is the middle block, bb2 the join block, bb the split block,
               e1 the fallthrough edge from bb1 to bb2.  We can't do the
               optimization if the join block has more than two predecessors.  */
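            /* Sketch of the transform attempted below, with hypothetical
               names:

                   if (cond) *p = x;

               becomes a load of *p on the edge that skips the store, a PHI
               in the join block and one unconditional store, which is only
               safe when *p is known not to trap (hence the NONTRAP set).  */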
            if (EDGE_COUNT (bb2->preds) > 2)
                continue;
            if (cond_store_replacement (bb1, bb2, e1, e2, nontrap))
                cfgchanged = true;
        }
        else
        {
            gimple_seq phis = phi_nodes (bb2);

            /* Check to make sure that there is only one PHI node.
               TODO: we could do it with more than one iff the other PHI nodes
               have the same elements for these two edges.  */
            if (! gimple_seq_singleton_p (phis))
                continue;

            phi = gsi_stmt (gsi_start (phis));
            arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
            arg1 = gimple_phi_arg_def (phi, e2->dest_idx);

            /* Something is wrong if we cannot find the arguments in the PHI
               node.  */
            gcc_assert (arg0 != NULL && arg1 != NULL);

            /* Do the replacement of conditional if it can be done.  */
            if (conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
                cfgchanged = true;
            else if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
                cfgchanged = true;
            else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
                cfgchanged = true;
            else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
                cfgchanged = true;
        }
    }

    free (bb_order);

    if (do_store_elim)
        pointer_set_destroy (nontrap);
    /* If the CFG has changed, we should clean up the CFG.  */
    if (cfgchanged && do_store_elim)
    {
        /* In cond-store replacement we have added some loads on edges
           and new VOPS (as we moved the store, and created a load).  */
        gsi_commit_edge_inserts ();
        return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
    }
    else if (cfgchanged)
        return TODO_cleanup_cfg;
    return 0;
}