/* Gets exit edges of a LOOP, returning their number in N_EDGES.  */
edge *
get_loop_exit_edges (const struct loop *loop, unsigned int *n_edges)
{
  edge *edges, e;
  unsigned i, n;
  basic_block *body;

  if (loop->latch == EXIT_BLOCK_PTR)
    abort ();

  /* First pass: count the exit edges; second pass: fill the array.  */
  body = get_loop_body (loop);
  n = 0;
  for (i = 0; i < loop->num_nodes; i++)
    for (e = body[i]->succ; e; e = e->succ_next)
      if (!flow_bb_inside_loop_p (loop, e->dest))
        n++;
  edges = xmalloc (n * sizeof (edge));
  *n_edges = n;
  n = 0;
  for (i = 0; i < loop->num_nodes; i++)
    for (e = body[i]->succ; e; e = e->succ_next)
      if (!flow_bb_inside_loop_p (loop, e->dest))
        edges[n++] = e;
  free (body);

  return edges;
}
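/* Illustrative usage sketch (not part of the GCC sources): a caller of
   get_loop_exit_edges owns the returned array and must free it.  The
   helper `process_exit' is hypothetical, hence the #if 0.  */
#if 0
static void
example_walk_exits (struct loop *loop)
{
  unsigned n_exits, i;
  edge *exits = get_loop_exit_edges (loop, &n_exits);

  for (i = 0; i < n_exits; i++)
    process_exit (exits[i]);  /* Hypothetical per-edge handler.  */

  free (exits);
}
#endif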
static bool
should_duplicate_loop_header_p (basic_block header, struct loop *loop,
                                int *limit)
{
  gimple_stmt_iterator bsi;
  gimple last;

  /* Do not copy one block more than once (we do not really want to do
     loop peeling here).  */
  if (header->aux)
    return false;

  /* Loop header copying usually increases size of the code.  This used
     not to be true, since quite often it is possible to verify that
     the condition is satisfied in the first iteration and therefore
     to eliminate it.  Jump threading handles these cases now.  */
  if (optimize_loop_for_size_p (loop))
    return false;

  gcc_assert (EDGE_COUNT (header->succs) > 0);
  if (single_succ_p (header))
    return false;
  if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)
      && flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 1)->dest))
    return false;

  /* If this is not the original loop header, we want it to have just
     one predecessor in order to match the && pattern.  */
  if (header != loop->header && !single_pred_p (header))
    return false;

  last = last_stmt (header);
  if (gimple_code (last) != GIMPLE_COND)
    return false;

  /* Approximately copy the conditions that used to be used in jump.c --
     at most 20 insns and no calls.  */
  for (bsi = gsi_start_bb (header); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      last = gsi_stmt (bsi);

      if (gimple_code (last) == GIMPLE_LABEL)
        continue;

      if (is_gimple_debug (last))
        continue;

      if (is_gimple_call (last))
        return false;

      *limit -= estimate_num_insns (last, &eni_size_weights);
      if (*limit < 0)
        return false;
    }

  return true;
}
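/* Illustrative example (not from the GCC sources) of the "&& pattern"
   that the predicate above matches: in

     while (a && b)
       body;

   the loop entry is a chain of two conditional blocks, one testing A
   and one testing B.  Only the first block is the loop header proper;
   each following block must have a single predecessor so that the
   whole chain of conditions can be duplicated together.  */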
static void
flow_loop_exit_edges_find (struct loop *loop)
{
  edge e;
  basic_block node, *bbs;
  unsigned num_exits, i;

  loop->exit_edges = NULL;
  loop->num_exits = 0;

  /* Check all nodes within the loop to see if there are any
     successors not in the loop.  Note that a node may have multiple
     exiting edges.  */
  num_exits = 0;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      node = bbs[i];
      for (e = node->succ; e; e = e->succ_next)
        {
          basic_block dest = e->dest;

          if (!flow_bb_inside_loop_p (loop, dest))
            num_exits++;
        }
    }

  if (! num_exits)
    {
      free (bbs);
      return;
    }

  loop->exit_edges = xmalloc (num_exits * sizeof (edge *));

  /* Store all exiting edges into an array.  */
  num_exits = 0;
  for (i = 0; i < loop->num_nodes; i++)
    {
      node = bbs[i];
      for (e = node->succ; e; e = e->succ_next)
        {
          basic_block dest = e->dest;

          if (!flow_bb_inside_loop_p (loop, dest))
            loop->exit_edges[num_exits++] = e;
        }
    }
  free (bbs);
  loop->num_exits = num_exits;
}
static tree
tree_may_unswitch_on (basic_block bb, struct loop *loop)
{
  gimple stmt, def;
  tree cond, use;
  basic_block def_bb;
  ssa_op_iter iter;

  /* BB must end in a simple conditional jump.  */
  stmt = last_stmt (bb);
  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return NULL_TREE;

  /* To keep the things simple, we do not directly remove the conditions,
     but just replace tests with 0 != 0 resp. 1 != 0.  Prevent the infinite
     loop where we would unswitch again on such a condition.  */
  if (gimple_cond_true_p (stmt) || gimple_cond_false_p (stmt))
    return NULL_TREE;

  /* Condition must be invariant.  */
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      def = SSA_NAME_DEF_STMT (use);
      def_bb = gimple_bb (def);
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return NULL_TREE;
    }

  /* The condition is invariant; rebuild it as a tree so that the
     caller can unswitch on it.  */
  cond = build2 (gimple_cond_code (stmt), boolean_type_node,
                 gimple_cond_lhs (stmt),
                 gimple_cond_rhs (stmt));

  return cond;
}
/* Return true if edge E, whose destination must be the header of LOOP,
   enters the loop from outside, i.e. its source is not inside LOOP.  */
bool
flow_loop_outside_edge_p (const struct loop *loop, edge e)
{
  if (e->dest != loop->header)
    abort ();
  return !flow_bb_inside_loop_p (loop, e->src);
}
static bool
should_duplicate_loop_header_p (basic_block header, struct loop *loop,
                                int *limit)
{
  block_stmt_iterator bsi;
  tree last;

  /* Do not copy one block more than once (we do not really want to do
     loop peeling here).  */
  if (header->aux)
    return false;

  gcc_assert (EDGE_COUNT (header->succs) > 0);
  if (EDGE_COUNT (header->succs) == 1)
    return false;
  if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)
      && flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 1)->dest))
    return false;

  /* If this is not the original loop header, we want it to have just
     one predecessor in order to match the && pattern.  */
  if (header != loop->header
      && EDGE_COUNT (header->preds) >= 2)
    return false;

  last = last_stmt (header);
  if (TREE_CODE (last) != COND_EXPR)
    return false;

  /* Approximately copy the conditions that used to be used in jump.c --
     at most 20 insns and no calls.  */
  for (bsi = bsi_start (header); !bsi_end_p (bsi); bsi_next (&bsi))
    {
      last = bsi_stmt (bsi);

      if (TREE_CODE (last) == LABEL_EXPR)
        continue;

      if (get_call_expr_in (last))
        return false;

      *limit -= estimate_num_insns (last);
      if (*limit < 0)
        return false;
    }

  return true;
}
static tree
tree_may_unswitch_on (basic_block bb, struct loop *loop)
{
  tree stmt, def, cond, use;
  basic_block def_bb;
  ssa_op_iter iter;

  /* BB must end in a simple conditional jump.  */
  stmt = last_stmt (bb);
  if (!stmt || TREE_CODE (stmt) != COND_EXPR)
    return NULL_TREE;

  /* Condition must be invariant.  */
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      def = SSA_NAME_DEF_STMT (use);
      def_bb = bb_for_stmt (def);
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return NULL_TREE;
    }

  /* The condition is invariant; return it so that the caller can
     unswitch on it.  */
  cond = COND_EXPR_COND (stmt);

  return cond;
}
/* Checks that LOOPS are all right:
     -- sizes of loops are all right
     -- results of get_loop_body really belong to the loop
     -- loop headers have just a single entry edge and a single latch edge
     -- loop latches have only a single successor that is the header of
        their loop
     -- irreducible loops are correctly marked  */
void
verify_loop_structure (struct loops *loops)
{
  unsigned *sizes, i, j;
  sbitmap irreds;
  basic_block *bbs, bb;
  struct loop *loop;
  int err = 0;
  edge e;

  /* Check sizes.  */
  sizes = xcalloc (loops->num, sizeof (int));
  sizes[0] = 2;

  FOR_EACH_BB (bb)
    for (loop = bb->loop_father; loop; loop = loop->outer)
      sizes[loop->num]++;

  for (i = 0; i < loops->num; i++)
    {
      if (!loops->parray[i])
        continue;

      if (loops->parray[i]->num_nodes != sizes[i])
        {
          error ("Size of loop %d should be %d, not %d.",
                 i, sizes[i], loops->parray[i]->num_nodes);
          err = 1;
        }
    }

  free (sizes);

  /* Check get_loop_body.  */
  for (i = 1; i < loops->num; i++)
    {
      loop = loops->parray[i];
      if (!loop)
        continue;
      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
        if (!flow_bb_inside_loop_p (loop, bbs[j]))
          {
            error ("Bb %d does not belong to loop %d.", bbs[j]->index, i);
            err = 1;
          }

      free (bbs);
    }

  /* Check headers and latches.  */
  for (i = 1; i < loops->num; i++)
    {
      loop = loops->parray[i];
      if (!loop)
        continue;

      if ((loops->state & LOOPS_HAVE_PREHEADERS)
          && (!loop->header->pred->pred_next
              || loop->header->pred->pred_next->pred_next))
        {
          error ("Loop %d's header does not have exactly 2 entries.", i);
          err = 1;
        }
      if (loops->state & LOOPS_HAVE_SIMPLE_LATCHES)
        {
          if (!loop->latch->succ
              || loop->latch->succ->succ_next)
            {
              error ("Loop %d's latch does not have exactly 1 successor.", i);
              err = 1;
            }
          if (loop->latch->succ->dest != loop->header)
            {
              error ("Loop %d's latch does not have header as successor.", i);
              err = 1;
            }
          if (loop->latch->loop_father != loop)
            {
              error ("Loop %d's latch does not belong directly to it.", i);
              err = 1;
            }
        }
      if (loop->header->loop_father != loop)
        {
          error ("Loop %d's header does not belong directly to it.", i);
          err = 1;
        }
      if ((loops->state & LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)
          && (loop_latch_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP))
        {
          error ("Loop %d's latch is marked as part of irreducible region.", i);
          err = 1;
        }
    }

  /* Check irreducible loops.  */
  if (loops->state & LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)
    {
      /* Record old info.  */
      irreds = sbitmap_alloc (last_basic_block);
      FOR_EACH_BB (bb)
        {
          if (bb->flags & BB_IRREDUCIBLE_LOOP)
            SET_BIT (irreds, bb->index);
          else
            RESET_BIT (irreds, bb->index);
          for (e = bb->succ; e; e = e->succ_next)
            if (e->flags & EDGE_IRREDUCIBLE_LOOP)
              e->flags |= EDGE_ALL_FLAGS + 1;
        }

      /* Recount it.  */
      mark_irreducible_loops (loops);

      /* Compare.  */
      FOR_EACH_BB (bb)
        {
          if ((bb->flags & BB_IRREDUCIBLE_LOOP)
              && !TEST_BIT (irreds, bb->index))
            {
              error ("Basic block %d should be marked irreducible.",
                     bb->index);
              err = 1;
            }
          else if (!(bb->flags & BB_IRREDUCIBLE_LOOP)
                   && TEST_BIT (irreds, bb->index))
            {
              error ("Basic block %d should not be marked irreducible.",
                     bb->index);
              err = 1;
            }
          for (e = bb->succ; e; e = e->succ_next)
            {
              if ((e->flags & EDGE_IRREDUCIBLE_LOOP)
                  && !(e->flags & (EDGE_ALL_FLAGS + 1)))
                {
                  error ("Edge from %d to %d should be marked irreducible.",
                         e->src->index, e->dest->index);
                  err = 1;
                }
              else if (!(e->flags & EDGE_IRREDUCIBLE_LOOP)
                       && (e->flags & (EDGE_ALL_FLAGS + 1)))
                {
                  error ("Edge from %d to %d should not be marked irreducible.",
                         e->src->index, e->dest->index);
                  err = 1;
                }
              e->flags &= ~(EDGE_ALL_FLAGS + 1);
            }
        }
      free (irreds);
    }

  if (err)
    abort ();
}
static struct loop *
unswitch_loop (struct loop *loop, basic_block unswitch_on, rtx cond, rtx cinsn)
{
  edge entry, latch_edge, true_edge, false_edge, e;
  basic_block switch_bb, unswitch_on_alt;
  struct loop *nloop;
  int irred_flag, prob;
  rtx seq;

  /* Some sanity checking.  */
  gcc_assert (flow_bb_inside_loop_p (loop, unswitch_on));
  gcc_assert (EDGE_COUNT (unswitch_on->succs) == 2);
  gcc_assert (just_once_each_iteration_p (loop, unswitch_on));
  gcc_assert (!loop->inner);
  gcc_assert (flow_bb_inside_loop_p (loop, EDGE_SUCC (unswitch_on, 0)->dest));
  gcc_assert (flow_bb_inside_loop_p (loop, EDGE_SUCC (unswitch_on, 1)->dest));

  entry = loop_preheader_edge (loop);

  /* Make a copy.  */
  irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
  entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
  if (!duplicate_loop_to_header_edge (loop, entry, 1,
                                      NULL, NULL, NULL, 0))
    return NULL;
  entry->flags |= irred_flag;

  /* Record the block with condition we unswitch on.  */
  unswitch_on_alt = get_bb_copy (unswitch_on);
  true_edge = BRANCH_EDGE (unswitch_on_alt);
  false_edge = FALLTHRU_EDGE (unswitch_on);
  latch_edge = single_succ_edge (get_bb_copy (loop->latch));

  /* Create a block with the condition.  */
  prob = true_edge->probability;
  switch_bb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
  seq = compare_and_jump_seq (XEXP (cond, 0), XEXP (cond, 1), GET_CODE (cond),
                              block_label (true_edge->dest),
                              prob, cinsn);
  emit_insn_after (seq, BB_END (switch_bb));
  e = make_edge (switch_bb, true_edge->dest, 0);
  e->probability = prob;
  e->count = latch_edge->count * prob / REG_BR_PROB_BASE;
  e = make_edge (switch_bb, FALLTHRU_EDGE (unswitch_on)->dest, EDGE_FALLTHRU);
  e->probability = false_edge->probability;
  e->count = latch_edge->count * (false_edge->probability) / REG_BR_PROB_BASE;

  if (irred_flag)
    {
      switch_bb->flags |= BB_IRREDUCIBLE_LOOP;
      EDGE_SUCC (switch_bb, 0)->flags |= EDGE_IRREDUCIBLE_LOOP;
      EDGE_SUCC (switch_bb, 1)->flags |= EDGE_IRREDUCIBLE_LOOP;
    }
  else
    {
      switch_bb->flags &= ~BB_IRREDUCIBLE_LOOP;
      EDGE_SUCC (switch_bb, 0)->flags &= ~EDGE_IRREDUCIBLE_LOOP;
      EDGE_SUCC (switch_bb, 1)->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }

  /* Loopify from the copy of LOOP body, constructing the new loop.  */
  nloop = loopify (latch_edge,
                   single_pred_edge (get_bb_copy (loop->header)), switch_bb,
                   BRANCH_EDGE (switch_bb), FALLTHRU_EDGE (switch_bb), true,
                   prob, REG_BR_PROB_BASE - prob);

  copy_loop_info (loop, nloop);

  /* Remove branches that are now unreachable in new loops.  */
  remove_path (true_edge);
  remove_path (false_edge);

  /* Preserve the simple loop preheaders.  */
  split_edge (loop_preheader_edge (loop));
  split_edge (loop_preheader_edge (nloop));

  return nloop;
}
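/* Illustrative example (not from the GCC sources) of the effect of
   unswitch_loop at the source level: the invariant condition is tested
   once in the new SWITCH_BB and the loop body is duplicated, one copy
   per branch.

     for (i = 0; i < n; i++)
       if (flag)
         f ();
       else
         g ();

   becomes

     if (flag)
       for (i = 0; i < n; i++)
         f ();
     else
       for (i = 0; i < n; i++)
         g ();
*/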
static rtx
may_unswitch_on (basic_block bb, struct loop *loop, rtx *cinsn)
{
  rtx test, at, op[2], stest;
  struct rtx_iv iv;
  unsigned i;
  enum machine_mode mode;

  /* BB must end in a simple conditional jump.  */
  if (EDGE_COUNT (bb->succs) != 2)
    return NULL_RTX;
  if (!any_condjump_p (BB_END (bb)))
    return NULL_RTX;

  /* With branches inside loop.  */
  if (!flow_bb_inside_loop_p (loop, EDGE_SUCC (bb, 0)->dest)
      || !flow_bb_inside_loop_p (loop, EDGE_SUCC (bb, 1)->dest))
    return NULL_RTX;

  /* It must be executed just once each iteration (because otherwise we
     are unable to update dominator/irreducible loop information
     correctly).  */
  if (!just_once_each_iteration_p (loop, bb))
    return NULL_RTX;

  /* Condition must be invariant.  */
  test = get_condition (BB_END (bb), &at, true, false);
  if (!test)
    return NULL_RTX;

  for (i = 0; i < 2; i++)
    {
      op[i] = XEXP (test, i);

      if (CONSTANT_P (op[i]))
        continue;

      if (!iv_analyze (at, op[i], &iv))
        return NULL_RTX;
      if (iv.step != const0_rtx
          || iv.first_special)
        return NULL_RTX;

      op[i] = get_iv_value (&iv, const0_rtx);
    }

  mode = GET_MODE (op[0]);
  if (mode == VOIDmode)
    mode = GET_MODE (op[1]);
  if (GET_MODE_CLASS (mode) == MODE_CC)
    {
      if (at != BB_END (bb))
        return NULL_RTX;

      if (!rtx_equal_p (op[0], XEXP (test, 0))
          || !rtx_equal_p (op[1], XEXP (test, 1)))
        return NULL_RTX;

      *cinsn = BB_END (bb);
      return test;
    }

  stest = simplify_gen_relational (GET_CODE (test), SImode,
                                   mode, op[0], op[1]);
  if (stest == const0_rtx
      || stest == const_true_rtx)
    return stest;

  return canon_condition (gen_rtx_fmt_ee (GET_CODE (test), SImode,
                                          op[0], op[1]));
}
static gimple
vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out)
{
  gimple stmt;
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  tree var;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
          || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt))
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt))
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt))
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
                                                oprnd00, oprnd01, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
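/* Illustrative C loop (not from the GCC sources) that matches the
   dot-product pattern recognized above, assuming 16-bit operands
   widened into a 32-bit accumulator; the multiply-and-add in the body
   is what gets replaced by a single DOT_PROD_EXPR:

     short x[N], y[N];

     int
     dot_product (void)
     {
       int i, sum = 0;
       for (i = 0; i < N; i++)
         sum += x[i] * y[i];
       return sum;
     }
*/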
static void
copy_loop_headers (void)
{
  struct loops *loops;
  unsigned i;
  struct loop *loop;
  basic_block header;
  edge exit;
  basic_block *bbs;
  unsigned n_bbs;

  loops = loop_optimizer_init (dump_file);
  if (!loops)
    return;
  rewrite_into_loop_closed_ssa ();

  /* We do not try to keep the information about irreducible regions
     up-to-date.  */
  loops->state &= ~LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS;

#ifdef ENABLE_CHECKING
  verify_loop_structure (loops);
#endif

  bbs = xmalloc (sizeof (basic_block) * n_basic_blocks);

  for (i = 1; i < loops->num; i++)
    {
      /* Copy at most 20 insns.  */
      int limit = 20;

      loop = loops->parray[i];
      if (!loop)
        continue;
      header = loop->header;

      /* If the loop is already a do-while style one (either because it was
         written as such, or because jump threading transformed it into one),
         we might be in fact peeling the first iteration of the loop.  This
         in general is not a good idea.  */
      if (do_while_loop_p (loop))
        continue;

      /* Iterate the header copying up to limit; this takes care of the cases
         like while (a && b) {...}, where we want to have both of the
         conditions copied.  TODO -- handle while (a || b) - like cases, by
         not requiring the header to have just a single successor and copying
         up to postdominator.  */
      exit = NULL;
      n_bbs = 0;
      while (should_duplicate_loop_header_p (header, loop, &limit))
        {
          /* Find a successor of header that is inside a loop; i.e. the new
             header after the condition is copied.  */
          if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest))
            exit = EDGE_SUCC (header, 0);
          else
            exit = EDGE_SUCC (header, 1);
          bbs[n_bbs++] = header;
          header = exit->dest;
        }

      if (!exit)
        continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Duplicating header of the loop %d up to edge %d->%d.\n",
                 loop->num, exit->src->index, exit->dest->index);

      /* Ensure that the header will have just the latch as a predecessor
         inside the loop.  */
      if (EDGE_COUNT (exit->dest->preds) > 1)
        exit = EDGE_SUCC (loop_split_edge_with (exit, NULL), 0);

      if (!tree_duplicate_sese_region (loop_preheader_edge (loop), exit,
                                       bbs, n_bbs, NULL))
        {
          if (dump_file)
            fprintf (dump_file, "Duplication failed.\n");
          continue;
        }

      /* Ensure that the latch and the preheader are simple (we know that
         they are not now, since there was the loop exit condition).  */
      loop_split_edge_with (loop_preheader_edge (loop), NULL);
      loop_split_edge_with (loop_latch_edge (loop), NULL);
    }

  free (bbs);

#ifdef ENABLE_CHECKING
  verify_loop_closed_ssa ();
#endif

  loop_optimizer_finalize (loops, NULL);
}
static unsigned int
copy_loop_headers (void)
{
  struct loops *loops;
  unsigned i;
  struct loop *loop;
  basic_block header;
  edge exit, entry;
  basic_block *bbs, *copied_bbs;
  unsigned n_bbs;
  unsigned bbs_size;

  loops = loop_optimizer_init (LOOPS_HAVE_PREHEADERS
                               | LOOPS_HAVE_SIMPLE_LATCHES);
  if (!loops)
    return 0;

#ifdef ENABLE_CHECKING
  verify_loop_structure (loops);
#endif

  bbs = XNEWVEC (basic_block, n_basic_blocks);
  copied_bbs = XNEWVEC (basic_block, n_basic_blocks);
  bbs_size = n_basic_blocks;

  for (i = 1; i < loops->num; i++)
    {
      /* Copy at most 20 insns.  */
      int limit = 20;

      loop = loops->parray[i];
      if (!loop)
        continue;
      header = loop->header;

      /* If the loop is already a do-while style one (either because it was
         written as such, or because jump threading transformed it into one),
         we might be in fact peeling the first iteration of the loop.  This
         in general is not a good idea.  */
      if (do_while_loop_p (loop))
        continue;

      /* Iterate the header copying up to limit; this takes care of the cases
         like while (a && b) {...}, where we want to have both of the
         conditions copied.  TODO -- handle while (a || b) - like cases, by
         not requiring the header to have just a single successor and copying
         up to postdominator.  */
      exit = NULL;
      n_bbs = 0;
      while (should_duplicate_loop_header_p (header, loop, &limit))
        {
          /* Find a successor of header that is inside a loop; i.e. the new
             header after the condition is copied.  */
          if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest))
            exit = EDGE_SUCC (header, 0);
          else
            exit = EDGE_SUCC (header, 1);
          bbs[n_bbs++] = header;
          gcc_assert (bbs_size > n_bbs);
          header = exit->dest;
        }

      if (!exit)
        continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Duplicating header of the loop %d up to edge %d->%d.\n",
                 loop->num, exit->src->index, exit->dest->index);

      /* Ensure that the header will have just the latch as a predecessor
         inside the loop.  */
      if (!single_pred_p (exit->dest))
        exit = single_pred_edge (loop_split_edge_with (exit, NULL));

      entry = loop_preheader_edge (loop);

      if (!tree_duplicate_sese_region (entry, exit, bbs, n_bbs, copied_bbs))
        {
          if (dump_file)
            fprintf (dump_file, "Duplication failed.\n");
          continue;
        }

      /* If the loop has the form "for (i = j; i < j + 10; i++)" then
         this copying can introduce a case where we rely on undefined
         signed overflow to eliminate the preheader condition, because
         we assume that "j < j + 10" is true.  We don't want to warn
         about that case for -Wstrict-overflow, because in general we
         don't warn about overflow involving loops.  Prevent the
         warning by setting TREE_NO_WARNING.  */
      if (warn_strict_overflow > 0)
        {
          unsigned int i;

          for (i = 0; i < n_bbs; ++i)
            {
              tree last;

              last = last_stmt (copied_bbs[i]);
              if (TREE_CODE (last) == COND_EXPR)
                TREE_NO_WARNING (last) = 1;
            }
        }

      /* Ensure that the latch and the preheader are simple (we know that
         they are not now, since there was the loop exit condition).  */
      loop_split_edge_with (loop_preheader_edge (loop), NULL);
      loop_split_edge_with (loop_latch_edge (loop), NULL);
    }

  free (bbs);
  free (copied_bbs);

  loop_optimizer_finalize (loops);
  return 0;
}
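/* Illustrative example (not from the GCC sources) of the header copying
   performed by copy_loop_headers: the entry test of

     while (a && b)
       body;

   is duplicated in front of the loop, yielding the do-while form

     if (a && b)
       do
         body;
       while (a && b);

   so the exit conditions are no longer evaluated at the top of the
   loop body.  */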
static bool
should_duplicate_loop_header_p (basic_block header, struct loop *loop,
                                int *limit)
{
  gimple_stmt_iterator bsi;
  gimple *last;

  gcc_assert (!header->aux);

  /* Loop header copying usually increases size of the code.  This used
     not to be true, since quite often it is possible to verify that
     the condition is satisfied in the first iteration and therefore
     to eliminate it.  Jump threading handles these cases now.  */
  if (optimize_loop_for_size_p (loop))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  Not duplicating bb %i: optimizing for size.\n",
                 header->index);
      return false;
    }

  gcc_assert (EDGE_COUNT (header->succs) > 0);
  if (single_succ_p (header))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  Not duplicating bb %i: it is single succ.\n",
                 header->index);
      return false;
    }

  if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)
      && flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 1)->dest))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  Not duplicating bb %i: both successors are in loop.\n",
                 header->index);
      return false;
    }

  /* If this is not the original loop header, we want it to have just
     one predecessor in order to match the && pattern.  */
  if (header != loop->header && !single_pred_p (header))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  Not duplicating bb %i: it has multiple predecessors.\n",
                 header->index);
      return false;
    }

  last = last_stmt (header);
  if (gimple_code (last) != GIMPLE_COND)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  Not duplicating bb %i: it does not end with a conditional.\n",
                 header->index);
      return false;
    }

  /* Count number of instructions and punt on calls.  */
  for (bsi = gsi_start_bb (header); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      last = gsi_stmt (bsi);

      if (gimple_code (last) == GIMPLE_LABEL)
        continue;

      if (is_gimple_debug (last))
        continue;

      if (gimple_code (last) == GIMPLE_CALL
          && !gimple_inexpensive_call_p (as_a <gcall *> (last)))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "  Not duplicating bb %i: it contains call.\n",
                     header->index);
          return false;
        }

      *limit -= estimate_num_insns (last, &eni_size_weights);
      if (*limit < 0)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "  Not duplicating bb %i: it contains too many insns.\n",
                     header->index);
          return false;
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  Will duplicate bb %i\n", header->index);
  return true;
}