static unsigned int tree_ssa_ifcombine (void) { basic_block *bbs; bool cfg_changed = false; int i; bbs = single_pred_before_succ_order (); calculate_dominance_info (CDI_DOMINATORS); /* Search every basic block for COND_EXPR we may be able to optimize. We walk the blocks in order that guarantees that a block with a single predecessor is processed after the predecessor. This ensures that we collapse outter ifs before visiting the inner ones, and also that we do not try to visit a removed block. This is opposite of PHI-OPT, because we cascade the combining rather than cascading PHIs. */ for (i = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS - 1; i >= 0; i--) { basic_block bb = bbs[i]; gimple stmt = last_stmt (bb); if (stmt && gimple_code (stmt) == GIMPLE_COND) cfg_changed |= tree_ssa_ifcombine_bb (bb); } free (bbs); return cfg_changed ? TODO_cleanup_cfg : 0; }
static unsigned int tracer (void) { bool changed; if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1) return 0; mark_dfs_back_edges (); if (dump_file) brief_dump_cfg (dump_file, dump_flags); /* Trace formation is done on the fly inside tail_duplicate */ changed = tail_duplicate (); if (changed) { free_dominance_info (CDI_DOMINATORS); calculate_dominance_info (CDI_DOMINATORS); if (current_loops) fix_loop_structure (NULL); } if (dump_file) brief_dump_cfg (dump_file, dump_flags); return changed ? TODO_cleanup_cfg : 0; }
/* Initialize loop structures for the current function, applying FLAGS
   (a mask of LOOPS_* properties the caller wants maintained).  Either
   discovers loops from scratch or re-validates/fixes an existing loop
   tree.  */
void
loop_optimizer_init (unsigned flags)
{
  timevar_push (TV_LOOP_INIT);

  if (!current_loops)
    {
      /* No loop tree yet; PROP_loops must not claim otherwise.  */
      gcc_assert (!(cfun->curr_properties & PROP_loops));

      /* Find the loops.  */
      current_loops = flow_loops_find (NULL);
    }
  else
    {
      /* A loop tree already exists; remember which state flags need
	 special treatment before we clear them all below.  */
      bool recorded_exits = loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS);
      bool needs_fixup = loops_state_satisfies_p (LOOPS_NEED_FIXUP);

      gcc_assert (cfun->curr_properties & PROP_loops);

      /* Ensure that the dominators are computed, like flow_loops_find
	 does.  */
      calculate_dominance_info (CDI_DOMINATORS);

#ifdef ENABLE_CHECKING
      /* Only verify when the tree is believed consistent; a tree that
	 needs fixup would fail verification spuriously.  */
      if (!needs_fixup)
	verify_loop_structure ();
#endif

      /* Clear all flags.  */
      if (recorded_exits)
	release_recorded_exits ();
      loops_state_clear (~0U);

      if (needs_fixup)
	{
	  /* Apply LOOPS_MAY_HAVE_MULTIPLE_LATCHES early as
	     fix_loop_structure re-applies flags.  */
	  loops_state_set (flags & LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
	  fix_loop_structure (NULL);
	}
    }

  /* Apply flags to loops.  */
  apply_loop_flags (flags);

  /* Dump loops.  */
  flow_loops_dump (dump_file, NULL, 1);

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  timevar_pop (TV_LOOP_INIT);
}
static bool split_paths () { bool changed = false; loop_p loop; loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); initialize_original_copy_tables (); calculate_dominance_info (CDI_DOMINATORS); FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) { /* Only split paths if we are optimizing this loop for speed. */ if (!optimize_loop_for_speed_p (loop)) continue; /* See if there is a block that we can duplicate to split the path to the loop latch. */ basic_block bb = find_block_to_duplicate_for_splitting_paths (loop->latch); /* BB is the merge point for an IF-THEN-ELSE we want to transform. Essentially we want to create a duplicate of bb and redirect the first predecessor of BB to the duplicate (leaving the second predecessor as is. This will split the path leading to the latch re-using BB to avoid useless copying. */ if (bb && is_feasible_trace (bb)) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Duplicating join block %d into predecessor paths\n", bb->index); basic_block pred0 = EDGE_PRED (bb, 0)->src; transform_duplicate (pred0, bb); changed = true; } } loop_optimizer_finalize (); free_original_copy_tables (); return changed; }
/* Main entry point. Perform loop unswitching on all suitable loops. */ void unswitch_loops (void) { struct loop *loop; bool changed = false; /* Go through inner loops (only original ones). */ FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) changed |= unswitch_single_loop (loop, NULL_RTX, 0); iv_analysis_done (); /* If we unswitched any loop discover new loops that are eventually exposed by making irreducible regions reducible. */ if (changed) { calculate_dominance_info (CDI_DOMINATORS); fix_loop_structure (NULL); } }
static unsigned int tree_ssa_ifcombine (void) { basic_block *bbs; bool cfg_changed = false; int i; bbs = single_pred_before_succ_order (); calculate_dominance_info (CDI_DOMINATORS); for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; ++i) { basic_block bb = bbs[i]; gimple stmt = last_stmt (bb); if (stmt && gimple_code (stmt) == GIMPLE_COND) cfg_changed |= tree_ssa_ifcombine_bb (bb); } free (bbs); return cfg_changed ? TODO_cleanup_cfg : 0; }
/* Find the natural loops in the CFG and store them in LOOPS, building
   the loop tree.  FLAGS controls what per-loop information is scanned
   (LOOP_TREE is mandatory).  Returns the number of loops found,
   including the dummy outermost loop covering the whole function.  */
int
flow_loops_find (struct loops *loops, int flags)
{
  int i;
  int b;
  int num_loops;
  edge e;
  sbitmap headers;
  int *dfs_order;
  int *rc_order;
  basic_block header;
  basic_block bb;

  /* This function cannot be repeatedly called with different flags to
     build up the loop information.  The loop tree must always be built
     if this function is called.  */
  if (! (flags & LOOP_TREE))
    abort ();

  memset (loops, 0, sizeof *loops);

  /* Taking care of this degenerate case makes the rest of this code
     simpler.  */
  if (n_basic_blocks == 0)
    return 0;

  dfs_order = NULL;
  rc_order = NULL;

  /* Join loops with shared headers.  */
  canonicalize_loop_headers ();

  /* Compute the dominators.  */
  calculate_dominance_info (CDI_DOMINATORS);

  /* Count the number of loop headers.  This should be the same as the
     number of natural loops.  */
  headers = sbitmap_alloc (last_basic_block);
  sbitmap_zero (headers);

  num_loops = 0;
  FOR_EACH_BB (header)
    {
      int more_latches = 0;

      header->loop_depth = 0;

      /* If we have an abnormal predecessor, do not consider the loop
	 (not worth the problems).  */
      for (e = header->pred; e; e = e->pred_next)
	if (e->flags & EDGE_ABNORMAL)
	  break;
      /* E is non-null iff the inner loop broke out on an abnormal
	 edge.  */
      if (e)
	continue;

      for (e = header->pred; e; e = e->pred_next)
	{
	  basic_block latch = e->src;

	  if (e->flags & EDGE_ABNORMAL)
	    abort ();

	  /* Look for back edges where a predecessor is dominated by this
	     block.  A natural loop has a single entry node (header) that
	     dominates all the nodes in the loop.  It also has single back
	     edge to the header from a latch node.  */
	  if (latch != ENTRY_BLOCK_PTR
	      && dominated_by_p (CDI_DOMINATORS, latch, header))
	    {
	      /* Shared headers should be eliminated by now
		 (canonicalize_loop_headers ran above).  */
	      if (more_latches)
		abort ();
	      more_latches = 1;
	      SET_BIT (headers, header->index);
	      num_loops++;
	    }
	}
    }

  /* Allocate loop structures.  */
  loops->parray = xcalloc (num_loops + 1, sizeof (struct loop *));

  /* Dummy loop containing whole function.  */
  loops->parray[0] = xcalloc (1, sizeof (struct loop));
  loops->parray[0]->next = NULL;
  loops->parray[0]->inner = NULL;
  loops->parray[0]->outer = NULL;
  loops->parray[0]->depth = 0;
  loops->parray[0]->pred = NULL;
  /* +2 accounts for the entry and exit blocks on top of the real
     basic blocks.  */
  loops->parray[0]->num_nodes = n_basic_blocks + 2;
  loops->parray[0]->latch = EXIT_BLOCK_PTR;
  loops->parray[0]->header = ENTRY_BLOCK_PTR;
  ENTRY_BLOCK_PTR->loop_father = loops->parray[0];
  EXIT_BLOCK_PTR->loop_father = loops->parray[0];

  loops->tree_root = loops->parray[0];

  /* Find and record information about all the natural loops
     in the CFG.  */
  loops->num = 1;
  FOR_EACH_BB (bb)
    bb->loop_father = loops->tree_root;

  if (num_loops)
    {
      /* Compute depth first search order of the CFG so that outer
	 natural loops will be found before inner natural loops.  */
      dfs_order = xmalloc (n_basic_blocks * sizeof (int));
      rc_order = xmalloc (n_basic_blocks * sizeof (int));
      flow_depth_first_order_compute (dfs_order, rc_order);

      /* Save CFG derived information to avoid recomputing it.  */
      loops->cfg.dfs_order = dfs_order;
      loops->cfg.rc_order = rc_order;

      /* Slot 0 is the dummy loop; real loops are numbered from 1.  */
      num_loops = 1;

      for (b = 0; b < n_basic_blocks; b++)
	{
	  struct loop *loop;

	  /* Search the nodes of the CFG in reverse completion order
	     so that we can find outer loops first.  */
	  if (!TEST_BIT (headers, rc_order[b]))
	    continue;

	  header = BASIC_BLOCK (rc_order[b]);

	  loop = loops->parray[num_loops] = xcalloc (1, sizeof (struct loop));
	  loop->header = header;
	  loop->num = num_loops;
	  num_loops++;

	  /* Look for the latch for this header block.  */
	  for (e = header->pred; e; e = e->pred_next)
	    {
	      basic_block latch = e->src;

	      if (latch != ENTRY_BLOCK_PTR
		  && dominated_by_p (CDI_DOMINATORS, latch, header))
		{
		  loop->latch = latch;
		  break;
		}
	    }

	  /* header->loop_father is already the enclosing (outer) loop
	     because outer headers were processed first.  */
	  flow_loop_tree_node_add (header->loop_father, loop);
	  loop->num_nodes = flow_loop_nodes_find (loop->header, loop);
	}

      /* Assign the loop nesting depth and enclosed loop level for each
	 loop.  */
      loops->levels = flow_loops_level_compute (loops);

      /* Scan the loops.  */
      for (i = 1; i < num_loops; i++)
	flow_loop_scan (loops->parray[i], flags);

      loops->num = num_loops;
    }
  else
    {
      /* No loops: the dominator info computed above is not needed.  */
      free_dominance_info (CDI_DOMINATORS);
    }

  sbitmap_free (headers);

  loops->state = 0;
#ifdef ENABLE_CHECKING
  verify_flow_info ();
  verify_loop_structure (loops);
#endif

  return loops->num;
}
/* Takes care of merging natural loops with shared headers: splits such
   headers with forwarder blocks so that afterwards every loop header
   has exactly one latch edge.  */
static void
canonicalize_loop_headers (void)
{
  basic_block header;
  edge e;

  /* Compute the dominators.  */
  calculate_dominance_info (CDI_DOMINATORS);

  /* Per-block aux holds the latch-edge count (HEADER_BLOCK); per-edge
     aux marks latch edges (LATCH_EDGE).  */
  alloc_aux_for_blocks (sizeof (int));
  alloc_aux_for_edges (sizeof (int));

  /* Split blocks so that each loop has only single latch.  */
  FOR_EACH_BB (header)
    {
      int num_latches = 0;
      int have_abnormal_edge = 0;

      for (e = header->pred; e; e = e->pred_next)
	{
	  basic_block latch = e->src;

	  if (e->flags & EDGE_ABNORMAL)
	    have_abnormal_edge = 1;

	  /* A back edge: the source is dominated by the header.  */
	  if (latch != ENTRY_BLOCK_PTR
	      && dominated_by_p (CDI_DOMINATORS, latch, header))
	    {
	      num_latches++;
	      LATCH_EDGE (e) = 1;
	    }
	}
      /* Blocks with abnormal predecessors are never treated as loop
	 headers.  */
      if (have_abnormal_edge)
	HEADER_BLOCK (header) = 0;
      else
	HEADER_BLOCK (header) = num_latches;
    }

  free_dominance_info (CDI_DOMINATORS);

  if (HEADER_BLOCK (ENTRY_BLOCK_PTR->succ->dest))
    {
      basic_block bb;

      /* We could not redirect edges freely here.  On the other hand,
	 we can simply split the edge from entry block.  */
      bb = split_edge (ENTRY_BLOCK_PTR->succ);

      /* The new block needs aux data too since it was created after
	 the alloc_aux_for_* calls above.  */
      alloc_aux_for_edge (bb->succ, sizeof (int));
      LATCH_EDGE (bb->succ) = 0;
      alloc_aux_for_block (bb, sizeof (int));
      HEADER_BLOCK (bb) = 0;
    }

  FOR_EACH_BB (header)
    {
      int num_latch;
      int want_join_latch;
      int max_freq, is_heavy;
      edge heavy;

      if (!HEADER_BLOCK (header))
	continue;

      num_latch = HEADER_BLOCK (header);

      /* Only headers with multiple latch edges need splitting.  */
      want_join_latch = (num_latch > 1);

      if (!want_join_latch)
	continue;

      /* Find a heavy edge: a latch edge whose frequency dominates the
	 others by at least HEAVY_EDGE_RATIO.  Keeping such an edge on
	 its own forwarder preserves the hot path.  */
      is_heavy = 1;
      heavy = NULL;
      max_freq = 0;
      for (e = header->pred; e; e = e->pred_next)
	if (LATCH_EDGE (e) &&
	    EDGE_FREQUENCY (e) > max_freq)
	  max_freq = EDGE_FREQUENCY (e);
      for (e = header->pred; e; e = e->pred_next)
	if (LATCH_EDGE (e) &&
	    EDGE_FREQUENCY (e) >= max_freq / HEAVY_EDGE_RATIO)
	  {
	    if (heavy)
	      {
		/* Two comparably heavy edges: no single heavy edge.  */
		is_heavy = 0;
		break;
	      }
	    else
	      heavy = e;
	  }

      if (is_heavy)
	{
	  /* Split off the heavy latch edge into its own forwarder; if
	     more than one other latch remains, join those in a second
	     forwarder.  */
	  basic_block new_header =
	    make_forwarder_block (header, true, true, heavy, 0);
	  if (num_latch > 2)
	    make_forwarder_block (new_header, true, false, NULL, 1);
	}
      else
	make_forwarder_block (header, true, false, NULL, 1);
    }

  free_aux_for_blocks ();
  free_aux_for_edges ();
}
/* Search the CFG for blocks we can duplicate to split the path leading
   to a loop latch.  Returns true if the CFG was changed.  */
static bool
split_paths ()
{
  bool changed = false;
  loop_p loop;

  loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
  initialize_original_copy_tables ();
  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      /* Only split paths if we are optimizing this loop for speed.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* See if there is a block that we can duplicate to split the
	 path to the loop latch.  */
      basic_block bb
	= find_block_to_duplicate_for_splitting_paths (loop->latch);

      /* BB is the merge point for an IF-THEN-ELSE we want to transform.

	 Essentially we want to create a duplicate of bb and redirect the
	 first predecessor of BB to the duplicate (leaving the second
	 predecessor as is.  This will split the path leading to the latch
	 re-using BB to avoid useless copying.  */
      if (bb && is_feasible_trace (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Duplicating join block %d into predecessor paths\n",
		     bb->index);
	  basic_block pred0 = EDGE_PRED (bb, 0)->src;
	  transform_duplicate (pred0, bb);
	  changed = true;

	  /* If BB has an outgoing edge marked as IRREDUCIBLE, then
	     duplicating BB may result in an irreducible region turning
	     into a natural loop.

	     Long term we might want to hook this into the block
	     duplication code, but as we've seen with similar changes
	     for edge removal, that can be somewhat risky.  */
	  if (EDGE_SUCC (bb, 0)->flags & EDGE_IRREDUCIBLE_LOOP
	      || EDGE_SUCC (bb, 1)->flags & EDGE_IRREDUCIBLE_LOOP)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "Join block %d has EDGE_IRREDUCIBLE_LOOP set. "
			 "Scheduling loop fixups.\n",
			 bb->index);
	      loops_state_set (LOOPS_NEED_FIXUP);
	    }
	}
    }

  loop_optimizer_finalize ();
  free_original_copy_tables ();
  return changed;
}