/* Entry point of the basic-block (SLP) vectorization pass.  Walks every
   basic block of the current function, analyzes each one for SLP
   opportunities and transforms those where the analysis succeeds.
   Always returns 0 (no additional TODO flags).  */

static unsigned int
execute_vect_slp (void)
{
  basic_block block;

  init_stmt_vec_info_vec ();

  FOR_EACH_BB (block)
    {
      vect_location = find_bb_location (block);

      /* Skip blocks the analysis phase rejects.  */
      if (!vect_slp_analyze_bb (block))
	continue;

      vect_slp_transform_bb (block);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "basic block vectorized using SLP\n");
    }

  free_stmt_vec_info_vec ();
  return 0;
}
/* Try to unroll LOOP completely, i.e. peel off all its iterations so the
   loop no longer rolls.  EXIT is the exiting edge for which NITER, the
   proved number of iterations, was computed; UL restricts how aggressive
   the unrolling may be; MAXITER is a recorded upper bound on the number of
   iterations (negative when none is known); LOCUS is used for
   optimization reports.  Returns false when complete unrolling is
   rejected by one of the checks below.  */
static bool
try_unroll_loop_completely (struct loop *loop,
			    edge exit, tree niter,
			    enum unroll_level ul,
			    HOST_WIDE_INT maxiter,
			    location_t locus)
{
  unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
  struct loop_size size;
  bool n_unroll_found = false;
  edge edge_to_cancel = NULL;
  /* Flags used when reporting the successful transformation.  */
  int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;

  /* See if we proved number of iterations to be low constant.

     EXIT is an edge that will be removed in all but last iteration of
     the loop.

     EDGE_TO_CANCEL is an edge that will be removed from the last iteration
     of the unrolled sequence and is expected to make the final loop not
     rolling.

     If the number of execution of loop is determined by standard induction
     variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
     from the iv test.  */
  if (tree_fits_uhwi_p (niter))
    {
      n_unroll = tree_to_uhwi (niter);
      n_unroll_found = true;
      /* The IV test block has two successors; the one that is not EXIT
	 is the edge to cancel in the last copy.  */
      edge_to_cancel = EDGE_SUCC (exit->src, 0);
      if (edge_to_cancel == exit)
	edge_to_cancel = EDGE_SUCC (exit->src, 1);
    }
  /* We do not know the number of iterations and thus we can not eliminate
     the EXIT edge.  */
  else
    exit = NULL;

  /* See if we can improve our estimate by using recorded loop bounds.  */
  if (maxiter >= 0
      && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
    {
      n_unroll = maxiter;
      n_unroll_found = true;
      /* Loop terminates before the IV variable test, so we can not
	 remove it in the last iteration.  */
      edge_to_cancel = NULL;
    }

  if (!n_unroll_found)
    return false;

  if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Not unrolling loop %d "
		 "(--param max-completely-peeled-times limit reached).\n",
		 loop->num);
      return false;
    }

  if (!edge_to_cancel)
    edge_to_cancel = loop_edge_to_cancel (loop);

  if (n_unroll)
    {
      sbitmap wont_exit;
      edge e;
      unsigned i;
      bool large;
      vec<edge> to_remove = vNULL;
      if (ul == UL_SINGLE_ITER)
	return false;

      large = tree_estimate_loop_size
		 (loop, exit, edge_to_cancel, &size,
		  PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
      ninsns = size.overall;
      if (large)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
		     loop->num);
	  return false;
	}

      unr_insns = estimated_unrolled_size (&size, n_unroll);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " Loop size: %d\n", (int) ninsns);
	  fprintf (dump_file, " Estimated size after unrolling: %d\n",
		   (int) unr_insns);
	}

      /* If the code is going to shrink, we don't need to be extra cautious
	 on guessing if the unrolling is going to be profitable.  */
      if (unr_insns
	  /* If there is IV variable that will become constant, we save
	     one instruction in the loop prologue we do not account
	     otherwise.  */
	  <= ninsns + (size.constant_iv != false))
	;
      /* We unroll only inner loops, because we do not consider it profitable
	 otherwise.  We still can cancel loopback edge of not rolling loop;
	 this is always a good idea.  */
      else if (ul == UL_NO_GROWTH)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
		     loop->num);
	  return false;
	}
      /* Outer loops tend to be less interesting candidates for complete
	 unrolling unless we can do a lot of propagation into the inner loop
	 body.  For now we disable outer loop unrolling when the code would
	 grow.  */
      else if (loop->inner)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "it is not innermost and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* If there is call on a hot path through the loop, then
	 there is most probably not much to optimize.  */
      else if (size.num_non_pure_calls_on_hot_path)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "contains call and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* If there is pure/const call in the function, then we can still
	 optimize the unrolled loop body if it contains some other
	 interesting code than the calls and code storing or cumulating
	 the return value.  */
      else if (size.num_pure_calls_on_hot_path
	       /* One IV increment, one test, one ivtmp store and
		  one useful stmt.  That is about minimal loop
		  doing pure call.  */
	       && (size.non_call_stmts_on_hot_path
		   <= 3 + size.num_pure_calls_on_hot_path))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "contains just pure calls and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* Complete unrolling is major win when control flow is removed and
	 one big basic block is created.  If the loop contains control flow
	 the optimization may still be a win because of eliminating the loop
	 overhead but it also may blow the branch predictor tables.
	 Limit number of branches on the hot path through the peeled
	 sequence.  */
      else if (size.num_branches_on_hot_path * (int)n_unroll
	       > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     " number of branches on hot path in the unrolled sequence"
		     " reach --param max-peel-branches limit.\n",
		     loop->num);
	  return false;
	}
      else if (unr_insns
	       > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "(--param max-completely-peeled-insns limit reached).\n",
		     loop->num);
	  return false;
	}
      dump_printf_loc (report_flags, locus,
		       "loop turned into non-loop; it never loops.\n");

      /* Peel off N_UNROLL copies; bit 0 of WONT_EXIT stays clear because
	 the first copy may still leave through EXIT.  */
      initialize_original_copy_tables ();
      wont_exit = sbitmap_alloc (n_unroll + 1);
      bitmap_ones (wont_exit);
      bitmap_clear_bit (wont_exit, 0);

      /* NOTE: DLTHE_FLAG_COMPLETTE_PEEL is the actual (misspelled)
	 macro name; do not "fix" it here.  */
      if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
						 n_unroll, wont_exit,
						 exit, &to_remove,
						 DLTHE_FLAG_UPDATE_FREQ
						 | DLTHE_FLAG_COMPLETTE_PEEL))
	{
	  free_original_copy_tables ();
	  free (wont_exit);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Failed to duplicate the loop\n");
	  return false;
	}

      /* Remove the exit edges that became dead in the peeled copies.  */
      FOR_EACH_VEC_ELT (to_remove, i, e)
	{
	  bool ok = remove_path (e);
	  gcc_assert (ok);
	}

      to_remove.release ();
      free (wont_exit);
      free_original_copy_tables ();
    }
unsigned vectorize_loops (void) { unsigned int i; unsigned int num_vectorized_loops = 0; unsigned int vect_loops_num; loop_iterator li; struct loop *loop; vect_loops_num = number_of_loops (); /* Bail out if there are no loops. */ if (vect_loops_num <= 1) return 0; init_stmt_vec_info_vec (); /* ----------- Analyze loops. ----------- */ /* If some loop was duplicated, it gets bigger number than all previously defined loops. This fact allows us to run only over initial loops skipping newly generated ones. */ FOR_EACH_LOOP (li, loop, 0) if (optimize_loop_nest_for_speed_p (loop)) { loop_vec_info loop_vinfo; vect_location = find_loop_location (loop); if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOC && dump_enabled_p ()) dump_printf (MSG_ALL, "\nAnalyzing loop at %s:%d\n", LOC_FILE (vect_location), LOC_LINE (vect_location)); loop_vinfo = vect_analyze_loop (loop); loop->aux = loop_vinfo; if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) continue; if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOC && dump_enabled_p ()) dump_printf (MSG_ALL, "\n\nVectorizing loop at %s:%d\n", LOC_FILE (vect_location), LOC_LINE (vect_location)); vect_transform_loop (loop_vinfo); num_vectorized_loops++; } vect_location = UNKNOWN_LOC; statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops); if (dump_enabled_p () || (num_vectorized_loops > 0 && dump_enabled_p ())) dump_printf_loc (MSG_ALL, vect_location, "vectorized %u loops in function.\n", num_vectorized_loops); /* ----------- Finalize. ----------- */ for (i = 1; i < vect_loops_num; i++) { loop_vec_info loop_vinfo; loop = get_loop (i); if (!loop) continue; loop_vinfo = (loop_vec_info) loop->aux; destroy_loop_vec_info (loop_vinfo, true); loop->aux = NULL; } free_stmt_vec_info_vec (); return num_vectorized_loops > 0 ? TODO_cleanup_cfg : 0; }
unsigned vectorize_loops (void) { unsigned int i; unsigned int num_vectorized_loops = 0; unsigned int vect_loops_num; loop_iterator li; struct loop *loop; vect_loops_num = number_of_loops (cfun); /* Bail out if there are no loops. */ if (vect_loops_num <= 1) return 0; init_stmt_vec_info_vec (); /* ----------- Analyze loops. ----------- */ /* If some loop was duplicated, it gets bigger number than all previously defined loops. This fact allows us to run only over initial loops skipping newly generated ones. */ FOR_EACH_LOOP (li, loop, 0) if (optimize_loop_nest_for_speed_p (loop)) { loop_vec_info loop_vinfo; vect_location = find_loop_location (loop); if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOC && dump_enabled_p ()) dump_printf (MSG_NOTE, "\nAnalyzing loop at %s:%d\n", LOC_FILE (vect_location), LOC_LINE (vect_location)); loop_vinfo = vect_analyze_loop (loop); loop->aux = loop_vinfo; if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) continue; if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOC && dump_enabled_p ()) dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, "Vectorized loop\n"); vect_transform_loop (loop_vinfo); num_vectorized_loops++; } vect_location = UNKNOWN_LOC; statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops); if (dump_enabled_p () || (num_vectorized_loops > 0 && dump_enabled_p ())) dump_printf_loc (MSG_NOTE, vect_location, "vectorized %u loops in function.\n", num_vectorized_loops); /* ----------- Finalize. ----------- */ for (i = 1; i < vect_loops_num; i++) { loop_vec_info loop_vinfo; loop = get_loop (cfun, i); if (!loop) continue; loop_vinfo = (loop_vec_info) loop->aux; destroy_loop_vec_info (loop_vinfo, true); loop->aux = NULL; } free_stmt_vec_info_vec (); if (num_vectorized_loops > 0) { /* If we vectorized any loop only virtual SSA form needs to be updated. ??? Also while we try hard to update loop-closed SSA form we fail to properly do this in some corner-cases (see PR56286). 
*/ rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals); return TODO_cleanup_cfg; } return 0; }