unsigned int
tree_ssa_unswitch_loops (void)
{
  loop_iterator li;
  struct loop *loop;
  bool changed = false;
  HOST_WIDE_INT iterations;

  /* Go through inner loops (only original ones).  */
  FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, ";; Considering loop %d\n", loop->num);

      /* Do not unswitch in cold regions.  */
      if (optimize_loop_for_size_p (loop))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, ";; Not unswitching cold loops\n");
          continue;
        }

      /* The loop should not be too large, to limit code growth.  */
      if (tree_num_loop_insns (loop, &eni_size_weights)
          > (unsigned) PARAM_VALUE (PARAM_MAX_UNSWITCH_INSNS))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, ";; Not unswitching, loop too big\n");
          continue;
        }

      /* If the loop is not expected to iterate, there is no need
         for unswitching.  */
      iterations = estimated_loop_iterations_int (loop);
      if (iterations >= 0 && iterations <= 1)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, ";; Not unswitching, loop is not expected"
                     " to iterate\n");
          continue;
        }

      changed |= tree_unswitch_single_loop (loop, 0);
    }

  if (changed)
    return TODO_cleanup_cfg;
  return 0;
}
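/* A minimal source-level sketch (a hypothetical example, not taken from the
   pass itself) of the transformation driven above: the invariant test INV is
   hoisted out of the loop and the body is duplicated once per outcome, so
   neither copy re-evaluates the condition on every iteration.  */

void
unswitch_example_before (int *a, int n, int inv)
{
  int i;

  /* Before unswitching: INV is loop-invariant but tested N times.  */
  for (i = 0; i < n; i++)
    {
      if (inv)
        a[i] += 1;
      else
        a[i] -= 1;
    }
}

void
unswitch_example_after (int *a, int n, int inv)
{
  int i;

  /* After unswitching: one test, two specialized loop copies.  */
  if (inv)
    for (i = 0; i < n; i++)
      a[i] += 1;
  else
    for (i = 0; i < n; i++)
      a[i] -= 1;
}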
/* Unswitch single LOOP.  COND_CHECKED holds the list of conditions we already
   unswitched on and that are therefore known to be true in this LOOP.  NUM is
   the number of unswitchings done; do not allow it to grow too much, as it is
   too easy to create an example where the code grows exponentially.
   Returns true if LOOP was unswitched.  */

static bool
unswitch_single_loop (struct loop *loop, rtx cond_checked, int num)
{
  basic_block *bbs;
  struct loop *nloop;
  unsigned i;
  rtx cond, rcond = NULL_RTX, conds, rconds, acond, cinsn;
  int repeat;
  edge e;
  HOST_WIDE_INT iterations;

  /* Do not unswitch too much.  */
  if (num > PARAM_VALUE (PARAM_MAX_UNSWITCH_LEVEL))
    {
      if (dump_file)
        fprintf (dump_file, ";; Not unswitching anymore, hit max level\n");
      return false;
    }

  /* Only unswitch innermost loops.  */
  if (loop->inner)
    {
      if (dump_file)
        fprintf (dump_file, ";; Not unswitching, not innermost loop\n");
      return false;
    }

  /* We must be able to duplicate loop body.  */
  if (!can_duplicate_loop_p (loop))
    {
      if (dump_file)
        fprintf (dump_file, ";; Not unswitching, can't duplicate loop\n");
      return false;
    }

  /* The loop should not be too large, to limit code growth.  */
  if (num_loop_insns (loop) > PARAM_VALUE (PARAM_MAX_UNSWITCH_INSNS))
    {
      if (dump_file)
        fprintf (dump_file, ";; Not unswitching, loop too big\n");
      return false;
    }

  /* Do not unswitch in cold areas.  */
  if (optimize_loop_for_size_p (loop))
    {
      if (dump_file)
        fprintf (dump_file, ";; Not unswitching, not hot area\n");
      return false;
    }

  /* Nor if the loop usually does not roll.  */
  iterations = estimated_loop_iterations_int (loop);
  if (iterations >= 0 && iterations <= 1)
    {
      if (dump_file)
        fprintf (dump_file, ";; Not unswitching, loop iterations < 1\n");
      return false;
    }

  do
    {
      repeat = 0;
      cinsn = NULL_RTX;

      /* Find a bb to unswitch on.  */
      bbs = get_loop_body (loop);
      iv_analysis_loop_init (loop);
      for (i = 0; i < loop->num_nodes; i++)
        if ((cond = may_unswitch_on (bbs[i], loop, &cinsn)))
          break;

      if (i == loop->num_nodes)
        {
          free (bbs);
          return false;
        }

      if (cond != const0_rtx
          && cond != const_true_rtx)
        {
          rcond = reversed_condition (cond);
          if (rcond)
            rcond = canon_condition (rcond);

          /* Check whether the result can be predicted.  */
          for (acond = cond_checked; acond; acond = XEXP (acond, 1))
            simplify_using_condition (XEXP (acond, 0), &cond, NULL);
        }

      if (cond == const_true_rtx)
        {
          /* Remove false path.  */
          e = FALLTHRU_EDGE (bbs[i]);
          remove_path (e);
          free (bbs);
          repeat = 1;
        }
      else if (cond == const0_rtx)
        {
          /* Remove true path.  */
          e = BRANCH_EDGE (bbs[i]);
          remove_path (e);
          free (bbs);
          repeat = 1;
        }
    }
  while (repeat);

  /* We found the condition we can unswitch on.  */
  conds = alloc_EXPR_LIST (0, cond, cond_checked);
  if (rcond)
    rconds = alloc_EXPR_LIST (0, rcond, cond_checked);
  else
    rconds = cond_checked;

  if (dump_file)
    fprintf (dump_file, ";; Unswitching loop\n");

  /* Unswitch the loop on this condition.  */
  nloop = unswitch_loop (loop, bbs[i], copy_rtx_if_shared (cond), cinsn);
  gcc_assert (nloop);

  /* Invoke itself on modified loops.  */
  unswitch_single_loop (nloop, rconds, num + 1);
  unswitch_single_loop (loop, conds, num + 1);

  free_EXPR_LIST_node (conds);
  if (rcond)
    free_EXPR_LIST_node (rconds);

  free (bbs);

  return true;
}
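/* Why NUM is bounded: each successful unswitching recurses into both the
   original loop and the new copy, so K independent invariant conditions can
   yield up to 2^K specialized loop copies.  A hypothetical source-level
   example with two conditions:  */

void
unswitch_growth_example (int *a, int n, int c1, int c2)
{
  int i;

  for (i = 0; i < n; i++)
    {
      if (c1)
        a[i] += 1;
      if (c2)
        a[i] *= 2;
    }
  /* Fully unswitched, this becomes four loops, one per (c1, c2)
     combination.  PARAM_MAX_UNSWITCH_LEVEL caps the recursion depth, and
     COND_CHECKED lets the recursive calls fold conditions already known to
     hold in a given copy instead of unswitching on them again.  */
}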
static bool
loop_prefetch_arrays (struct loop *loop)
{
  struct mem_ref_group *refs;
  unsigned ahead, ninsns, time, unroll_factor;
  HOST_WIDE_INT est_niter;
  struct tree_niter_desc desc;
  bool unrolled = false, no_other_refs;

  if (optimize_loop_nest_for_size_p (loop))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  ignored (cold area)\n");
      return false;
    }

  /* Step 1: gather the memory references.  */
  refs = gather_memory_references (loop, &no_other_refs);

  /* Step 2: estimate the reuse effects.  */
  prune_by_reuse (refs);

  if (!anything_to_prefetch_p (refs))
    goto fail;

  determine_loop_nest_reuse (loop, refs, no_other_refs);

  /* Step 3: determine the ahead and unroll factor.  */

  /* FIXME: the time should be weighted by the probabilities of the blocks in
     the loop body.  */
  time = tree_num_loop_insns (loop, &eni_time_weights);
  ahead = (PREFETCH_LATENCY + time - 1) / time;
  est_niter = estimated_loop_iterations_int (loop, false);

  /* The prefetches will run for AHEAD iterations of the original loop.
     Unless the loop rolls at least AHEAD times, prefetching the references
     does not make sense.  */
  if (est_niter >= 0 && est_niter <= (HOST_WIDE_INT) ahead)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Not prefetching -- loop estimated to roll only %d times\n",
                 (int) est_niter);
      goto fail;
    }

  mark_nontemporal_stores (loop, refs);

  ninsns = tree_num_loop_insns (loop, &eni_size_weights);
  unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc,
                                           est_niter);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Ahead %d, unroll factor %d\n", ahead, unroll_factor);

  /* Step 4: what to prefetch?  */
  if (!schedule_prefetches (refs, unroll_factor, ahead))
    goto fail;

  /* Step 5: unroll the loop.  TODO -- peeling of first and last few
     iterations so that we do not issue superfluous prefetches.  */
  if (unroll_factor != 1)
    {
      tree_unroll_loop (loop, unroll_factor,
                        single_dom_exit (loop), &desc);
      unrolled = true;
    }

  /* Step 6: issue the prefetches.  */
  issue_prefetches (refs, unroll_factor, ahead);

fail:
  release_mem_refs (refs);
  return unrolled;
}
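/* The AHEAD computation above is a ceiling division.  With the illustrative
   (assumed, not target-specific) values PREFETCH_LATENCY = 200 and a loop
   body costing time = 15, AHEAD = (200 + 15 - 1) / 15 = 14, i.e. data is
   prefetched roughly 14 iterations before it is used.  A hand-written sketch
   of the kind of code the pass effectively produces for a streamed array:  */

void
prefetch_sketch (double *a, int n)
{
  const int ahead = 14;  /* Assumed value of AHEAD, for illustration.  */
  int i;

  for (i = 0; i < n; i++)
    {
      /* Read prefetch (rw = 0) with high temporal locality (3).
         Prefetching a little past the end of the array is harmless:
         __builtin_prefetch does not fault on invalid addresses.  */
      __builtin_prefetch (&a[i + ahead], 0, 3);
      a[i] *= 2.0;
    }
}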
static void
determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs,
                           bool no_other_refs)
{
  struct loop *nest, *aloop;
  VEC (data_reference_p, heap) *datarefs = NULL;
  VEC (ddr_p, heap) *dependences = NULL;
  struct mem_ref_group *gr;
  struct mem_ref *ref, *refb;
  VEC (loop_p, heap) *vloops = NULL;
  unsigned *loop_data_size;
  unsigned i, j, n;
  unsigned volume, dist, adist;
  HOST_WIDE_INT vol;
  data_reference_p dr;
  ddr_p dep;

  if (loop->inner)
    return;

  /* Find the outermost loop of the loop nest of loop (we require that
     there are no sibling loops inside the nest).  */
  nest = loop;
  while (1)
    {
      aloop = loop_outer (nest);

      if (aloop == current_loops->tree_root
          || aloop->inner->next)
        break;

      nest = aloop;
    }

  /* For each loop, determine the amount of data accessed in each iteration.
     We use this to estimate whether the reference is evicted from the
     cache before its reuse.  */
  find_loop_nest (nest, &vloops);
  n = VEC_length (loop_p, vloops);
  loop_data_size = XNEWVEC (unsigned, n);
  volume = volume_of_references (refs);
  i = n;
  while (i-- != 0)
    {
      loop_data_size[i] = volume;
      /* Bound the volume by the L2 cache size, since above this bound,
         all dependence distances are equivalent.  */
      if (volume > L2_CACHE_SIZE_BYTES)
        continue;

      aloop = VEC_index (loop_p, vloops, i);
      vol = estimated_loop_iterations_int (aloop, false);
      if (vol < 0)
        vol = expected_loop_iterations (aloop);
      volume *= vol;
    }

  /* Prepare the references in the form suitable for data dependence
     analysis.  We ignore unanalyzable data references (the results are
     used just as a heuristic to estimate temporality of the references,
     hence we do not need to worry about correctness).  */
  for (gr = refs; gr; gr = gr->next)
    for (ref = gr->refs; ref; ref = ref->next)
      {
        dr = create_data_ref (nest, ref->mem, ref->stmt, !ref->write_p);

        if (dr)
          {
            ref->reuse_distance = volume;
            dr->aux = ref;
            VEC_safe_push (data_reference_p, heap, datarefs, dr);
          }
        else
          no_other_refs = false;
      }

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      dist = self_reuse_distance (dr, loop_data_size, n, loop);
      ref = (struct mem_ref *) dr->aux;
      if (ref->reuse_distance > dist)
        ref->reuse_distance = dist;

      if (no_other_refs)
        ref->independent_p = true;
    }

  compute_all_dependences (datarefs, &dependences, vloops, true);

  for (i = 0; VEC_iterate (ddr_p, dependences, i, dep); i++)
    {
      if (DDR_ARE_DEPENDENT (dep) == chrec_known)
        continue;

      ref = (struct mem_ref *) DDR_A (dep)->aux;
      refb = (struct mem_ref *) DDR_B (dep)->aux;

      if (DDR_ARE_DEPENDENT (dep) == chrec_dont_know
          || DDR_NUM_DIST_VECTS (dep) == 0)
        {
          /* If the dependence cannot be analyzed, assume that there might be
             a reuse.  */
          dist = 0;

          ref->independent_p = false;
          refb->independent_p = false;
        }
      else
        {
          /* The distance vectors are normalized to be always
             lexicographically positive, hence we cannot tell just from them
             whether DDR_A comes before DDR_B or vice versa.  However, it is
             not important, anyway -- if DDR_A is close to DDR_B, then it is
             either reused in DDR_B (and it is not nontemporal), or it reuses
             the value of DDR_B in cache (and marking it as nontemporal would
             not affect anything).  */

          dist = volume;
          for (j = 0; j < DDR_NUM_DIST_VECTS (dep); j++)
            {
              adist = volume_of_dist_vector (DDR_DIST_VECT (dep, j),
                                             loop_data_size, n);

              /* If this is a dependence in the innermost loop (i.e., the
                 distances in all superloops are zero) and it is not
                 the trivial self-dependence with distance zero, record that
                 the references are not completely independent.  */
              if (lambda_vector_zerop (DDR_DIST_VECT (dep, j), n - 1)
                  && (ref != refb
                      || DDR_DIST_VECT (dep, j)[n-1] != 0))
                {
                  ref->independent_p = false;
                  refb->independent_p = false;
                }

              /* Ignore accesses closer than
                 L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION,
                 so that we use nontemporal prefetches e.g. if a single memory
                 location is accessed several times in a single iteration of
                 the loop.  */
              if (adist < L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION)
                continue;

              if (adist < dist)
                dist = adist;
            }
        }

      if (ref->reuse_distance > dist)
        ref->reuse_distance = dist;
      if (refb->reuse_distance > dist)
        refb->reuse_distance = dist;
    }

  free_dependence_relations (dependences);
  free_data_refs (datarefs);
  free (loop_data_size);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reuse distances:\n");
      for (gr = refs; gr; gr = gr->next)
        for (ref = gr->refs; ref; ref = ref->next)
          fprintf (dump_file, " ref %p distance %u\n",
                   (void *) ref, ref->reuse_distance);
    }
}
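/* An illustrative (hypothetical) nest for the reuse-distance estimate
   computed above: between two uses of a[j], one per outer iteration, roughly
   M elements of A plus M elements of one row of B are touched.  If that
   volume exceeds the L2 cache size, a[j] is likely evicted before its reuse;
   the rows of B are only streamed, so B's references keep a large reuse
   distance and become candidates for nontemporal prefetching.  */

void
reuse_distance_sketch (double *a, double **b, int n, int m)
{
  int i, j;

  for (i = 0; i < n; i++)
    for (j = 0; j < m; j++)
      a[j] += b[i][j];  /* a[j] reused across i; b[i][j] used once.  */
}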