static bool
loop_prefetch_arrays (struct loops *loops, struct loop *loop)
{
  struct mem_ref_group *refs;
  unsigned ahead, ninsns, unroll_factor;
  struct tree_niter_desc desc;
  bool unrolled = false;

  /* Step 1: gather the memory references.  */
  refs = gather_memory_references (loop);

  /* Step 2: estimate the reuse effects.  */
  prune_by_reuse (refs);

  if (!anything_to_prefetch_p (refs))
    goto fail;

  /* Step 3: determine the ahead and unroll factor.  */

  /* FIXME: We should use not the size of the loop, but the average number
     of instructions executed per iteration of the loop.  */
  ninsns = tree_num_loop_insns (loop);
  ahead = (PREFETCH_LATENCY + ninsns - 1) / ninsns;
  unroll_factor = determine_unroll_factor (loop, refs, ahead, ninsns,
					   &desc);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Ahead %d, unroll factor %d\n", ahead, unroll_factor);

  /* If the loop rolls less than the required unroll factor, prefetching is
     useless.  */
  if (unroll_factor > 1
      && cst_and_fits_in_hwi (desc.niter)
      && (unsigned HOST_WIDE_INT) int_cst_value (desc.niter) < unroll_factor)
    goto fail;

  /* Step 4: what to prefetch?  */
  if (!schedule_prefetches (refs, unroll_factor, ahead))
    goto fail;

  /* Step 5: unroll the loop.  TODO -- peeling of first and last few
     iterations so that we do not issue superfluous prefetches.  */
  if (unroll_factor != 1)
    {
      tree_unroll_loop (loops, loop, unroll_factor,
			single_dom_exit (loop), &desc);
      unrolled = true;
    }

  /* Step 6: issue the prefetches.  */
  issue_prefetches (refs, unroll_factor, ahead);

fail:
  release_mem_refs (refs);
  return unrolled;
}
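/* A note on the prefetch distance computed in Step 3 above: AHEAD is a
   ceiling division, i.e. the smallest number of iterations whose combined
   instruction count covers PREFETCH_LATENCY.  Below is a minimal standalone
   sketch of that arithmetic; the latency and per-iteration size are
   hypothetical stand-ins for the values the pass takes from the target
   parameters and from tree_num_loop_insns.  */

#include <stdio.h>

#define PREFETCH_LATENCY 200	/* hypothetical latency, in instructions */

int
main (void)
{
  unsigned ninsns = 30;		/* assumed instructions per iteration */

  /* Ceiling division: the smallest AHEAD with AHEAD * ninsns >= latency.  */
  unsigned ahead = (PREFETCH_LATENCY + ninsns - 1) / ninsns;

  printf ("ahead = %u\n", ahead);	/* prints 7, since 7 * 30 >= 200 */
  return 0;
}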
static bool
loop_prefetch_arrays (struct loop *loop)
{
  struct mem_ref_group *refs;
  unsigned ahead, ninsns, time, unroll_factor;
  HOST_WIDE_INT est_niter;
  struct tree_niter_desc desc;
  bool unrolled = false, no_other_refs;

  if (optimize_loop_nest_for_size_p (loop))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  ignored (cold area)\n");
      return false;
    }

  /* Step 1: gather the memory references.  */
  refs = gather_memory_references (loop, &no_other_refs);

  /* Step 2: estimate the reuse effects.  */
  prune_by_reuse (refs);

  if (!anything_to_prefetch_p (refs))
    goto fail;

  determine_loop_nest_reuse (loop, refs, no_other_refs);

  /* Step 3: determine the ahead and unroll factor.  */

  /* FIXME: the time should be weighted by the probabilities of the blocks
     in the loop body.  */
  time = tree_num_loop_insns (loop, &eni_time_weights);
  ahead = (PREFETCH_LATENCY + time - 1) / time;
  est_niter = estimated_loop_iterations_int (loop, false);

  /* The prefetches will run for AHEAD iterations of the original loop.
     Unless the loop rolls at least AHEAD times, prefetching the references
     does not make sense.  */
  if (est_niter >= 0 && est_niter <= (HOST_WIDE_INT) ahead)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Not prefetching -- loop estimated to roll only %d times\n",
		 (int) est_niter);
      goto fail;
    }

  mark_nontemporal_stores (loop, refs);

  ninsns = tree_num_loop_insns (loop, &eni_size_weights);
  unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc,
					   est_niter);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Ahead %d, unroll factor %d\n", ahead, unroll_factor);

  /* Step 4: what to prefetch?  */
  if (!schedule_prefetches (refs, unroll_factor, ahead))
    goto fail;

  /* Step 5: unroll the loop.  TODO -- peeling of first and last few
     iterations so that we do not issue superfluous prefetches.  */
  if (unroll_factor != 1)
    {
      tree_unroll_loop (loop, unroll_factor,
			single_dom_exit (loop), &desc);
      unrolled = true;
    }

  /* Step 6: issue the prefetches.  */
  issue_prefetches (refs, unroll_factor, ahead);

fail:
  release_mem_refs (refs);
  return unrolled;
}
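/* The early bailout in this newer version applies the same reasoning before
   any scheduling work is done: prefetches issued AHEAD iterations in advance
   only pay off if the loop is expected to roll more than AHEAD times.  A
   hypothetical standalone sketch of that guard follows; EST_NITER < 0
   encodes an unknown trip count, in which case the function above
   optimistically keeps prefetching.  */

#include <stdio.h>

/* Return 1 if prefetching is worthwhile for a loop expected to run
   EST_NITER times with a prefetch distance of AHEAD iterations.  */
static int
worth_prefetching (long est_niter, unsigned ahead)
{
  return est_niter < 0 || est_niter > (long) ahead;
}

int
main (void)
{
  printf ("%d\n", worth_prefetching (1000, 7));	/* 1: rolls often enough */
  printf ("%d\n", worth_prefetching (5, 7));	/* 0: too few iterations */
  printf ("%d\n", worth_prefetching (-1, 7));	/* 1: unknown trip count */
  return 0;
}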