static bool schedule_prefetches (struct mem_ref_group *groups, unsigned unroll_factor, unsigned ahead) { unsigned remaining_prefetch_slots, n_prefetches, prefetch_slots; unsigned slots_per_prefetch; struct mem_ref *ref; bool any = false; /* At most SIMULTANEOUS_PREFETCHES should be running at the same time. */ remaining_prefetch_slots = SIMULTANEOUS_PREFETCHES; /* The prefetch will run for AHEAD iterations of the original loop, i.e., AHEAD / UNROLL_FACTOR iterations of the unrolled loop. In each iteration, it will need a prefetch slot. */ slots_per_prefetch = (ahead + unroll_factor / 2) / unroll_factor; if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Each prefetch instruction takes %u prefetch slots.\n", slots_per_prefetch); /* For now we just take memory references one by one and issue prefetches for as many as possible. The groups are sorted starting with the largest step, since the references with large step are more likely to cause many cache misses. */ for (; groups; groups = groups->next) for (ref = groups->refs; ref; ref = ref->next) { if (!should_issue_prefetch_p (ref)) continue; /* If we need to prefetch the reference each PREFETCH_MOD iterations, and we unroll the loop UNROLL_FACTOR times, we need to insert ceil (UNROLL_FACTOR / PREFETCH_MOD) instructions in each iteration. */ n_prefetches = ((unroll_factor + ref->prefetch_mod - 1) / ref->prefetch_mod); prefetch_slots = n_prefetches * slots_per_prefetch; /* If more than half of the prefetches would be lost anyway, do not issue the prefetch. */ if (2 * remaining_prefetch_slots < prefetch_slots) continue; ref->issue_prefetch_p = true; if (remaining_prefetch_slots <= prefetch_slots) return true; remaining_prefetch_slots -= prefetch_slots; any = true; } return any; }
/* Return true if at least one memory reference in GROUPS satisfies
   should_issue_prefetch_p, and false if none does (including when GROUPS
   is empty).  The search stops at the first match.  */
static bool
anything_to_prefetch_p (struct mem_ref_group *groups)
{
  struct mem_ref_group *group = groups;
  struct mem_ref *candidate;

  while (group)
    {
      candidate = group->refs;
      while (candidate)
	{
	  if (should_issue_prefetch_p (candidate))
	    return true;
	  candidate = candidate->next;
	}
      group = group->next;
    }

  return false;
}
static unsigned determine_unroll_factor (struct loop *loop, struct mem_ref_group *refs, unsigned ahead, unsigned ninsns, struct tree_niter_desc *desc) { unsigned upper_bound, size_factor, constraint_factor; unsigned factor, max_mod_constraint, ahead_factor; struct mem_ref_group *agp; struct mem_ref *ref; upper_bound = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); /* First check whether the loop is not too large to unroll. */ size_factor = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / ninsns; if (size_factor <= 1) return 1; if (size_factor < upper_bound) upper_bound = size_factor; max_mod_constraint = 1; for (agp = refs; agp; agp = agp->next) for (ref = agp->refs; ref; ref = ref->next) if (should_issue_prefetch_p (ref) && ref->prefetch_mod > max_mod_constraint) max_mod_constraint = ref->prefetch_mod; /* Set constraint_factor as large as needed to be able to satisfy the largest modulo constraint. */ constraint_factor = max_mod_constraint; /* If ahead is too large in comparison with the number of available prefetches, unroll the loop as much as needed to be able to prefetch at least partially some of the references in the loop. */ ahead_factor = ((ahead + SIMULTANEOUS_PREFETCHES - 1) / SIMULTANEOUS_PREFETCHES); /* Unroll as much as useful, but bound the code size growth. */ if (constraint_factor < ahead_factor) factor = ahead_factor; else factor = constraint_factor; if (factor > upper_bound) factor = upper_bound; if (!should_unroll_loop_p (loop, desc, factor)) return 1; return factor; }
/* Compute the factor by which LOOP should be unrolled for prefetching.
   REFS is the list of memory reference groups, NINSNS the number of
   instructions in the loop body, DESC the description of its number of
   iterations and EST_NITER the estimated iteration count (a negative value
   is ignored).

   The factor is built as a least common multiple of the prefetch_mod
   values of the references that pass should_issue_prefetch_p; a reference
   whose prefetch_mod would push the factor over the bound is skipped.

   Returns 1 (no unrolling) when the bound is at most 1 or when
   should_unroll_loop_p rejects the chosen factor.  */
static unsigned
determine_unroll_factor (struct loop *loop, struct mem_ref_group *refs,
			 unsigned ninsns, struct tree_niter_desc *desc,
			 HOST_WIDE_INT est_niter)
{
  /* Only the size of the unrolled body limits the factor.
     PARAM_MAX_UNROLL_TIMES is deliberately not consulted: it is aimed at
     unrolling done for scheduling and loop-overhead gains, and it kept
     small loops from being unrolled far enough for one iteration to cover
     a whole cache line.  */
  unsigned limit = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / ninsns;
  unsigned chosen, candidate;
  struct mem_ref_group *group;
  struct mem_ref *ref;

  /* An unrolled loop with more copies than the loop has iterations would
     never be entered.  */
  if (est_niter >= 0 && est_niter < (HOST_WIDE_INT) limit)
    limit = est_niter;

  if (limit <= 1)
    return 1;

  /* Grow the factor towards a common multiple of all modulo constraints,
     never exceeding LIMIT.  */
  chosen = 1;
  group = refs;
  while (group)
    {
      for (ref = group->refs; ref; ref = ref->next)
	{
	  if (!should_issue_prefetch_p (ref))
	    continue;

	  candidate = least_common_multiple (ref->prefetch_mod, chosen);
	  if (candidate <= limit)
	    chosen = candidate;
	}
      group = group->next;
    }

  return should_unroll_loop_p (loop, desc, chosen) ? chosen : 1;
}
static bool schedule_prefetches (struct mem_ref_group *groups, unsigned unroll_factor, unsigned ahead) { unsigned max_prefetches, n_prefetches; struct mem_ref *ref; bool any = false; max_prefetches = (SIMULTANEOUS_PREFETCHES * unroll_factor) / ahead; if (max_prefetches > (unsigned) SIMULTANEOUS_PREFETCHES) max_prefetches = SIMULTANEOUS_PREFETCHES; if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Max prefetches to issue: %d.\n", max_prefetches); if (!max_prefetches) return false; /* For now we just take memory references one by one and issue prefetches for as many as possible. The groups are sorted starting with the largest step, since the references with large step are more likely to cause many cache misses. */ for (; groups; groups = groups->next) for (ref = groups->refs; ref; ref = ref->next) { if (!should_issue_prefetch_p (ref)) continue; ref->issue_prefetch_p = true; /* If prefetch_mod is less then unroll_factor, we need to insert several prefetches for the reference. */ n_prefetches = ((unroll_factor + ref->prefetch_mod - 1) / ref->prefetch_mod); if (max_prefetches <= n_prefetches) return true; max_prefetches -= n_prefetches; any = true; } return any; }