Example #1
static bool
schedule_prefetches (struct mem_ref_group *groups, unsigned unroll_factor,
		     unsigned ahead)
{
  unsigned remaining_prefetch_slots, n_prefetches, prefetch_slots;
  unsigned slots_per_prefetch;
  struct mem_ref *ref;
  bool any = false;

  /* At most SIMULTANEOUS_PREFETCHES should be running at the same time.  */
  remaining_prefetch_slots = SIMULTANEOUS_PREFETCHES;

  /* The prefetch will run for AHEAD iterations of the original loop, i.e.,
     AHEAD / UNROLL_FACTOR iterations of the unrolled loop.  In each iteration,
     it will need a prefetch slot.  */
  slots_per_prefetch = (ahead + unroll_factor / 2) / unroll_factor;
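  /* For instance (illustrative values only), AHEAD == 10 and
     UNROLL_FACTOR == 4 give (10 + 2) / 4 == 3, i.e. 10 / 4 rounded to
     the nearest integer.  */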
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Each prefetch instruction takes %u prefetch slots.\n",
	     slots_per_prefetch);

  /* For now we just take memory references one by one and issue
     prefetches for as many as possible.  The groups are sorted
     starting with the largest step, since the references with
     large step are more likely to cause many cache misses.  */

  for (; groups; groups = groups->next)
    for (ref = groups->refs; ref; ref = ref->next)
      {
	if (!should_issue_prefetch_p (ref))
	  continue;

	/* If we need to prefetch the reference each PREFETCH_MOD iterations,
	   and we unroll the loop UNROLL_FACTOR times, we need to insert
	   ceil (UNROLL_FACTOR / PREFETCH_MOD) instructions in each
	   iteration.  */
	n_prefetches = ((unroll_factor + ref->prefetch_mod - 1)
			/ ref->prefetch_mod);
	prefetch_slots = n_prefetches * slots_per_prefetch;
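	/* E.g. (illustrative values), UNROLL_FACTOR == 4 and PREFETCH_MOD == 3
	   give (4 + 3 - 1) / 3 == 2 == ceil (4 / 3) prefetch instructions,
	   each of which consumes SLOTS_PER_PREFETCH slots.  */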

	/* If more than half of the prefetches would be lost anyway, do not
	   issue the prefetch.  */
	if (2 * remaining_prefetch_slots < prefetch_slots)
	  continue;
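	/* E.g. (illustrative values), with 3 slots remaining and 8 slots
	   required, 2 * 3 < 8 and the reference is skipped; otherwise we
	   commit to issuing the prefetch.  */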

	ref->issue_prefetch_p = true;

	if (remaining_prefetch_slots <= prefetch_slots)
	  return true;
	remaining_prefetch_slots -= prefetch_slots;
	any = true;
      }

  return any;
}
Example #2
static bool
anything_to_prefetch_p (struct mem_ref_group *groups)
{
  struct mem_ref *ref;

  for (; groups; groups = groups->next)
    for (ref = groups->refs; ref; ref = ref->next)
      if (should_issue_prefetch_p (ref))
	return true;

  return false;
}
Example #3
static unsigned
determine_unroll_factor (struct loop *loop, struct mem_ref_group *refs,
                         unsigned ahead, unsigned ninsns,
                         struct tree_niter_desc *desc)
{
    unsigned upper_bound, size_factor, constraint_factor;
    unsigned factor, max_mod_constraint, ahead_factor;
    struct mem_ref_group *agp;
    struct mem_ref *ref;

    upper_bound = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

    /* First check whether the loop is not too large to unroll.  */
    size_factor = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / ninsns;
    if (size_factor <= 1)
        return 1;

    if (size_factor < upper_bound)
        upper_bound = size_factor;
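    /* E.g. (illustrative values), PARAM_MAX_UNROLLED_INSNS == 200 and
       NINSNS == 70 give SIZE_FACTOR == 2, capping UPPER_BOUND at 2 when
       PARAM_MAX_UNROLL_TIMES is larger.  */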

    max_mod_constraint = 1;
    for (agp = refs; agp; agp = agp->next)
        for (ref = agp->refs; ref; ref = ref->next)
            if (should_issue_prefetch_p (ref)
                    && ref->prefetch_mod > max_mod_constraint)
                max_mod_constraint = ref->prefetch_mod;

    /* Set constraint_factor as large as needed to be able to satisfy the
       largest modulo constraint.  */
    constraint_factor = max_mod_constraint;

    /* If AHEAD is too large in comparison with the number of available
       prefetches, unroll the loop as much as needed so that at least some
       of the references in the loop can be prefetched, at least partially.  */
    ahead_factor = ((ahead + SIMULTANEOUS_PREFETCHES - 1)
                    / SIMULTANEOUS_PREFETCHES);
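    /* E.g. (illustrative values), AHEAD == 20 and SIMULTANEOUS_PREFETCHES == 6
       give (20 + 5) / 6 == 4 == ceil (20 / 6).  */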

    /* Unroll as much as useful, but bound the code size growth.  */
    if (constraint_factor < ahead_factor)
        factor = ahead_factor;
    else
        factor = constraint_factor;
    if (factor > upper_bound)
        factor = upper_bound;
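    /* I.e. FACTOR == MIN (MAX (CONSTRAINT_FACTOR, AHEAD_FACTOR), UPPER_BOUND).  */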

    if (!should_unroll_loop_p (loop, desc, factor))
        return 1;

    return factor;
}
Example #4
static unsigned
determine_unroll_factor (struct loop *loop, struct mem_ref_group *refs,
			 unsigned ninsns, struct tree_niter_desc *desc,
			 HOST_WIDE_INT est_niter)
{
  unsigned upper_bound;
  unsigned nfactor, factor, mod_constraint;
  struct mem_ref_group *agp;
  struct mem_ref *ref;

  /* First check whether the loop is not too large to unroll.  We ignore
     PARAM_MAX_UNROLL_TIMES, because for small loops, it prevented us
     from unrolling them enough to make exactly one cache line covered by each
     iteration.  Also, the goal of PARAM_MAX_UNROLL_TIMES is to prevent
     us from unrolling the loops too many times in cases where we only expect
     gains from better scheduling and decreasing loop overhead, which is not
     the case here.  */
  upper_bound = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / ninsns;

  /* If we unrolled the loop more times than it iterates, the unrolled version
     of the loop would never be entered.  */
  if (est_niter >= 0 && est_niter < (HOST_WIDE_INT) upper_bound)
    upper_bound = est_niter;
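  /* E.g. (illustrative value), a loop estimated to iterate only 3 times is
     not unrolled more than 3 times.  */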

  if (upper_bound <= 1)
    return 1;

  /* Choose the factor so that we may prefetch each cache line just once,
     but bound the unrolling by UPPER_BOUND.  */
  factor = 1;
  for (agp = refs; agp; agp = agp->next)
    for (ref = agp->refs; ref; ref = ref->next)
      if (should_issue_prefetch_p (ref))
	{
	  mod_constraint = ref->prefetch_mod;
	  nfactor = least_common_multiple (mod_constraint, factor);
	  if (nfactor <= upper_bound)
	    factor = nfactor;
	}
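  /* E.g. (illustrative values), PREFETCH_MOD constraints of 2 and 3 with
     UPPER_BOUND == 8 grow FACTOR from 1 to lcm (2, 1) == 2 and then to
     lcm (3, 2) == 6.  */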

  if (!should_unroll_loop_p (loop, desc, factor))
    return 1;

  return factor;
}
Example #5
static bool
schedule_prefetches (struct mem_ref_group *groups, unsigned unroll_factor,
                     unsigned ahead)
{
    unsigned max_prefetches, n_prefetches;
    struct mem_ref *ref;
    bool any = false;

    max_prefetches = (SIMULTANEOUS_PREFETCHES * unroll_factor) / ahead;
    if (max_prefetches > (unsigned) SIMULTANEOUS_PREFETCHES)
        max_prefetches = SIMULTANEOUS_PREFETCHES;
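    /* Each issued prefetch presumably stays useful for about
       AHEAD / UNROLL_FACTOR iterations of the unrolled loop, mirroring the
       slot accounting in Example #1; e.g. (illustrative values)
       SIMULTANEOUS_PREFETCHES == 6, UNROLL_FACTOR == 4 and AHEAD == 8
       allow at most (6 * 4) / 8 == 3 prefetch instructions.  */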

    if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Max prefetches to issue: %d.\n", max_prefetches);

    if (!max_prefetches)
        return false;

    /* For now we just take memory references one by one and issue
       prefetches for as many as possible.  The groups are sorted
       starting with the largest step, since the references with
       large step are more likely to cause many cache misses.  */

    for (; groups; groups = groups->next)
        for (ref = groups->refs; ref; ref = ref->next)
        {
            if (!should_issue_prefetch_p (ref))
                continue;

            ref->issue_prefetch_p = true;

            /* If prefetch_mod is less than unroll_factor, we need to insert
               several prefetches for the reference.  */
            n_prefetches = ((unroll_factor + ref->prefetch_mod - 1)
                            / ref->prefetch_mod);
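            /* Same ceiling computation as in Example #1: e.g. (illustrative
               values) UNROLL_FACTOR == 4 and PREFETCH_MOD == 3 give 2.  */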
            if (max_prefetches <= n_prefetches)
                return true;

            max_prefetches -= n_prefetches;
            any = true;
        }

    return any;
}