Example #1

The top-level driver of GCC's loop data-prefetching pass (loop_prefetch_arrays in tree-ssa-loop-prefetch.c): it gathers the memory references of LOOP, prunes them by reuse, computes the prefetch-ahead distance and an unroll factor, schedules the prefetches, unrolls the loop when profitable, and finally issues the prefetch instructions. The return value is true iff the loop was unrolled.
static bool
loop_prefetch_arrays (struct loops *loops, struct loop *loop)
{
    struct mem_ref_group *refs;
    unsigned ahead, ninsns, unroll_factor;
    struct tree_niter_desc desc;
    bool unrolled = false;

    /* Step 1: gather the memory references.  */
    refs = gather_memory_references (loop);

    /* Step 2: estimate the reuse effects.  */
    prune_by_reuse (refs);

    if (!anything_to_prefetch_p (refs))
        goto fail;

    /* Step 3: determine the ahead and unroll factor.  */

    /* FIXME: We should not use the size of the loop, but the average
       number of instructions executed per iteration of the loop.  */
    ninsns = tree_num_loop_insns (loop);
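    /* AHEAD is the ceiling of PREFETCH_LATENCY / ninsns: the smallest number
       of loop iterations whose instructions cover the latency of a prefetch.  */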
    ahead = (PREFETCH_LATENCY + ninsns - 1) / ninsns;
    unroll_factor = determine_unroll_factor (loop, refs, ahead, ninsns,
                    &desc);
    if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Ahead %d, unroll factor %d\n", ahead, unroll_factor);

    /* If the loop rolls less than the required unroll factor, prefetching
       is useless.  */
    if (unroll_factor > 1
            && cst_and_fits_in_hwi (desc.niter)
            && (unsigned HOST_WIDE_INT) int_cst_value (desc.niter) < unroll_factor)
        goto fail;

    /* Step 4: what to prefetch?  */
    if (!schedule_prefetches (refs, unroll_factor, ahead))
        goto fail;

    /* Step 5: unroll the loop.  TODO -- peeling of first and last few
       iterations so that we do not issue superfluous prefetches.  */
    if (unroll_factor != 1)
    {
        tree_unroll_loop (loops, loop, unroll_factor,
                          single_dom_exit (loop), &desc);
        unrolled = true;
    }

    /* Step 6: issue the prefetches.  */
    issue_prefetches (refs, unroll_factor, ahead);

fail:
    release_mem_refs (refs);
    return unrolled;
}
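The ahead computation above is a plain ceiling division. A minimal standalone sketch with assumed numbers (in GCC, PREFETCH_LATENCY is a per-target parameter, tunable via --param prefetch-latency; the value 200 below is an assumption for illustration):

static unsigned
compute_ahead (unsigned ninsns)
{
  /* Assumed latency of 200 cycles; GCC's actual value depends on the
     target and on --param prefetch-latency.  */
  const unsigned prefetch_latency = 200;

  /* Ceiling division: the smallest AHEAD such that AHEAD * ninsns
     covers the latency.  */
  return (prefetch_latency + ninsns - 1) / ninsns;
}

For a loop body of 30 instructions this yields (200 + 30 - 1) / 30 == 7, so prefetches are issued for the data used 7 iterations later.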
Example #2

A later revision of the same function: the loops parameter is gone, loops in cold regions are skipped up front, reuse across the whole loop nest and nontemporal stores are taken into account, and the estimated iteration count is used to gate prefetching.
static bool
loop_prefetch_arrays (struct loop *loop)
{
  struct mem_ref_group *refs;
  unsigned ahead, ninsns, time, unroll_factor;
  HOST_WIDE_INT est_niter;
  struct tree_niter_desc desc;
  bool unrolled = false, no_other_refs;

  if (optimize_loop_nest_for_size_p (loop))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  ignored (cold area)\n");
      return false;
    }

  /* Step 1: gather the memory references.  */
  refs = gather_memory_references (loop, &no_other_refs);

  /* Step 2: estimate the reuse effects.  */
  prune_by_reuse (refs);

  if (!anything_to_prefetch_p (refs))
    goto fail;

  determine_loop_nest_reuse (loop, refs, no_other_refs);

  /* Step 3: determine the ahead and unroll factor.  */

  /* FIXME: the time should be weighted by the probabilities of the blocks in
     the loop body.  */
  time = tree_num_loop_insns (loop, &eni_time_weights);
  ahead = (PREFETCH_LATENCY + time - 1) / time;
  est_niter = estimated_loop_iterations_int (loop, false);
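  /* estimated_loop_iterations_int returns -1 when no estimate is
     available, hence the est_niter >= 0 guard below.  */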

  /* The prefetches will run for AHEAD iterations of the original loop.  Unless
     the loop rolls at least AHEAD times, prefetching the references does not
     make sense.  */
  if (est_niter >= 0 && est_niter <= (HOST_WIDE_INT) ahead)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Not prefetching -- loop estimated to roll only %d times\n",
		 (int) est_niter);
      goto fail;
    }

  mark_nontemporal_stores (loop, refs);

  ninsns = tree_num_loop_insns (loop, &eni_size_weights);
  unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc,
					   est_niter);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Ahead %d, unroll factor %d\n", ahead, unroll_factor);

  /* Step 4: what to prefetch?  */
  if (!schedule_prefetches (refs, unroll_factor, ahead))
    goto fail;

  /* Step 5: unroll the loop.  TODO -- peeling of first and last few
     iterations so that we do not issue superfluous prefetches.  */
  if (unroll_factor != 1)
    {
      tree_unroll_loop (loop, unroll_factor,
			single_dom_exit (loop), &desc);
      unrolled = true;
    }

  /* Step 6: issue the prefetches.  */
  issue_prefetches (refs, unroll_factor, ahead);

fail:
  release_mem_refs (refs);
  return unrolled;
}
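Taken together, steps 5 and 6 transform the loop roughly as in the hand-written C below. This is a sketch only: the pass works on GIMPLE rather than source, the function and array names are invented, and AHEAD == 8 with UNROLL_FACTOR == 2 are assumed values.

void
example (double *a, const double *b, long n)
{
  long i;

  /* Unrolled body (UNROLL_FACTOR == 2), with prefetches for the data
     that will be touched AHEAD == 8 original iterations from now.  */
  for (i = 0; i + 1 < n; i += 2)
    {
      __builtin_prefetch (&b[i + 8], 0, 3);  /* read prefetch */
      __builtin_prefetch (&a[i + 8], 1, 3);  /* write prefetch */
      a[i] = b[i] + 1.0;
      a[i + 1] = b[i + 1] + 1.0;
    }

  /* Leftover iterations from the unroll.  As the TODO in the pass notes,
     no peeling is done, so a few trailing prefetches reach past the end
     of the arrays; they are superfluous but harmless.  */
  for (; i < n; i++)
    a[i] = b[i] + 1.0;
}

__builtin_prefetch takes the address, a read/write flag (0 for read, 1 for write), and a temporal-locality hint from 0 to 3; per the GCC documentation, prefetching an invalid address does not fault, which is why running past the end is safe.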