Example #1
0
unsigned int
tree_ssa_unswitch_loops (void)
{
    loop_iterator li;
    struct loop *loop;
    bool changed = false;
    HOST_WIDE_INT iterations;

    /* Go through inner loops (only original ones).  */
    FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
    {
        if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, ";; Considering loop %d\n", loop->num);

        /* Do not unswitch in cold regions. */
        if (optimize_loop_for_size_p (loop))
        {
            if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, ";; Not unswitching cold loops\n");
            continue;
        }

        /* The loop should not be too large, to limit code growth. */
        if (tree_num_loop_insns (loop, &eni_size_weights)
                > (unsigned) PARAM_VALUE (PARAM_MAX_UNSWITCH_INSNS))
        {
            if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, ";; Not unswitching, loop too big\n");
            continue;
        }

        /* If the loop is not expected to iterate, there is no need
        for unswitching.  */
        iterations = estimated_loop_iterations_int (loop);
        if (iterations >= 0 && iterations <= 1)
        {
            if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, ";; Not unswitching, loop is not expected to iterate\n");
            continue;
        }

        changed |= tree_unswitch_single_loop (loop, 0);
    }

    if (changed)
        return TODO_cleanup_cfg;
    return 0;
}
/* Unswitch single LOOP.  COND_CHECKED holds list of conditions we already
   unswitched on and are therefore known to be true in this LOOP.  NUM is
   number of unswitchings done; do not allow it to grow too much, it is too
   easy to create example on that the code would grow exponentially.
   Returns true LOOP was unswitched.  */
static bool 
unswitch_single_loop (struct loop *loop, rtx cond_checked, int num)
{
  basic_block *bbs;
  struct loop *nloop;
  unsigned i;
  rtx cond, rcond = NULL_RTX, conds, rconds, acond, cinsn;
  int repeat;
  edge e;
  HOST_WIDE_INT iterations;

  /* Do not unswitch too much.  */
  if (num > PARAM_VALUE (PARAM_MAX_UNSWITCH_LEVEL))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unswitching anymore, hit max level\n");
      return false;
    }

  /* Only unswitch innermost loops.  */
  if (loop->inner)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unswitching, not innermost loop\n");
      return false;
    }

  /* We must be able to duplicate loop body.  */
  if (!can_duplicate_loop_p (loop))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unswitching, can't duplicate loop\n");
      return false;
    }

  /* The loop should not be too large, to limit code growth.  */
  if (num_loop_insns (loop) > PARAM_VALUE (PARAM_MAX_UNSWITCH_INSNS))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unswitching, loop too big\n");
      return false;
    }

  /* Do not unswitch in cold areas.  */
  if (optimize_loop_for_size_p (loop))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unswitching, not hot area\n");
      return false;
    }

  /* Nor if the loop usually does not roll.  */
  iterations = estimated_loop_iterations_int (loop);
  if (iterations >= 0 && iterations <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unswitching, loop iterations < 1\n");
      return false;
    }

  do
    {
      repeat = 0;
      cinsn = NULL_RTX;

      /* Find a bb to unswitch on.  */
      bbs = get_loop_body (loop);
      iv_analysis_loop_init (loop);
      for (i = 0; i < loop->num_nodes; i++)
	if ((cond = may_unswitch_on (bbs[i], loop, &cinsn)))
	  break;

      if (i == loop->num_nodes)
	{
	  free (bbs);
	  return false;
	}

      if (cond != const0_rtx
	  && cond != const_true_rtx)
	{
	  rcond = reversed_condition (cond);
	  if (rcond)
	    rcond = canon_condition (rcond);

	  /* Check whether the result can be predicted.  */
	  for (acond = cond_checked; acond; acond = XEXP (acond, 1))
	    simplify_using_condition (XEXP (acond, 0), &cond, NULL);
	}

      if (cond == const_true_rtx)
	{
	  /* Remove false path.  */
	  e = FALLTHRU_EDGE (bbs[i]);
	  remove_path (e);
	  free (bbs);
	  repeat = 1;
	}
      else if (cond == const0_rtx)
	{
	  /* Remove true path.  */
	  e = BRANCH_EDGE (bbs[i]);
	  remove_path (e);
	  free (bbs);
	  repeat = 1;
	}
    } while (repeat);

  /* We found the condition we can unswitch on.  */
  conds = alloc_EXPR_LIST (0, cond, cond_checked);
  if (rcond)
    rconds = alloc_EXPR_LIST (0, rcond, cond_checked);
  else
    rconds = cond_checked;

  if (dump_file)
    fprintf (dump_file, ";; Unswitching loop\n");

  /* Unswitch the loop on this condition.  */
  nloop = unswitch_loop (loop, bbs[i], copy_rtx_if_shared (cond), cinsn);
  gcc_assert (nloop);

  /* Invoke itself on modified loops.  */
  unswitch_single_loop (nloop, rconds, num + 1);
  unswitch_single_loop (loop, conds, num + 1);

  free_EXPR_LIST_node (conds);
  if (rcond)
    free_EXPR_LIST_node (rconds);

  free (bbs);

  return true;
}
Example #3
0
static bool
loop_prefetch_arrays (struct loop *loop)
{
  struct mem_ref_group *refs;
  unsigned ahead, ninsns, time, unroll_factor;
  HOST_WIDE_INT est_niter;
  struct tree_niter_desc desc;
  bool unrolled = false, no_other_refs;

  if (optimize_loop_nest_for_size_p (loop))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  ignored (cold area)\n");
      return false;
    }

  /* Step 1: gather the memory references.  */
  refs = gather_memory_references (loop, &no_other_refs);

  /* Step 2: estimate the reuse effects.  */
  prune_by_reuse (refs);

  if (!anything_to_prefetch_p (refs))
    goto fail;

  determine_loop_nest_reuse (loop, refs, no_other_refs);

  /* Step 3: determine the ahead and unroll factor.  */

  /* FIXME: the time should be weighted by the probabilities of the blocks in
     the loop body.  */
  time = tree_num_loop_insns (loop, &eni_time_weights);
  ahead = (PREFETCH_LATENCY + time - 1) / time;
  est_niter = estimated_loop_iterations_int (loop, false);

  /* The prefetches will run for AHEAD iterations of the original loop.  Unless
     the loop rolls at least AHEAD times, prefetching the references does not
     make sense.  */
  if (est_niter >= 0 && est_niter <= (HOST_WIDE_INT) ahead)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Not prefetching -- loop estimated to roll only %d times\n",
		 (int) est_niter);
      goto fail;
    }

  mark_nontemporal_stores (loop, refs);

  ninsns = tree_num_loop_insns (loop, &eni_size_weights);
  unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc,
					   est_niter);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Ahead %d, unroll factor %d\n", ahead, unroll_factor);

  /* Step 4: what to prefetch?  */
  if (!schedule_prefetches (refs, unroll_factor, ahead))
    goto fail;

  /* Step 5: unroll the loop.  TODO -- peeling of first and last few
     iterations so that we do not issue superfluous prefetches.  */
  if (unroll_factor != 1)
    {
      tree_unroll_loop (loop, unroll_factor,
			single_dom_exit (loop), &desc);
      unrolled = true;
    }

  /* Step 6: issue the prefetches.  */
  issue_prefetches (refs, unroll_factor, ahead);

fail:
  release_mem_refs (refs);
  return unrolled;
}
Example #4
0
static void
determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs,
			   bool no_other_refs)
{
  struct loop *nest, *aloop;
  VEC (data_reference_p, heap) *datarefs = NULL;
  VEC (ddr_p, heap) *dependences = NULL;
  struct mem_ref_group *gr;
  struct mem_ref *ref, *refb;
  VEC (loop_p, heap) *vloops = NULL;
  unsigned *loop_data_size;
  unsigned i, j, n;
  unsigned volume, dist, adist;
  HOST_WIDE_INT vol;
  data_reference_p dr;
  ddr_p dep;

  if (loop->inner)
    return;

  /* Find the outermost loop of the loop nest of loop (we require that
     there are no sibling loops inside the nest).  */
  nest = loop;
  while (1)
    {
      aloop = loop_outer (nest);

      if (aloop == current_loops->tree_root
	  || aloop->inner->next)
	break;

      nest = aloop;
    }

  /* For each loop, determine the amount of data accessed in each iteration.
     We use this to estimate whether the reference is evicted from the
     cache before its reuse.  */
  find_loop_nest (nest, &vloops);
  n = VEC_length (loop_p, vloops);
  loop_data_size = XNEWVEC (unsigned, n);
  volume = volume_of_references (refs);
  i = n;
  while (i-- != 0)
    {
      loop_data_size[i] = volume;
      /* Bound the volume by the L2 cache size, since above this bound,
	 all dependence distances are equivalent.  */
      if (volume > L2_CACHE_SIZE_BYTES)
	continue;

      aloop = VEC_index (loop_p, vloops, i);
      vol = estimated_loop_iterations_int (aloop, false);
      if (vol < 0)
	vol = expected_loop_iterations (aloop);
      volume *= vol;
    }

  /* Prepare the references in the form suitable for data dependence
     analysis.  We ignore unanalyzable data references (the results
     are used just as a heuristics to estimate temporality of the
     references, hence we do not need to worry about correctness).  */
  for (gr = refs; gr; gr = gr->next)
    for (ref = gr->refs; ref; ref = ref->next)
      {
	dr = create_data_ref (nest, ref->mem, ref->stmt, !ref->write_p);

	if (dr)
	  {
	    ref->reuse_distance = volume;
	    dr->aux = ref;
	    VEC_safe_push (data_reference_p, heap, datarefs, dr);
	  }
	else
	  no_other_refs = false;
      }

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      dist = self_reuse_distance (dr, loop_data_size, n, loop);
      ref = (struct mem_ref *) dr->aux;
      if (ref->reuse_distance > dist)
	ref->reuse_distance = dist;

      if (no_other_refs)
	ref->independent_p = true;
    }

  compute_all_dependences (datarefs, &dependences, vloops, true);

  for (i = 0; VEC_iterate (ddr_p, dependences, i, dep); i++)
    {
      if (DDR_ARE_DEPENDENT (dep) == chrec_known)
	continue;

      ref = (struct mem_ref *) DDR_A (dep)->aux;
      refb = (struct mem_ref *) DDR_B (dep)->aux;

      if (DDR_ARE_DEPENDENT (dep) == chrec_dont_know
	  || DDR_NUM_DIST_VECTS (dep) == 0)
	{
	  /* If the dependence cannot be analyzed, assume that there might be
	     a reuse.  */
	  dist = 0;
      
	  ref->independent_p = false;
	  refb->independent_p = false;
	}
      else
	{
	  /* The distance vectors are normalized to be always lexicographically
	     positive, hence we cannot tell just from them whether DDR_A comes
	     before DDR_B or vice versa.  However, it is not important,
	     anyway -- if DDR_A is close to DDR_B, then it is either reused in
	     DDR_B (and it is not nontemporal), or it reuses the value of DDR_B
	     in cache (and marking it as nontemporal would not affect
	     anything).  */

	  dist = volume;
	  for (j = 0; j < DDR_NUM_DIST_VECTS (dep); j++)
	    {
	      adist = volume_of_dist_vector (DDR_DIST_VECT (dep, j),
					     loop_data_size, n);

	      /* If this is a dependence in the innermost loop (i.e., the
		 distances in all superloops are zero) and it is not
		 the trivial self-dependence with distance zero, record that
		 the references are not completely independent.  */
	      if (lambda_vector_zerop (DDR_DIST_VECT (dep, j), n - 1)
		  && (ref != refb
		      || DDR_DIST_VECT (dep, j)[n-1] != 0))
		{
		  ref->independent_p = false;
		  refb->independent_p = false;
		}

	      /* Ignore accesses closer than
		 L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION,
	      	 so that we use nontemporal prefetches e.g. if single memory
		 location is accessed several times in a single iteration of
		 the loop.  */
	      if (adist < L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION)
		continue;

	      if (adist < dist)
		dist = adist;
	    }
	}

      if (ref->reuse_distance > dist)
	ref->reuse_distance = dist;
      if (refb->reuse_distance > dist)
	refb->reuse_distance = dist;
    }

  free_dependence_relations (dependences);
  free_data_refs (datarefs);
  free (loop_data_size);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reuse distances:\n");
      for (gr = refs; gr; gr = gr->next)
	for (ref = gr->refs; ref; ref = ref->next)
	  fprintf (dump_file, " ref %p distance %u\n",
		   (void *) ref, ref->reuse_distance);
    }
}