Exemplo n.º 1
0
Arquivo: lcm.c Projeto: krnowak/gcc
static void
compute_laterin (struct edge_list *edge_list, sbitmap *earliest,
		 sbitmap *antloc, sbitmap *later, sbitmap *laterin)
{
  int num_edges, i;
  edge e;
  basic_block *worklist, *qin, *qout, *qend, bb;
  unsigned int qlen;
  edge_iterator ei;

  num_edges = NUM_EDGES (edge_list);

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));

  /* Initialize a mapping from each edge to its index.  */
  for (i = 0; i < num_edges; i++)
    INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;

  /* We want a maximal solution, so initially consider LATER true for
     all edges.  This allows propagation through a loop since the incoming
     loop edge will have LATER set, so if all the other incoming edges
     to the loop are set, then LATERIN will be set for the head of the
     loop.

     If the optimistic setting of LATER on that edge was incorrect (for
     example the expression is ANTLOC in a block within the loop) then
     this algorithm will detect it when we process the block at the head
     of the optimistic edge.  That will requeue the affected blocks.  */
  bitmap_vector_ones (later, num_edges);

  /* Note that even though we want an optimistic setting of LATER, we
     do not want to be overly optimistic.  Consider an outgoing edge from
     the entry block.  That edge should always have a LATER value the
     same as EARLIEST for that edge.  */
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
    bitmap_copy (later[(size_t) e->aux], earliest[(size_t) e->aux]);

  /* Add all the blocks to the worklist.  This prevents an early exit from
     the loop given our optimistic initialization of LATER above.  */
  int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  int postorder_num = inverted_post_order_compute (postorder);
  for (int i = 0; i < postorder_num; ++i)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, postorder[i]);
      if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun)
	  || bb == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	continue;
      *qin++ = bb;
      bb->aux = bb;
    }
  free (postorder);

  /* Note that we do not use the last allocated element for our queue,
     as EXIT_BLOCK is never inserted into it. */
  qin = worklist;
  qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS];
  qlen = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      bb->aux = NULL;
      qlen--;
      if (qout >= qend)
	qout = worklist;

      /* Compute the intersection of LATERIN for each incoming edge to B.  */
      bitmap_ones (laterin[bb->index]);
      FOR_EACH_EDGE (e, ei, bb->preds)
	bitmap_and (laterin[bb->index], laterin[bb->index],
		    later[(size_t)e->aux]);

      /* Calculate LATER for all outgoing edges.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (bitmap_ior_and_compl (later[(size_t) e->aux],
				  earliest[(size_t) e->aux],
				  laterin[bb->index],
				  antloc[bb->index])
	    /* If LATER for an outgoing edge was changed, then we need
	       to add the target of the outgoing edge to the worklist.  */
	    && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest->aux == 0)
	  {
	    *qin++ = e->dest;
	    e->dest->aux = e;
	    qlen++;
	    if (qin >= qend)
	      qin = worklist;
	  }
    }

  /* Computation of insertion and deletion points requires computing LATERIN
     for the EXIT block.  We allocated an extra entry in the LATERIN array
     for just this purpose.  */
  bitmap_ones (laterin[last_basic_block_for_fn (cfun)]);
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    bitmap_and (laterin[last_basic_block_for_fn (cfun)],
		laterin[last_basic_block_for_fn (cfun)],
		later[(size_t) e->aux]);

  clear_aux_for_edges ();
  free (worklist);
}
Exemplo n.º 2
0
static bool
try_unroll_loop_completely (struct loop *loop,
			    edge exit, tree niter,
			    enum unroll_level ul,
			    HOST_WIDE_INT maxiter,
			    location_t locus)
{
  unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
  struct loop_size size;
  bool n_unroll_found = false;
  edge edge_to_cancel = NULL;
  int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;

  /* See if we proved number of iterations to be low constant.

     EXIT is an edge that will be removed in all but last iteration of 
     the loop.

     EDGE_TO_CACNEL is an edge that will be removed from the last iteration
     of the unrolled sequence and is expected to make the final loop not
     rolling. 

     If the number of execution of loop is determined by standard induction
     variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
     from the iv test.  */
  if (tree_fits_uhwi_p (niter))
    {
      n_unroll = tree_to_uhwi (niter);
      n_unroll_found = true;
      edge_to_cancel = EDGE_SUCC (exit->src, 0);
      if (edge_to_cancel == exit)
	edge_to_cancel = EDGE_SUCC (exit->src, 1);
    }
  /* We do not know the number of iterations and thus we can not eliminate
     the EXIT edge.  */
  else
    exit = NULL;

  /* See if we can improve our estimate by using recorded loop bounds.  */
  if (maxiter >= 0
      && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
    {
      n_unroll = maxiter;
      n_unroll_found = true;
      /* Loop terminates before the IV variable test, so we can not
	 remove it in the last iteration.  */
      edge_to_cancel = NULL;
    }

  if (!n_unroll_found)
    return false;

  if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Not unrolling loop %d "
		 "(--param max-completely-peeled-times limit reached).\n",
		 loop->num);
      return false;
    }

  if (!edge_to_cancel)
    edge_to_cancel = loop_edge_to_cancel (loop);

  if (n_unroll)
    {
      sbitmap wont_exit;
      edge e;
      unsigned i;
      bool large;
      vec<edge> to_remove = vNULL;
      if (ul == UL_SINGLE_ITER)
	return false;

      large = tree_estimate_loop_size
		 (loop, exit, edge_to_cancel, &size,
		  PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
      ninsns = size.overall;
      if (large)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
		     loop->num);
	  return false;
	}

      unr_insns = estimated_unrolled_size (&size, n_unroll);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
		   (int) unr_insns);
	}

      /* If the code is going to shrink, we don't need to be extra cautious
	 on guessing if the unrolling is going to be profitable.  */
      if (unr_insns
	  /* If there is IV variable that will become constant, we save
	     one instruction in the loop prologue we do not account
	     otherwise.  */
	  <= ninsns + (size.constant_iv != false))
	;
      /* We unroll only inner loops, because we do not consider it profitable
	 otheriwse.  We still can cancel loopback edge of not rolling loop;
	 this is always a good idea.  */
      else if (ul == UL_NO_GROWTH)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
		     loop->num);
	  return false;
	}
      /* Outer loops tend to be less interesting candidates for complete
	 unrolling unless we can do a lot of propagation into the inner loop
	 body.  For now we disable outer loop unrolling when the code would
	 grow.  */
      else if (loop->inner)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "it is not innermost and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* If there is call on a hot path through the loop, then
	 there is most probably not much to optimize.  */
      else if (size.num_non_pure_calls_on_hot_path)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "contains call and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* If there is pure/const call in the function, then we
	 can still optimize the unrolled loop body if it contains
	 some other interesting code than the calls and code
	 storing or cumulating the return value.  */
      else if (size.num_pure_calls_on_hot_path
	       /* One IV increment, one test, one ivtmp store
		  and one useful stmt.  That is about minimal loop
		  doing pure call.  */
	       && (size.non_call_stmts_on_hot_path
		   <= 3 + size.num_pure_calls_on_hot_path))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "contains just pure calls and code would grow.\n",
		     loop->num);
	  return false;
	}
      /* Complette unrolling is major win when control flow is removed and
	 one big basic block is created.  If the loop contains control flow
	 the optimization may still be a win because of eliminating the loop
	 overhead but it also may blow the branch predictor tables.
	 Limit number of branches on the hot path through the peeled
	 sequence.  */
      else if (size.num_branches_on_hot_path * (int)n_unroll
	       > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     " number of branches on hot path in the unrolled sequence"
		     " reach --param max-peel-branches limit.\n",
		     loop->num);
	  return false;
	}
      else if (unr_insns
	       > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d: "
		     "(--param max-completely-peeled-insns limit reached).\n",
		     loop->num);
	  return false;
	}
      dump_printf_loc (report_flags, locus,
                       "loop turned into non-loop; it never loops.\n");

      initialize_original_copy_tables ();
      wont_exit = sbitmap_alloc (n_unroll + 1);
      bitmap_ones (wont_exit);
      bitmap_clear_bit (wont_exit, 0);

      if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
						 n_unroll, wont_exit,
						 exit, &to_remove,
						 DLTHE_FLAG_UPDATE_FREQ
						 | DLTHE_FLAG_COMPLETTE_PEEL))
	{
          free_original_copy_tables ();
	  free (wont_exit);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Failed to duplicate the loop\n");
	  return false;
	}

      FOR_EACH_VEC_ELT (to_remove, i, e)
	{
	  bool ok = remove_path (e);
	  gcc_assert (ok);
	}

      to_remove.release ();
      free (wont_exit);
      free_original_copy_tables ();
    }
Exemplo n.º 3
0
Arquivo: lcm.c Projeto: ChaosJohn/gcc
static void
compute_nearerout (struct edge_list *edge_list, sbitmap *farthest,
		   sbitmap *st_avloc, sbitmap *nearer, sbitmap *nearerout)
{
  int num_edges, i;
  edge e;
  basic_block *worklist, *tos, bb;
  edge_iterator ei;

  num_edges = NUM_EDGES (edge_list);

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  tos = worklist = XNEWVEC (basic_block, n_basic_blocks + 1);

  /* Initialize NEARER for each edge and build a mapping from an edge to
     its index.  */
  for (i = 0; i < num_edges; i++)
    INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;

  /* We want a maximal solution.  */
  bitmap_vector_ones (nearer, num_edges);

  /* Note that even though we want an optimistic setting of NEARER, we
     do not want to be overly optimistic.  Consider an incoming edge to
     the exit block.  That edge should always have a NEARER value the
     same as FARTHEST for that edge.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    bitmap_copy (nearer[(size_t)e->aux], farthest[(size_t)e->aux]);

  /* Add all the blocks to the worklist.  This prevents an early exit
     from the loop given our optimistic initialization of NEARER.  */
  FOR_EACH_BB (bb)
    {
      *tos++ = bb;
      bb->aux = bb;
    }

  /* Iterate until the worklist is empty.  */
  while (tos != worklist)
    {
      /* Take the first entry off the worklist.  */
      bb = *--tos;
      bb->aux = NULL;

      /* Compute the intersection of NEARER for each outgoing edge from B.  */
      bitmap_ones (nearerout[bb->index]);
      FOR_EACH_EDGE (e, ei, bb->succs)
	bitmap_and (nearerout[bb->index], nearerout[bb->index],
			 nearer[(size_t) e->aux]);

      /* Calculate NEARER for all incoming edges.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	if (bitmap_ior_and_compl (nearer[(size_t) e->aux],
				      farthest[(size_t) e->aux],
				      nearerout[e->dest->index],
				      st_avloc[e->dest->index])
	    /* If NEARER for an incoming edge was changed, then we need
	       to add the source of the incoming edge to the worklist.  */
	    && e->src != ENTRY_BLOCK_PTR && e->src->aux == 0)
	  {
	    *tos++ = e->src;
	    e->src->aux = e;
	  }
    }

  /* Computation of insertion and deletion points requires computing NEAREROUT
     for the ENTRY block.  We allocated an extra entry in the NEAREROUT array
     for just this purpose.  */
  bitmap_ones (nearerout[last_basic_block]);
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    bitmap_and (nearerout[last_basic_block],
		     nearerout[last_basic_block],
		     nearer[(size_t) e->aux]);

  clear_aux_for_edges ();
  free (tos);
}