Пример #1
0
/* Collect the figures used to judge whether LOOP is a good candidate
   for interchange inside the nest whose outermost loop is FIRST_LOOP.

   DEPENDENCE_RELATIONS and DATAREFS are the dependence relations and
   the data references to inspect.  The three results are reset to
   zero and then accumulated:

   *DEPENDENCE_STEPS receives the sum of the absolute values of the
   distances found at index LOOP->depth - FIRST_LOOP->depth of each
   usable distance vector.

   *NB_DEPS_NOT_CARRIED_BY_LOOP receives the number of usable
   relations whose distance at that index is zero.

   *ACCESS_STRIDES receives the sum of the INTEGER_CST evolution parts
   in LOOP of the access functions of the data references that are
   taken into account.  */

static void
gather_interchange_stats (varray_type dependence_relations,
			  varray_type datarefs,
			  struct loop *loop,
			  struct loop *first_loop,
			  unsigned int *dependence_steps,
			  unsigned int *nb_deps_not_carried_by_loop,
			  unsigned int *access_strides)
{
  unsigned int ddr_ix;
  unsigned int dr_ix;

  *dependence_steps = 0;
  *nb_deps_not_carried_by_loop = 0;
  *access_strides = 0;

  /* First pass: accumulate the dependence distances.  */
  for (ddr_ix = 0;
       ddr_ix < VARRAY_ACTIVE_SIZE (dependence_relations);
       ddr_ix++)
    {
      struct data_dependence_relation *rel
	= (struct data_dependence_relation *)
	  VARRAY_GENERIC_PTR (dependence_relations, ddr_ix);
      int distance;

      /* A relation without a distance vector, an unknown relation, or
	 a relation known to be independent exhibits no data reuse and
	 contributes nothing.  */
      if (DDR_DIST_VECT (rel) == NULL
	  || DDR_ARE_DEPENDENT (rel) == chrec_dont_know
	  || DDR_ARE_DEPENDENT (rel) == chrec_known)
	continue;

      distance = DDR_DIST_VECT (rel)[loop->depth - first_loop->depth];
      if (distance == 0)
	{
	  ++*nb_deps_not_carried_by_loop;
	  continue;
	}

      /* Only the magnitude of the step matters.  */
      *dependence_steps += (distance < 0) ? -distance : distance;
    }

  /* Second pass: accumulate the access strides.  */
  for (dr_ix = 0; dr_ix < VARRAY_ACTIVE_SIZE (datarefs); dr_ix++)
    {
      struct data_reference *ref = VARRAY_GENERIC_PTR (datarefs, dr_ix);
      struct loop *containing = loop_containing_stmt (DR_STMT (ref));
      struct loop *inner = first_loop->inner;
      unsigned int dim;

      /* Only look at references whose statement sits in
	 FIRST_LOOP->inner itself, or in a loop for which
	 flow_loop_nested_p (FIRST_LOOP->inner, ...) holds.  */
      if (inner == containing || flow_loop_nested_p (inner, containing))
	for (dim = 0; dim < DR_NUM_DIMENSIONS (ref); dim++)
	  {
	    tree stride
	      = evolution_part_in_loop_num (DR_ACCESS_FN (ref, dim),
					    loop->num);

	    /* Strides that are missing or not compile-time integer
	       constants are ignored.  */
	    if (stride != NULL_TREE && TREE_CODE (stride) == INTEGER_CST)
	      *access_strides += int_cst_value (stride);
	  }
    }
}
Пример #2
0
/* Look for profitable loop interchanges in the nest whose outermost
   loop is FIRST_LOOP, record them in TRANS by exchanging matrix rows,
   and return TRANS.

   DEPENDENCE_RELATIONS and DATAREFS are handed to
   gather_interchange_stats to evaluate each pair of loops; DEPTH and
   DEPENDENCE_RELATIONS are handed to lambda_transform_legal_p to
   validate each candidate exchange.  TRANS is returned unchanged when
   the first entry of DEPENDENCE_RELATIONS is NULL or is an unknown
   relation.  */

static lambda_trans_matrix
try_interchange_loops (lambda_trans_matrix trans,
		       unsigned int depth,
		       varray_type dependence_relations,
		       varray_type datarefs,
		       struct loop *first_loop)
{
  struct loop *outer;
  struct loop *inner;
  struct data_dependence_relation *first_ddr;

  /* An unknown relation at the head of DEPENDENCE_RELATIONS means the
     nest is not worth analyzing any further: give up.  */
  first_ddr = (struct data_dependence_relation *)
    VARRAY_GENERIC_PTR (dependence_relations, 0);
  if (first_ddr == NULL)
    return trans;
  if (DDR_ARE_DEPENDENT (first_ddr) == chrec_dont_know)
    return trans;

  /* Visit every pair (OUTER, INNER) of the nest; OUTER is always the
     shallower loop of the pair.  */
  for (inner = first_loop->inner; inner != NULL; inner = inner->inner)
    for (outer = first_loop;
	 outer->depth < inner->depth;
	 outer = outer->inner)
      {
	unsigned int steps_outer, steps_inner;
	unsigned int not_carried_outer, not_carried_inner;
	unsigned int strides_outer, strides_inner;
	int row_outer, row_inner;

	gather_interchange_stats (dependence_relations, datarefs,
				  outer, first_loop,
				  &steps_outer,
				  &not_carried_outer,
				  &strides_outer);
	gather_interchange_stats (dependence_relations, datarefs,
				  inner, first_loop,
				  &steps_inner,
				  &not_carried_inner,
				  &strides_inner);

	/* Heuristics for loop interchange profitability:

	   1. (spatial locality) Inner loops should have smallest
	      dependence steps.

	   2. (spatial locality) Inner loops should contain more
	      dependence relations not carried by the loop.

	   3. (temporal locality) Inner loops should have smallest
	      array access strides.

	   When none of the three criteria favors an exchange, leave
	   this pair alone.  */
	if (steps_outer >= steps_inner
	    && not_carried_outer <= not_carried_inner
	    && strides_outer >= strides_inner)
	  continue;

	row_outer = outer->depth - first_loop->depth;
	row_inner = inner->depth - first_loop->depth;

	lambda_matrix_row_exchange (LTM_MATRIX (trans),
				    row_outer, row_inner);

	/* Keep the exchange only when the resulting matrix is a legal
	   transformation; otherwise swap the rows back.  */
	if (lambda_transform_legal_p (trans, depth, dependence_relations))
	  continue;

	lambda_matrix_row_exchange (LTM_MATRIX (trans),
				    row_outer, row_inner);
      }

  return trans;
}
Пример #3
0
/* Apply linear loop transformations to the loops recorded in LOOPS.

   Every loop of LOOPS->parray from index 1 on is considered as the
   outermost loop of a nest.  A candidate is skipped when it has no
   inner loop, or when one of its nested loops has a sibling or does
   not have exactly one exit.  For the remaining nests the data
   references and dependence relations are computed, an identity
   transformation matrix is built and handed to try_interchange_loops,
   and, when the result is a legal non-identity matrix, the nest is
   converted to a lambda loopnest, transformed, and converted back.

   The data references and dependence relations computed for a nest
   are released on every path that leaves the iteration after they
   have been computed, not only after a successful transformation.

   Once all nests have been processed, the df information is freed,
   scev is reset and the function is rewritten into loop-closed SSA.  */

void
linear_transform_loops (struct loops *loops)
{
  unsigned int i;
  
  compute_immediate_uses (TDFA_USE_OPS | TDFA_USE_VOPS, NULL);
  for (i = 1; i < loops->num; i++)
    {
      unsigned int depth = 0;
      varray_type datarefs;
      varray_type dependence_relations;
      struct loop *loop_nest = loops->parray[i];
      struct loop *temp;
      VEC (tree) *oldivs = NULL;
      VEC (tree) *invariants = NULL;
      lambda_loopnest before, after;
      lambda_trans_matrix trans;
      bool problem = false;
      bool need_perfect_nest = false;

      /* If it's not a loop nest, we don't want it.
         We also don't handle sibling loops properly, 
         which are loops of the following form:
         for (i = 0; i < 50; i++)
           {
             for (j = 0; j < 50; j++)
               {
                 ...
               }
             for (j = 0; j < 50; j++)
               {
                 ...
               }
           } */
      if (!loop_nest->inner)
	continue;
      depth = 1;
      for (temp = loop_nest->inner; temp; temp = temp->inner)
	{
	  flow_loop_scan (temp, LOOP_ALL);
	  /* If we have a sibling loop or multiple exit edges, jump ship.  */
	  if (temp->next || temp->num_exits != 1)
	    {
	      problem = true;
	      break;
	    }
	  depth ++;
	}
      if (problem)
	continue;

      /* Analyze data references and dependence relations using scev.
	 From here on, every exit from this iteration must go through
	 free_and_continue so that both arrays are released.  */
      VARRAY_GENERIC_PTR_INIT (datarefs, 10, "datarefs");
      VARRAY_GENERIC_PTR_INIT (dependence_relations, 10,
			       "dependence_relations");

      compute_data_dependences_for_loop (depth, loop_nest,
					 &datarefs, &dependence_relations);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  unsigned int j;
	  for (j = 0; j < VARRAY_ACTIVE_SIZE (dependence_relations); j++)
	    {
	      struct data_dependence_relation *ddr = 
		(struct data_dependence_relation *) 
		VARRAY_GENERIC_PTR (dependence_relations, j);

	      /* Only relations with a NULL_TREE dependence status have
		 their distance and direction vectors dumped.  */
	      if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
		{
		  fprintf (dump_file, "DISTANCE_V (");
		  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr), 
				       DDR_SIZE_VECT (ddr));
		  fprintf (dump_file, ")\n");
		  fprintf (dump_file, "DIRECTION_V (");
		  print_lambda_vector (dump_file, DDR_DIR_VECT (ddr), 
				       DDR_SIZE_VECT (ddr));
		  fprintf (dump_file, ")\n");
		}
	    }
	  fprintf (dump_file, "\n\n");
	}

      /* Build the transformation matrix, starting from the identity.  */
      trans = lambda_trans_matrix_new (depth, depth);
      lambda_matrix_id (LTM_MATRIX (trans), depth);

      trans = try_interchange_loops (trans, depth, dependence_relations,
				     datarefs, loop_nest);

      if (lambda_trans_matrix_id_p (trans))
	{
	  if (dump_file)
	   fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n");
	  goto free_and_continue;
	}

      /* Check whether the transformation is legal.  */
      if (!lambda_transform_legal_p (trans, depth, dependence_relations))
	{
	  if (dump_file)
	    fprintf (dump_file, "Can't transform loop, transform is illegal:\n");
	  goto free_and_continue;
	}

      if (!perfect_nest_p (loop_nest))
	need_perfect_nest = true;
      before = gcc_loopnest_to_lambda_loopnest (loops,
						loop_nest, &oldivs, 
						&invariants,
						need_perfect_nest);
      if (!before)
	goto free_and_continue;
            
      if (dump_file)
	{
	  fprintf (dump_file, "Before:\n");
	  print_lambda_loopnest (dump_file, before, 'i');
	}
  
      after = lambda_loopnest_transform (before, trans);
      if (dump_file)
	{
	  fprintf (dump_file, "After:\n");
	  print_lambda_loopnest (dump_file, after, 'u');
	}
      lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants,
				       after, trans);
      if (dump_file)
	fprintf (dump_file, "Successfully transformed loop.\n");
      oldivs = NULL;
      invariants = NULL;

      /* Common exit of the iteration: release what
	 compute_data_dependences_for_loop produced, whether or not the
	 nest was transformed.  */
    free_and_continue:
      free_dependence_relations (dependence_relations);
      free_data_refs (datarefs);
    }
  free_df ();
  scev_reset ();
  rewrite_into_loop_closed_ssa ();
#ifdef ENABLE_CHECKING
  verify_loop_closed_ssa ();
#endif
}
Пример #4
0
/* Compute the reuse distance of the memory references in REFS with
   respect to the loop nest that contains LOOP, and store it in the
   reuse_distance field of each reference.  Nothing is done unless
   LOOP has no inner loop.

   NO_OTHER_REFS is the caller's statement that REFS covers all the
   references; it is cleared locally as soon as create_data_ref fails
   for one reference.  While it holds, each analyzed reference is
   first marked independent_p, and the mark is later withdrawn for the
   references involved in an unanalyzable dependence or in a
   dependence carried only by the last loop of the nest.

   NOTE(review): the vector filled by find_loop_nest is not released
   explicitly in this function -- confirm whether
   free_dependence_relations takes care of it.  */

static void
determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs,
			   bool no_other_refs)
{
  struct loop *outermost, *parent, *level_loop;
  VEC (loop_p, heap) *nest_loops = NULL;
  VEC (data_reference_p, heap) *datarefs = NULL;
  VEC (ddr_p, heap) *dependences = NULL;
  struct mem_ref_group *group;
  struct mem_ref *ref_a, *ref_b;
  unsigned *level_size;
  unsigned i, j, n;
  unsigned volume, dist, adist;
  HOST_WIDE_INT iters;
  data_reference_p dr;
  ddr_p dep;

  if (loop->inner)
    return;

  /* Climb to the outermost loop of the nest of LOOP.  The climb stops
     at the root of the loop tree, or as soon as the parent's first
     inner loop has a sibling (sibling loops inside the nest are not
     supported).  */
  for (outermost = loop; ; outermost = parent)
    {
      parent = loop_outer (outermost);

      if (parent == current_loops->tree_root
	  || parent->inner->next)
	break;
    }

  /* For each loop of the nest, going from the last one to the first
     one, record the amount of data accessed in each iteration.  This
     is used to estimate whether a reference is evicted from the cache
     before its reuse.  */
  find_loop_nest (outermost, &nest_loops);
  n = VEC_length (loop_p, nest_loops);
  level_size = XNEWVEC (unsigned, n);
  volume = volume_of_references (refs);
  for (i = n; i-- != 0; )
    {
      level_size[i] = volume;

      /* Stop growing the volume once it exceeds the L2 cache size:
	 above this bound, all dependence distances are equivalent.  */
      if (volume <= L2_CACHE_SIZE_BYTES)
	{
	  level_loop = VEC_index (loop_p, nest_loops, i);
	  iters = estimated_loop_iterations_int (level_loop, false);
	  if (iters < 0)
	    iters = expected_loop_iterations (level_loop);
	  volume *= iters;
	}
    }

  /* Put the references in the form expected by the data dependence
     analysis.  Unanalyzable references are simply left out: the
     results only serve as a heuristic to estimate the temporality of
     the references, so correctness is not at stake.  */
  for (group = refs; group; group = group->next)
    for (ref_a = group->refs; ref_a; ref_a = ref_a->next)
      {
	dr = create_data_ref (outermost, ref_a->mem, ref_a->stmt,
			      !ref_a->write_p);

	if (!dr)
	  {
	    no_other_refs = false;
	    continue;
	  }

	ref_a->reuse_distance = volume;
	dr->aux = ref_a;
	VEC_safe_push (data_reference_p, heap, datarefs, dr);
      }

  /* Account for the reuse of each reference by itself.  */
  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      dist = self_reuse_distance (dr, level_size, n, loop);
      ref_a = (struct mem_ref *) dr->aux;
      if (dist < ref_a->reuse_distance)
	ref_a->reuse_distance = dist;

      if (no_other_refs)
	ref_a->independent_p = true;
    }

  compute_all_dependences (datarefs, &dependences, nest_loops, true);

  /* Account for the reuse between pairs of references.  */
  for (i = 0; VEC_iterate (ddr_p, dependences, i, dep); i++)
    {
      if (DDR_ARE_DEPENDENT (dep) == chrec_known)
	continue;

      ref_a = (struct mem_ref *) DDR_A (dep)->aux;
      ref_b = (struct mem_ref *) DDR_B (dep)->aux;

      if (DDR_ARE_DEPENDENT (dep) != chrec_dont_know
	  && DDR_NUM_DIST_VECTS (dep) != 0)
	{
	  /* The distance vectors are normalized to be always
	     lexicographically positive, so they do not tell whether
	     DDR_A comes before DDR_B or the other way round.  This
	     does not matter: if DDR_A is close to DDR_B, then either
	     it is reused in DDR_B (and it is not nontemporal), or it
	     reuses the value of DDR_B in cache (and marking it as
	     nontemporal would not affect anything).  */
	  dist = volume;
	  for (j = 0; j < DDR_NUM_DIST_VECTS (dep); j++)
	    {
	      adist = volume_of_dist_vector (DDR_DIST_VECT (dep, j),
					     level_size, n);

	      /* A dependence in the innermost loop (the distances in
		 all superloops are zero) that is not the trivial
		 self-dependence with distance zero means that the
		 references are not completely independent.  */
	      if (lambda_vector_zerop (DDR_DIST_VECT (dep, j), n - 1)
		  && (ref_a != ref_b
		      || DDR_DIST_VECT (dep, j)[n-1] != 0))
		{
		  ref_a->independent_p = false;
		  ref_b->independent_p = false;
		}

	      /* Accesses closer than
		 L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION are ignored,
		 so that nontemporal prefetches are still used e.g. if
		 a single memory location is accessed several times in
		 a single iteration of the loop.  */
	      if (adist >= L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION
		  && adist < dist)
		dist = adist;
	    }
	}
      else
	{
	  /* The dependence cannot be analyzed: assume that there
	     might be a reuse.  */
	  dist = 0;

	  ref_a->independent_p = false;
	  ref_b->independent_p = false;
	}

      if (dist < ref_a->reuse_distance)
	ref_a->reuse_distance = dist;
      if (dist < ref_b->reuse_distance)
	ref_b->reuse_distance = dist;
    }

  free_dependence_relations (dependences);
  free_data_refs (datarefs);
  free (level_size);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reuse distances:\n");
      for (group = refs; group; group = group->next)
	for (ref_a = group->refs; ref_a; ref_a = ref_a->next)
	  fprintf (dump_file, " ref %p distance %u\n",
		   (void *) ref_a, ref_a->reuse_distance);
    }
}