/* Collect the statistics used to judge whether LOOP is a good candidate
   for being moved inward in the nest rooted at FIRST_LOOP.

   DEPENDENCE_RELATIONS and DATAREFS are the dependence relations and
   data references computed for the nest.  Three counters are written
   (each one is reset to zero first):

   - *DEPENDENCE_STEPS: sum of the absolute dependence distances found
     in LOOP's slot of every usable distance vector.
   - *NB_DEPS_NOT_CARRIED_BY_LOOP: number of usable relations whose
     distance in LOOP's slot is zero.
   - *ACCESS_STRIDES: sum of the constant evolution steps, in LOOP, of
     the access functions of the data references located in
     FIRST_LOOP->inner or in a loop nested in it.  The constant is added
     as returned by int_cst_value, without taking its absolute
     value.  */

static void
gather_interchange_stats (varray_type dependence_relations,
			  varray_type datarefs,
			  struct loop *loop,
			  struct loop *first_loop,
			  unsigned int *dependence_steps,
			  unsigned int *nb_deps_not_carried_by_loop,
			  unsigned int *access_strides)
{
  unsigned int idx;

  *dependence_steps = 0;
  *nb_deps_not_carried_by_loop = 0;
  *access_strides = 0;

  /* First pass: dependence distances.  */
  for (idx = 0; idx < VARRAY_ACTIVE_SIZE (dependence_relations); idx++)
    {
      int distance;
      struct data_dependence_relation *relation =
	(struct data_dependence_relation *)
	VARRAY_GENERIC_PTR (dependence_relations, idx);

      /* A relation without a distance vector tells nothing about
	 reuse.  */
      if (DDR_DIST_VECT (relation) == NULL)
	continue;

      /* Neither does an unanalyzable relation, nor a proven
	 independence.  */
      if (DDR_ARE_DEPENDENT (relation) == chrec_dont_know
	  || DDR_ARE_DEPENDENT (relation) == chrec_known)
	continue;

      /* The distance vector is indexed by depth relative to the
	 outermost loop of the nest.  */
      distance = DDR_DIST_VECT (relation)[loop->depth - first_loop->depth];

      if (distance == 0)
	*nb_deps_not_carried_by_loop += 1;
      else
	*dependence_steps += (distance < 0) ? -distance : distance;
    }

  /* Second pass: access strides.  */
  for (idx = 0; idx < VARRAY_ACTIVE_SIZE (datarefs); idx++)
    {
      unsigned int dim;
      struct data_reference *ref = VARRAY_GENERIC_PTR (datarefs, idx);
      tree stmt = DR_STMT (ref);
      struct loop *stmt_loop = loop_containing_stmt (stmt);
      struct loop *inner_loop = first_loop->inner;

      /* Only references located in FIRST_LOOP->inner, or deeper, are
	 taken into account.  */
      if (inner_loop == stmt_loop
	  || flow_loop_nested_p (inner_loop, stmt_loop))
	{
	  for (dim = 0; dim < DR_NUM_DIMENSIONS (ref); dim++)
	    {
	      tree access_fn = DR_ACCESS_FN (ref, dim);
	      tree stride = evolution_part_in_loop_num (access_fn, loop->num);

	      /* Only compile-time constant steps contribute.  */
	      if (stride != NULL_TREE && TREE_CODE (stride) == INTEGER_CST)
		*access_strides += int_cst_value (stride);
	    }
	}
    }
}
/* Try to improve the loop nest rooted at FIRST_LOOP by interchanging
   pairs of loops, recording each accepted interchange as a row exchange
   in TRANS.

   DEPTH is the number of loops in the nest; DEPENDENCE_RELATIONS and
   DATAREFS describe its dependences and data references.  TRANS is
   updated in place and returned.

   Only the first element of DEPENDENCE_RELATIONS is inspected for the
   early exit: when it is NULL or marked chrec_dont_know, TRANS is
   returned untouched.  */

static lambda_trans_matrix
try_interchange_loops (lambda_trans_matrix trans,
		       unsigned int depth,
		       varray_type dependence_relations,
		       varray_type datarefs,
		       struct loop *first_loop)
{
  struct loop *outer;
  struct loop *inner;
  struct data_dependence_relation *first_ddr;

  first_ddr = (struct data_dependence_relation *)
    VARRAY_GENERIC_PTR (dependence_relations, 0);

  /* Nothing useful can be done without analyzable dependences.  */
  if (first_ddr == NULL)
    return trans;
  if (DDR_ARE_DEPENDENT (first_ddr) == chrec_dont_know)
    return trans;

  /* Visit every (OUTER, INNER) pair of the nest; OUTER is always the
     shallower loop of the two.  */
  for (inner = first_loop->inner; inner; inner = inner->inner)
    for (outer = first_loop; outer->depth < inner->depth; outer = outer->inner)
      {
	unsigned int steps_outer, steps_inner;
	unsigned int strides_outer, strides_inner;
	unsigned int uncarried_outer, uncarried_inner;
	int row_outer, row_inner;

	gather_interchange_stats (dependence_relations, datarefs,
				  outer, first_loop,
				  &steps_outer,
				  &uncarried_outer,
				  &strides_outer);
	gather_interchange_stats (dependence_relations, datarefs,
				  inner, first_loop,
				  &steps_inner,
				  &uncarried_inner,
				  &strides_inner);

	/* The interchange is attempted when at least one of these holds:
	   - OUTER has smaller dependence steps than INNER,
	   - OUTER has more dependences not carried by it than INNER,
	   - OUTER has smaller access strides than INNER.
	   Skip the pair when none of them does.  */
	if (steps_outer >= steps_inner
	    && uncarried_outer <= uncarried_inner
	    && strides_outer >= strides_inner)
	  continue;

	row_outer = outer->depth - first_loop->depth;
	row_inner = inner->depth - first_loop->depth;

	lambda_matrix_row_exchange (LTM_MATRIX (trans), row_outer, row_inner);

	/* Undo the exchange when the resulting transformation violates
	   a dependence.  */
	if (!lambda_transform_legal_p (trans, depth, dependence_relations))
	  lambda_matrix_row_exchange (LTM_MATRIX (trans),
				      row_outer, row_inner);
      }

  return trans;
}
void linear_transform_loops (struct loops *loops) { unsigned int i; compute_immediate_uses (TDFA_USE_OPS | TDFA_USE_VOPS, NULL); for (i = 1; i < loops->num; i++) { unsigned int depth = 0; varray_type datarefs; varray_type dependence_relations; struct loop *loop_nest = loops->parray[i]; struct loop *temp; VEC (tree) *oldivs = NULL; VEC (tree) *invariants = NULL; lambda_loopnest before, after; lambda_trans_matrix trans; bool problem = false; bool need_perfect_nest = false; /* If it's not a loop nest, we don't want it. We also don't handle sibling loops properly, which are loops of the following form: for (i = 0; i < 50; i++) { for (j = 0; j < 50; j++) { ... } for (j = 0; j < 50; j++) { ... } } */ if (!loop_nest->inner) continue; depth = 1; for (temp = loop_nest->inner; temp; temp = temp->inner) { flow_loop_scan (temp, LOOP_ALL); /* If we have a sibling loop or multiple exit edges, jump ship. */ if (temp->next || temp->num_exits != 1) { problem = true; break; } depth ++; } if (problem) continue; /* Analyze data references and dependence relations using scev. */ VARRAY_GENERIC_PTR_INIT (datarefs, 10, "datarefs"); VARRAY_GENERIC_PTR_INIT (dependence_relations, 10, "dependence_relations"); compute_data_dependences_for_loop (depth, loop_nest, &datarefs, &dependence_relations); if (dump_file && (dump_flags & TDF_DETAILS)) { unsigned int j; for (j = 0; j < VARRAY_ACTIVE_SIZE (dependence_relations); j++) { struct data_dependence_relation *ddr = (struct data_dependence_relation *) VARRAY_GENERIC_PTR (dependence_relations, j); if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) { fprintf (dump_file, "DISTANCE_V ("); print_lambda_vector (dump_file, DDR_DIST_VECT (ddr), DDR_SIZE_VECT (ddr)); fprintf (dump_file, ")\n"); fprintf (dump_file, "DIRECTION_V ("); print_lambda_vector (dump_file, DDR_DIR_VECT (ddr), DDR_SIZE_VECT (ddr)); fprintf (dump_file, ")\n"); } } fprintf (dump_file, "\n\n"); } /* Build the transformation matrix. 
*/ trans = lambda_trans_matrix_new (depth, depth); lambda_matrix_id (LTM_MATRIX (trans), depth); trans = try_interchange_loops (trans, depth, dependence_relations, datarefs, loop_nest); if (lambda_trans_matrix_id_p (trans)) { if (dump_file) fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n"); continue; } /* Check whether the transformation is legal. */ if (!lambda_transform_legal_p (trans, depth, dependence_relations)) { if (dump_file) fprintf (dump_file, "Can't transform loop, transform is illegal:\n"); continue; } if (!perfect_nest_p (loop_nest)) need_perfect_nest = true; before = gcc_loopnest_to_lambda_loopnest (loops, loop_nest, &oldivs, &invariants, need_perfect_nest); if (!before) continue; if (dump_file) { fprintf (dump_file, "Before:\n"); print_lambda_loopnest (dump_file, before, 'i'); } after = lambda_loopnest_transform (before, trans); if (dump_file) { fprintf (dump_file, "After:\n"); print_lambda_loopnest (dump_file, after, 'u'); } lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants, after, trans); if (dump_file) fprintf (dump_file, "Successfully transformed loop.\n"); oldivs = NULL; invariants = NULL; free_dependence_relations (dependence_relations); free_data_refs (datarefs); } free_df (); scev_reset (); rewrite_into_loop_closed_ssa (); #ifdef ENABLE_CHECKING verify_loop_closed_ssa (); #endif }
/* Determine the reuse distance of each memory reference in REFS within
   the loop nest that contains LOOP, and store it in the reference's
   reuse_distance field.  Nothing is done unless LOOP is an innermost
   loop.

   NO_OTHER_REFS is true when the caller knows of no memory references
   in the loop other than those in REFS; it is cleared locally as soon
   as one reference of REFS cannot be turned into a data reference.
   While it holds, each analyzable reference is first marked
   independent_p; the mark is removed again for references involved in
   an unanalyzable dependence or in a non-trivial dependence carried by
   the innermost loop.

   All temporary storage allocated here (data references, dependence
   relations, the per-loop data sizes and the loop nest vector) is
   released before returning.  */

static void
determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs,
			   bool no_other_refs)
{
  struct loop *nest, *aloop;
  VEC (data_reference_p, heap) *datarefs = NULL;
  VEC (ddr_p, heap) *dependences = NULL;
  struct mem_ref_group *gr;
  struct mem_ref *ref, *refb;
  VEC (loop_p, heap) *vloops = NULL;
  unsigned *loop_data_size;
  unsigned i, j, n;
  unsigned volume, dist, adist;
  HOST_WIDE_INT vol;
  data_reference_p dr;
  ddr_p dep;

  /* Only innermost loops are handled; nothing has been allocated yet,
     so a plain return is fine.  */
  if (loop->inner)
    return;

  /* Find the outermost loop of the loop nest of loop (we require that
     there are no sibling loops inside the nest).  */
  nest = loop;
  while (1)
    {
      aloop = loop_outer (nest);

      if (aloop == current_loops->tree_root
	  || aloop->inner->next)
	break;

      nest = aloop;
    }

  /* For each loop, determine the amount of data accessed in each
     iteration.  We use this to estimate whether the reference is
     evicted from the cache before its reuse.  */
  find_loop_nest (nest, &vloops);
  n = VEC_length (loop_p, vloops);
  loop_data_size = XNEWVEC (unsigned, n);
  volume = volume_of_references (refs);
  i = n;
  while (i-- != 0)
    {
      loop_data_size[i] = volume;
      /* Bound the volume by the L2 cache size, since above this bound,
	 all dependence distances are equivalent.  */
      if (volume > L2_CACHE_SIZE_BYTES)
	continue;

      aloop = VEC_index (loop_p, vloops, i);
      vol = estimated_loop_iterations_int (aloop, false);
      /* A negative estimate means no estimate is available; fall back
	 to the expected number of iterations.  */
      if (vol < 0)
	vol = expected_loop_iterations (aloop);
      volume *= vol;
    }

  /* Prepare the references in the form suitable for data dependence
     analysis.  We ignore unanalyzable data references (the results
     are used just as a heuristics to estimate temporality of the
     references, hence we do not need to worry about correctness).  */
  for (gr = refs; gr; gr = gr->next)
    for (ref = gr->refs; ref; ref = ref->next)
      {
	dr = create_data_ref (nest, ref->mem, ref->stmt, !ref->write_p);

	if (dr)
	  {
	    /* Start from the largest distance; it is only lowered
	       below.  */
	    ref->reuse_distance = volume;
	    dr->aux = ref;
	    VEC_safe_push (data_reference_p, heap, datarefs, dr);
	  }
	else
	  no_other_refs = false;
      }

  /* Account for the reuse of each reference by itself.  */
  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      dist = self_reuse_distance (dr, loop_data_size, n, loop);
      ref = (struct mem_ref *) dr->aux;
      if (ref->reuse_distance > dist)
	ref->reuse_distance = dist;

      if (no_other_refs)
	ref->independent_p = true;
    }

  compute_all_dependences (datarefs, &dependences, vloops, true);

  /* Account for the reuse between pairs of references.  */
  for (i = 0; VEC_iterate (ddr_p, dependences, i, dep); i++)
    {
      /* Proven independence: no reuse between the two references.  */
      if (DDR_ARE_DEPENDENT (dep) == chrec_known)
	continue;

      ref = (struct mem_ref *) DDR_A (dep)->aux;
      refb = (struct mem_ref *) DDR_B (dep)->aux;

      if (DDR_ARE_DEPENDENT (dep) == chrec_dont_know
	  || DDR_NUM_DIST_VECTS (dep) == 0)
	{
	  /* If the dependence cannot be analyzed, assume that there might
	     be a reuse.  */
	  dist = 0;

	  ref->independent_p = false;
	  refb->independent_p = false;
	}
      else
	{
	  /* The distance vectors are normalized to be always
	     lexicographically positive, hence we cannot tell just from
	     them whether DDR_A comes before DDR_B or vice versa.
	     However, it is not important, anyway -- if DDR_A is close to
	     DDR_B, then it is either reused in DDR_B (and it is not
	     nontemporal), or it reuses the value of DDR_B in cache (and
	     marking it as nontemporal would not affect anything).  */

	  dist = volume;
	  for (j = 0; j < DDR_NUM_DIST_VECTS (dep); j++)
	    {
	      adist = volume_of_dist_vector (DDR_DIST_VECT (dep, j),
					     loop_data_size, n);

	      /* If this is a dependence in the innermost loop (i.e., the
		 distances in all superloops are zero) and it is not
		 the trivial self-dependence with distance zero, record
		 that the references are not completely independent.  */
	      if (lambda_vector_zerop (DDR_DIST_VECT (dep, j), n - 1)
		  && (ref != refb
		      || DDR_DIST_VECT (dep, j)[n-1] != 0))
		{
		  ref->independent_p = false;
		  refb->independent_p = false;
		}

	      /* Ignore accesses closer than
		 L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION,
		 so that we use nontemporal prefetches e.g. if single
		 memory location is accessed several times in a single
		 iteration of the loop.  */
	      if (adist < L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION)
		continue;

	      if (adist < dist)
		dist = adist;
	    }
	}

      if (ref->reuse_distance > dist)
	ref->reuse_distance = dist;
      if (refb->reuse_distance > dist)
	refb->reuse_distance = dist;
    }

  free_dependence_relations (dependences);
  free_data_refs (datarefs);
  free (loop_data_size);
  /* The loop nest vector was handed to compute_all_dependences, so it
     is released only after the dependence relations are gone.  */
  VEC_free (loop_p, heap, vloops);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reuse distances:\n");
      for (gr = refs; gr; gr = gr->next)
	for (ref = gr->refs; ref; ref = ref->next)
	  fprintf (dump_file, " ref %p distance %u\n",
		   (void *) ref, ref->reuse_distance);
    }
}