void linear_transform_loops (struct loops *loops) { unsigned int i; compute_immediate_uses (TDFA_USE_OPS | TDFA_USE_VOPS, NULL); for (i = 1; i < loops->num; i++) { unsigned int depth = 0; varray_type datarefs; varray_type dependence_relations; struct loop *loop_nest = loops->parray[i]; struct loop *temp; VEC (tree) *oldivs = NULL; VEC (tree) *invariants = NULL; lambda_loopnest before, after; lambda_trans_matrix trans; bool problem = false; bool need_perfect_nest = false; /* If it's not a loop nest, we don't want it. We also don't handle sibling loops properly, which are loops of the following form: for (i = 0; i < 50; i++) { for (j = 0; j < 50; j++) { ... } for (j = 0; j < 50; j++) { ... } } */ if (!loop_nest->inner) continue; depth = 1; for (temp = loop_nest->inner; temp; temp = temp->inner) { flow_loop_scan (temp, LOOP_ALL); /* If we have a sibling loop or multiple exit edges, jump ship. */ if (temp->next || temp->num_exits != 1) { problem = true; break; } depth ++; } if (problem) continue; /* Analyze data references and dependence relations using scev. */ VARRAY_GENERIC_PTR_INIT (datarefs, 10, "datarefs"); VARRAY_GENERIC_PTR_INIT (dependence_relations, 10, "dependence_relations"); compute_data_dependences_for_loop (depth, loop_nest, &datarefs, &dependence_relations); if (dump_file && (dump_flags & TDF_DETAILS)) { unsigned int j; for (j = 0; j < VARRAY_ACTIVE_SIZE (dependence_relations); j++) { struct data_dependence_relation *ddr = (struct data_dependence_relation *) VARRAY_GENERIC_PTR (dependence_relations, j); if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) { fprintf (dump_file, "DISTANCE_V ("); print_lambda_vector (dump_file, DDR_DIST_VECT (ddr), DDR_SIZE_VECT (ddr)); fprintf (dump_file, ")\n"); fprintf (dump_file, "DIRECTION_V ("); print_lambda_vector (dump_file, DDR_DIR_VECT (ddr), DDR_SIZE_VECT (ddr)); fprintf (dump_file, ")\n"); } } fprintf (dump_file, "\n\n"); } /* Build the transformation matrix. 
*/ trans = lambda_trans_matrix_new (depth, depth); lambda_matrix_id (LTM_MATRIX (trans), depth); trans = try_interchange_loops (trans, depth, dependence_relations, datarefs, loop_nest); if (lambda_trans_matrix_id_p (trans)) { if (dump_file) fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n"); continue; } /* Check whether the transformation is legal. */ if (!lambda_transform_legal_p (trans, depth, dependence_relations)) { if (dump_file) fprintf (dump_file, "Can't transform loop, transform is illegal:\n"); continue; } if (!perfect_nest_p (loop_nest)) need_perfect_nest = true; before = gcc_loopnest_to_lambda_loopnest (loops, loop_nest, &oldivs, &invariants, need_perfect_nest); if (!before) continue; if (dump_file) { fprintf (dump_file, "Before:\n"); print_lambda_loopnest (dump_file, before, 'i'); } after = lambda_loopnest_transform (before, trans); if (dump_file) { fprintf (dump_file, "After:\n"); print_lambda_loopnest (dump_file, after, 'u'); } lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants, after, trans); if (dump_file) fprintf (dump_file, "Successfully transformed loop.\n"); oldivs = NULL; invariants = NULL; free_dependence_relations (dependence_relations); free_data_refs (datarefs); } free_df (); scev_reset (); rewrite_into_loop_closed_ssa (); #ifdef ENABLE_CHECKING verify_loop_closed_ssa (); #endif }
/* Collect the statistics used to decide whether LOOP is worth
   interchanging within the nest whose outermost loop is FIRST_LOOP.

   DEPENDENCE_RELATIONS and DATAREFS are the dependence relations and data
   references of the nest.  Three counters are written through the output
   pointers, each of them reset to zero first:

   *DEPENDENCE_STEPS receives the sum of the absolute values of the
   dependence distances found at LOOP's position in the distance vectors.

   *NB_DEPS_NOT_CARRIED_BY_LOOP receives the number of dependences whose
   distance at LOOP's position is zero.

   *ACCESS_STRIDES receives the sum of the constant evolution steps, in
   LOOP, of the access functions of the data references located in (or
   nested inside) the inner loop of FIRST_LOOP.  */

static void
gather_interchange_stats (varray_type dependence_relations,
			  varray_type datarefs,
			  struct loop *loop,
			  struct loop *first_loop,
			  unsigned int *dependence_steps,
			  unsigned int *nb_deps_not_carried_by_loop,
			  unsigned int *access_strides)
{
  unsigned int idx;

  *dependence_steps = 0;
  *nb_deps_not_carried_by_loop = 0;
  *access_strides = 0;

  /* First pass: dependence distances.  */
  for (idx = 0; idx < VARRAY_ACTIVE_SIZE (dependence_relations); idx++)
    {
      struct data_dependence_relation *rel
	= (struct data_dependence_relation *)
	  VARRAY_GENERIC_PTR (dependence_relations, idx);
      int distance;

      /* A relation with no distance vector, one we know nothing about,
	 or one that is known to be independent carries no data reuse,
	 so it contributes to neither counter.  */
      if (DDR_DIST_VECT (rel) == NULL
	  || DDR_ARE_DEPENDENT (rel) == chrec_dont_know
	  || DDR_ARE_DEPENDENT (rel) == chrec_known)
	continue;

      /* The distance vector is indexed by depth relative to the
	 outermost loop of the nest.  */
      distance = DDR_DIST_VECT (rel)[loop->depth - first_loop->depth];

      if (distance == 0)
	*nb_deps_not_carried_by_loop += 1;
      else
	*dependence_steps += (distance < 0) ? -distance : distance;
    }

  /* Second pass: access strides.  */
  for (idx = 0; idx < VARRAY_ACTIVE_SIZE (datarefs); idx++)
    {
      struct data_reference *ref = VARRAY_GENERIC_PTR (datarefs, idx);
      struct loop *ref_loop = loop_containing_stmt (DR_STMT (ref));
      struct loop *nest_inner = first_loop->inner;
      unsigned int dim;

      /* Only references sitting in the inner loop of FIRST_LOOP, or in
	 a loop nested inside it, are taken into account.  */
      if (nest_inner == ref_loop
	  || flow_loop_nested_p (nest_inner, ref_loop))
	for (dim = 0; dim < DR_NUM_DIMENSIONS (ref); dim++)
	  {
	    tree stride
	      = evolution_part_in_loop_num (DR_ACCESS_FN (ref, dim),
					    loop->num);

	    /* Steps that are missing or not integer constants are
	       skipped.  */
	    if (stride != NULL_TREE && TREE_CODE (stride) == INTEGER_CST)
	      *access_strides += int_cst_value (stride);
	  }
    }
}
static void determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs, bool no_other_refs) { struct loop *nest, *aloop; VEC (data_reference_p, heap) *datarefs = NULL; VEC (ddr_p, heap) *dependences = NULL; struct mem_ref_group *gr; struct mem_ref *ref, *refb; VEC (loop_p, heap) *vloops = NULL; unsigned *loop_data_size; unsigned i, j, n; unsigned volume, dist, adist; HOST_WIDE_INT vol; data_reference_p dr; ddr_p dep; if (loop->inner) return; /* Find the outermost loop of the loop nest of loop (we require that there are no sibling loops inside the nest). */ nest = loop; while (1) { aloop = loop_outer (nest); if (aloop == current_loops->tree_root || aloop->inner->next) break; nest = aloop; } /* For each loop, determine the amount of data accessed in each iteration. We use this to estimate whether the reference is evicted from the cache before its reuse. */ find_loop_nest (nest, &vloops); n = VEC_length (loop_p, vloops); loop_data_size = XNEWVEC (unsigned, n); volume = volume_of_references (refs); i = n; while (i-- != 0) { loop_data_size[i] = volume; /* Bound the volume by the L2 cache size, since above this bound, all dependence distances are equivalent. */ if (volume > L2_CACHE_SIZE_BYTES) continue; aloop = VEC_index (loop_p, vloops, i); vol = estimated_loop_iterations_int (aloop, false); if (vol < 0) vol = expected_loop_iterations (aloop); volume *= vol; } /* Prepare the references in the form suitable for data dependence analysis. We ignore unanalyzable data references (the results are used just as a heuristics to estimate temporality of the references, hence we do not need to worry about correctness). 
*/ for (gr = refs; gr; gr = gr->next) for (ref = gr->refs; ref; ref = ref->next) { dr = create_data_ref (nest, ref->mem, ref->stmt, !ref->write_p); if (dr) { ref->reuse_distance = volume; dr->aux = ref; VEC_safe_push (data_reference_p, heap, datarefs, dr); } else no_other_refs = false; } for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) { dist = self_reuse_distance (dr, loop_data_size, n, loop); ref = (struct mem_ref *) dr->aux; if (ref->reuse_distance > dist) ref->reuse_distance = dist; if (no_other_refs) ref->independent_p = true; } compute_all_dependences (datarefs, &dependences, vloops, true); for (i = 0; VEC_iterate (ddr_p, dependences, i, dep); i++) { if (DDR_ARE_DEPENDENT (dep) == chrec_known) continue; ref = (struct mem_ref *) DDR_A (dep)->aux; refb = (struct mem_ref *) DDR_B (dep)->aux; if (DDR_ARE_DEPENDENT (dep) == chrec_dont_know || DDR_NUM_DIST_VECTS (dep) == 0) { /* If the dependence cannot be analyzed, assume that there might be a reuse. */ dist = 0; ref->independent_p = false; refb->independent_p = false; } else { /* The distance vectors are normalized to be always lexicographically positive, hence we cannot tell just from them whether DDR_A comes before DDR_B or vice versa. However, it is not important, anyway -- if DDR_A is close to DDR_B, then it is either reused in DDR_B (and it is not nontemporal), or it reuses the value of DDR_B in cache (and marking it as nontemporal would not affect anything). */ dist = volume; for (j = 0; j < DDR_NUM_DIST_VECTS (dep); j++) { adist = volume_of_dist_vector (DDR_DIST_VECT (dep, j), loop_data_size, n); /* If this is a dependence in the innermost loop (i.e., the distances in all superloops are zero) and it is not the trivial self-dependence with distance zero, record that the references are not completely independent. 
*/ if (lambda_vector_zerop (DDR_DIST_VECT (dep, j), n - 1) && (ref != refb || DDR_DIST_VECT (dep, j)[n-1] != 0)) { ref->independent_p = false; refb->independent_p = false; } /* Ignore accesses closer than L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION, so that we use nontemporal prefetches e.g. if single memory location is accessed several times in a single iteration of the loop. */ if (adist < L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION) continue; if (adist < dist) dist = adist; } } if (ref->reuse_distance > dist) ref->reuse_distance = dist; if (refb->reuse_distance > dist) refb->reuse_distance = dist; } free_dependence_relations (dependences); free_data_refs (datarefs); free (loop_data_size); if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "Reuse distances:\n"); for (gr = refs; gr; gr = gr->next) for (ref = gr->refs; ref; ref = ref->next) fprintf (dump_file, " ref %p distance %u\n", (void *) ref, ref->reuse_distance); } }