/* Prepare the per-function state used by the Graphite pass.

   CTX is the isl context to work in.  When the function is rejected
   (number_of_loops (cfun) is at most 1, or it has more basic blocks
   than PARAM_GRAPHITE_MAX_BBS_PER_FUNCTION), CTX is freed here and
   false is returned.  Otherwise the scev cache is reset, dominators
   are recomputed, the original/copy tables and the CLooG state are
   set up, and true is returned.  */

static bool
graphite_initialize (isl_ctx *ctx)
{
  bool worth_trying = number_of_loops (cfun) > 1;

  /* FIXME: This limit on the number of basic blocks of a function
     should be removed when the SCOP detection is faster.  */
  if (worth_trying)
    worth_trying
      = n_basic_blocks <= PARAM_VALUE (PARAM_GRAPHITE_MAX_BBS_PER_FUNCTION);

  if (!worth_trying)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	print_global_statistics (dump_file);

      /* The caller handed CTX over; release it on the bail-out path.  */
      isl_ctx_free (ctx);
      return false;
    }

  scev_reset ();
  recompute_all_dominators ();
  initialize_original_copy_tables ();

  cloog_state = cloog_isl_state_malloc (ctx);

  if (dump_file && dump_flags)
    dump_function_to_file (current_function_decl, dump_file, dump_flags);

  return true;
}
unsigned int tree_ssa_prefetch_arrays (struct loops *loops) { unsigned i; struct loop *loop; bool unrolled = false; int todo_flags = 0; if (!HAVE_prefetch /* It is possible to ask compiler for say -mtune=i486 -march=pentium4. -mtune=i486 causes us having PREFETCH_BLOCK 0, since this is part of processor costs and i486 does not have prefetch, but -march=pentium4 causes HAVE_prefetch to be true. Ugh. */ || PREFETCH_BLOCK == 0) return 0; initialize_original_copy_tables (); if (!built_in_decls[BUILT_IN_PREFETCH]) { tree type = build_function_type (void_type_node, tree_cons (NULL_TREE, const_ptr_type_node, NULL_TREE)); tree decl = lang_hooks.builtin_function ("__builtin_prefetch", type, BUILT_IN_PREFETCH, BUILT_IN_NORMAL, NULL, NULL_TREE); DECL_IS_NOVOPS (decl) = true; built_in_decls[BUILT_IN_PREFETCH] = decl; } /* We assume that size of cache line is a power of two, so verify this here. */ gcc_assert ((PREFETCH_BLOCK & (PREFETCH_BLOCK - 1)) == 0); for (i = loops->num - 1; i > 0; i--) { loop = loops->parray[i]; if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Processing loop %d:\n", loop->num); if (loop) unrolled |= loop_prefetch_arrays (loops, loop); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "\n\n"); } if (unrolled) { scev_reset (); todo_flags |= TODO_cleanup_cfg; } free_original_copy_tables (); return todo_flags; }
/* Pass entry point: estimate the numbers of iterations of the loops
   and then reset the scalar evolution cache.  Nothing is done when
   number_of_loops () is at most 1.  Always returns 0.  */

static unsigned int
tree_ssa_loop_bounds (void)
{
  if (number_of_loops () > 1)
    {
      estimate_numbers_of_iterations ();
      scev_reset ();
    }

  return 0;
}
/* Pass entry point: estimate the numbers of iterations of the loops in
   current_loops and then reset the scalar evolution cache.  Nothing is
   done when current_loops is not set.  Always returns 0.  */

static unsigned int
tree_ssa_loop_bounds (void)
{
  if (current_loops)
    {
      estimate_numbers_of_iterations (current_loops);
      scev_reset ();
    }

  return 0;
}
/* Tear down the state set up for the Graphite pass.

   When NEED_CFG_CLEANUP_P is true, first reset the scev cache, clean
   up the CFG, mark the profile as absent, release the recorded loop
   exits and re-estimate branch probabilities.  In all cases the CLooG
   state and the original/copy tables are freed, and the loop tree is
   printed to the dump file when dumping is enabled.  */

static void
graphite_finalize (bool need_cfg_cleanup_p)
{
  if (need_cfg_cleanup_p)
    {
      scev_reset ();
      cleanup_tree_cfg ();
      profile_status = PROFILE_ABSENT;
      release_recorded_exits ();
      tree_estimate_probability ();
    }

  cloog_state_free (cloog_state);
  free_original_copy_tables ();

  /* Everything below is dump output only.  */
  if (!dump_file || !dump_flags)
    return;

  print_loops (dump_file, 3);
}
static bool fini_copy_prop (void) { unsigned i; /* Set the final copy-of value for each variable by traversing the copy-of chains. */ for (i = 1; i < num_ssa_names; i++) { tree var = ssa_name (i); if (!var || !copy_of[i].value || copy_of[i].value == var) continue; /* In theory the points-to solution of all members of the copy chain is their intersection. For now we do not bother to compute this but only make sure we do not lose points-to information completely by setting the points-to solution of the representative to the first solution we find if it doesn't have one already. */ if (copy_of[i].value != var && TREE_CODE (copy_of[i].value) == SSA_NAME) { basic_block copy_of_bb = gimple_bb (SSA_NAME_DEF_STMT (copy_of[i].value)); basic_block var_bb = gimple_bb (SSA_NAME_DEF_STMT (var)); if (POINTER_TYPE_P (TREE_TYPE (var)) && SSA_NAME_PTR_INFO (var) && !SSA_NAME_PTR_INFO (copy_of[i].value)) { duplicate_ssa_name_ptr_info (copy_of[i].value, SSA_NAME_PTR_INFO (var)); /* Points-to information is cfg insensitive, but alignment info might be cfg sensitive, if it e.g. is derived from VRP derived non-zero bits. So, do not copy alignment info if the two SSA_NAMEs aren't defined in the same basic block. */ if (var_bb != copy_of_bb) mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (copy_of[i].value)); } else if (!POINTER_TYPE_P (TREE_TYPE (var)) && SSA_NAME_RANGE_INFO (var) && !SSA_NAME_RANGE_INFO (copy_of[i].value) && var_bb == copy_of_bb) duplicate_ssa_name_range_info (copy_of[i].value, SSA_NAME_RANGE_TYPE (var), SSA_NAME_RANGE_INFO (var)); } } bool changed = substitute_and_fold (get_value, NULL, true); if (changed) { free_numbers_of_iterations_estimates (); if (scev_initialized_p ()) scev_reset (); } free (copy_of); return changed; }
void linear_transform_loops (struct loops *loops) { unsigned int i; compute_immediate_uses (TDFA_USE_OPS | TDFA_USE_VOPS, NULL); for (i = 1; i < loops->num; i++) { unsigned int depth = 0; varray_type datarefs; varray_type dependence_relations; struct loop *loop_nest = loops->parray[i]; struct loop *temp; VEC (tree) *oldivs = NULL; VEC (tree) *invariants = NULL; lambda_loopnest before, after; lambda_trans_matrix trans; bool problem = false; bool need_perfect_nest = false; /* If it's not a loop nest, we don't want it. We also don't handle sibling loops properly, which are loops of the following form: for (i = 0; i < 50; i++) { for (j = 0; j < 50; j++) { ... } for (j = 0; j < 50; j++) { ... } } */ if (!loop_nest->inner) continue; depth = 1; for (temp = loop_nest->inner; temp; temp = temp->inner) { flow_loop_scan (temp, LOOP_ALL); /* If we have a sibling loop or multiple exit edges, jump ship. */ if (temp->next || temp->num_exits != 1) { problem = true; break; } depth ++; } if (problem) continue; /* Analyze data references and dependence relations using scev. */ VARRAY_GENERIC_PTR_INIT (datarefs, 10, "datarefs"); VARRAY_GENERIC_PTR_INIT (dependence_relations, 10, "dependence_relations"); compute_data_dependences_for_loop (depth, loop_nest, &datarefs, &dependence_relations); if (dump_file && (dump_flags & TDF_DETAILS)) { unsigned int j; for (j = 0; j < VARRAY_ACTIVE_SIZE (dependence_relations); j++) { struct data_dependence_relation *ddr = (struct data_dependence_relation *) VARRAY_GENERIC_PTR (dependence_relations, j); if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) { fprintf (dump_file, "DISTANCE_V ("); print_lambda_vector (dump_file, DDR_DIST_VECT (ddr), DDR_SIZE_VECT (ddr)); fprintf (dump_file, ")\n"); fprintf (dump_file, "DIRECTION_V ("); print_lambda_vector (dump_file, DDR_DIR_VECT (ddr), DDR_SIZE_VECT (ddr)); fprintf (dump_file, ")\n"); } } fprintf (dump_file, "\n\n"); } /* Build the transformation matrix. 
*/ trans = lambda_trans_matrix_new (depth, depth); lambda_matrix_id (LTM_MATRIX (trans), depth); trans = try_interchange_loops (trans, depth, dependence_relations, datarefs, loop_nest); if (lambda_trans_matrix_id_p (trans)) { if (dump_file) fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n"); continue; } /* Check whether the transformation is legal. */ if (!lambda_transform_legal_p (trans, depth, dependence_relations)) { if (dump_file) fprintf (dump_file, "Can't transform loop, transform is illegal:\n"); continue; } if (!perfect_nest_p (loop_nest)) need_perfect_nest = true; before = gcc_loopnest_to_lambda_loopnest (loops, loop_nest, &oldivs, &invariants, need_perfect_nest); if (!before) continue; if (dump_file) { fprintf (dump_file, "Before:\n"); print_lambda_loopnest (dump_file, before, 'i'); } after = lambda_loopnest_transform (before, trans); if (dump_file) { fprintf (dump_file, "After:\n"); print_lambda_loopnest (dump_file, after, 'u'); } lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants, after, trans); if (dump_file) fprintf (dump_file, "Successfully transformed loop.\n"); oldivs = NULL; invariants = NULL; free_dependence_relations (dependence_relations); free_data_refs (datarefs); } free_df (); scev_reset (); rewrite_into_loop_closed_ssa (); #ifdef ENABLE_CHECKING verify_loop_closed_ssa (); #endif }
unsigned int tree_ssa_prefetch_arrays (void) { loop_iterator li; struct loop *loop; bool unrolled = false; int todo_flags = 0; if (!HAVE_prefetch /* It is possible to ask compiler for say -mtune=i486 -march=pentium4. -mtune=i486 causes us having PREFETCH_BLOCK 0, since this is part of processor costs and i486 does not have prefetch, but -march=pentium4 causes HAVE_prefetch to be true. Ugh. */ || PREFETCH_BLOCK == 0) return 0; if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "Prefetching parameters:\n"); fprintf (dump_file, " simultaneous prefetches: %d\n", SIMULTANEOUS_PREFETCHES); fprintf (dump_file, " prefetch latency: %d\n", PREFETCH_LATENCY); fprintf (dump_file, " prefetch block size: %d\n", PREFETCH_BLOCK); fprintf (dump_file, " L1 cache size: %d lines, %d kB\n", L1_CACHE_SIZE_BYTES / L1_CACHE_LINE_SIZE, L1_CACHE_SIZE); fprintf (dump_file, " L1 cache line size: %d\n", L1_CACHE_LINE_SIZE); fprintf (dump_file, " L2 cache size: %d kB\n", L2_CACHE_SIZE); fprintf (dump_file, "\n"); } initialize_original_copy_tables (); if (!built_in_decls[BUILT_IN_PREFETCH]) { tree type = build_function_type (void_type_node, tree_cons (NULL_TREE, const_ptr_type_node, NULL_TREE)); tree decl = add_builtin_function ("__builtin_prefetch", type, BUILT_IN_PREFETCH, BUILT_IN_NORMAL, NULL, NULL_TREE); DECL_IS_NOVOPS (decl) = true; built_in_decls[BUILT_IN_PREFETCH] = decl; } /* We assume that size of cache line is a power of two, so verify this here. */ gcc_assert ((PREFETCH_BLOCK & (PREFETCH_BLOCK - 1)) == 0); FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Processing loop %d:\n", loop->num); unrolled |= loop_prefetch_arrays (loop); if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "\n\n"); } if (unrolled) { scev_reset (); todo_flags |= TODO_cleanup_cfg; } free_original_copy_tables (); return todo_flags; }