/* Store in STRIDE the largest difference L' - L between the linearized
   memory addresses touched by data reference PDR at two schedule points
   whose time dimension for loop DEPTH differs by exactly one step.

   The spaces involved are:

   | pdr->accesses:   [P,I] -> [a,S]
   | pbb->transformed: [P,I] -> [T]

   where P are parameters, I the iteration domain, T the scattering
   (time) dimensions including local variables, "a" the alias set and S
   the subscripts.  When detailed dumping is enabled the computed stride
   is also printed to DUMP_FILE.  */

static void
pdr_stride_in_loop (mpz_t stride, graphite_dim_t depth, poly_dr_p pdr)
{
  poly_bb_p pbb = PDR_PBB (pdr);
  isl_map *rel;
  isl_map *primed;
  isl_set *points;
  isl_space *space;
  isl_aff *delta;
  isl_constraint *lin, *step;
  isl_int max_delta;
  graphite_dim_t tdim;
  unsigned half, n_time, k;
  char *text;
  void (*release) (void *, size_t);

  /* Combine scattering and access relation: [P,I] -> [T,a,S].  */
  rel = isl_map_flat_range_product (isl_map_copy (pbb->transformed),
                                    isl_map_copy (pdr->accesses));

  /* Append one output dimension L that will hold the linearized
     address: [P,I] -> [T,a,S,L].  */
  rel = isl_map_add_dims (rel, isl_dim_out, 1);

  /* Tie L to the subscripts with the constraint "lma[S] - L == 0"
     produced by build_linearized_memory_access.  */
  lin = build_linearized_memory_access (rel, pdr);
  rel = isl_map_add_constraint (rel, lin);

  /* Pair the relation with a copy of itself:
     [P,I,P',I'] -> [T,a,S,L,T',a',S',L'].  */
  primed = isl_map_copy (rel);
  rel = isl_map_flat_product (rel, primed);

  /* Scattering dimension that corresponds to loop DEPTH.  */
  tdim = psct_dynamic_dim (pbb, depth);

  /* HALF is the length of one [T,a,S,L] tuple, i.e. the distance
     between an output dimension and its primed counterpart.  */
  half = isl_map_dim (pdr->accesses, isl_dim_out) + 1;
  half += isl_map_dim (pbb->transformed, isl_dim_out);

  /* Encode T[tdim] - T'[tdim] + 1 == 0, that is
     T'[tdim] == T[tdim] + 1: the primed point is one step later in the
     selected time dimension, so L' is the address reached after that
     step.  */
  space = isl_map_get_space (rel);
  step = isl_equality_alloc (isl_local_space_from_space (space));
  step = isl_constraint_set_coefficient_si (step, isl_dim_out, tdim, 1);
  step = isl_constraint_set_coefficient_si (step, isl_dim_out,
                                            half + tdim, -1);
  step = isl_constraint_set_constant_si (step, 1);
  rel = isl_map_add_constraint (rel, step);

  /* Make every other time dimension agree between the two points.
     An equation that would empty the relation is dropped: that means
     the dimension is already coupled to TDIM through the step
     constraint above, as happens with strip mining where several
     scattering dimensions depend on each other.  */
  n_time = pbb_nb_scattering_transform (pbb) + pbb_nb_local_vars (pbb);
  for (k = 0; k < n_time; k++)
    {
      isl_map *candidate;

      if (k == tdim)
        continue;

      candidate = isl_map_equate (isl_map_copy (rel),
                                  isl_dim_out, k,
                                  isl_dim_out, half + k);
      if (!isl_map_is_empty (candidate))
        {
          isl_map_free (rel);
          rel = candidate;
        }
      else
        isl_map_free (candidate);
    }

  /* Maximize L' - L over the range [T,a,S,L,T',a',S',L'].  L is the
     last dimension of the first tuple, L' the last of the second.  */
  points = isl_map_range (rel);
  space = isl_set_get_space (points);
  delta = isl_aff_zero_on_domain (isl_local_space_from_space (space));
  delta = isl_aff_set_coefficient_si (delta, isl_dim_in, half - 1, -1);
  delta = isl_aff_set_coefficient_si (delta, isl_dim_in,
                                      half + half - 1, 1);

  /* NOTE(review): the status returned by isl_set_max is not inspected,
     so MAX_DELTA keeps whatever isl_int_init left in it when no maximum
     is produced -- confirm this is intended.  */
  isl_int_init (max_delta);
  isl_set_max (points, delta, &max_delta);
  isl_int_get_gmp (max_delta, stride);
  isl_int_clear (max_delta);
  isl_aff_free (delta);
  isl_set_free (points);

  /* Everything below is diagnostic output only.  */
  if (!dump_file || !(dump_flags & TDF_DETAILS))
    return;

  fprintf (dump_file, "\nStride in BB_%d, DR_%d, depth %d:",
           pbb_index (pbb), PDR_ID (pdr), (int) depth);
  text = mpz_get_str (NULL, 10, stride);
  fprintf (dump_file, " %s ", text);

  /* The string was allocated by GMP, so release it through the free
     function GMP currently has installed.  */
  mp_get_memory_functions (NULL, NULL, &release);
  (*release) (text, strlen (text) + 1);
}
/// @brief Tests whether the schedule dimension currently handled by
///        @p builder carries no dependence.
///
/// The ordering relation stored in `m_order` is mapped through the
/// builder's schedule on both sides. Dependencies whose source and sink
/// agree on all outer schedule dimensions are then required to agree on
/// the current dimension as well.
///
/// @param builder isl AST build whose schedule and schedule space are
///                queried.
/// @return true if there are no scheduled dependencies at all, or if
///         every dependency left after fixing the outer dimensions also
///         has equal source and sink coordinates in the current
///         dimension; false otherwise.
bool ast_gen::current_schedule_dimension_is_parallel(isl_ast_build * builder)
{
    isl::union_map sched = isl_ast_build_get_schedule(builder);
    isl::space sched_space = isl_ast_build_get_schedule_space(builder);

    // The current dimension is the last output dimension of the
    // schedule space.
    int current_dim = sched_space.dimension(isl::space::output) - 1;

    isl::printer out(m_model.context);

    if (verbose<ast_gen>::enabled())
    {
        cout << " Schedule: " << endl;
        out.print_each_in(sched);
        cout << " Current dimension = " << current_dim << endl;
    }

    // Work on a copy of the ordering relation and express both of its
    // sides in schedule coordinates.
    // NOTE(review): map_domain_through / map_range_through presumably
    // modify the object in place -- confirm against the wrapper.
    auto deps = m_order;
    deps.map_domain_through(sched);
    deps.map_range_through(sched);

    // Nothing is ordered, so nothing can prevent parallel execution.
    if (deps.is_empty())
        return true;

    auto sched_deps = deps.single_map();

    if (verbose<ast_gen>::enabled())
    {
        cout << " Schedule dependencies: " << endl;
        out.print_each_in(sched_deps);
    }

    // Keep only dependencies whose two ends coincide in every dimension
    // outside the current one.
    for (int outer = 0; outer < current_dim; ++outer)
    {
        sched_deps = isl_map_equate(sched_deps.copy(),
                                    isl_dim_out, outer,
                                    isl_dim_in, outer);
    }

    if (verbose<ast_gen>::enabled())
    {
        cout << " Unsatisfied schedule dependencies: " << endl;
        out.print_each_in(sched_deps);
    }

    // Relation containing every pair with equal coordinates in the
    // current dimension.
    auto same_in_current = isl::map::universe(sched_deps.get_space());
    same_in_current = isl_map_equate(same_in_current.copy(),
                                     isl_dim_out, current_dim,
                                     isl_dim_in, current_dim);

    if (sched_deps.is_subset_of(same_in_current))
        return true;

    if (verbose<ast_gen>::enabled())
    {
        // Report the dependencies that cross iterations of the current
        // dimension.
        auto offending = sched_deps;
        offending.subtract(same_in_current);
        cout << " Dependencies preventing parallelization: " << endl;
        out.print_each_in(offending);
    }

    return false;
}