// Mutate both halves of a Block, reusing the node when nothing changed.
Stmt IRMutator2::visit(const Block *op) {
    Stmt new_first = mutate(op->first);
    Stmt new_rest = mutate(op->rest);
    // Only rebuild if at least one child was rewritten.
    bool unchanged = new_first.same_as(op->first) && new_rest.same_as(op->rest);
    if (!unchanged) {
        return Block::make(std::move(new_first), std::move(new_rest));
    }
    return op;
}
void IRMutator::visit(const Block *op) { Stmt first = mutate(op->first); Stmt rest = mutate(op->rest); if (first.same_as(op->first) && rest.same_as(op->rest)) { stmt = op; } else { stmt = Block::make(std::move(first), std::move(rest)); } }
void IRMutator::visit(const Pipeline *op) { Stmt produce = mutate(op->produce); Stmt update = mutate(op->update); Stmt consume = mutate(op->consume); if (produce.same_as(op->produce) && update.same_as(op->update) && consume.same_as(op->consume)) { stmt = op; } else { stmt = Pipeline::make(op->name, produce, update, consume); } }
// Storage-folding visitor for Realize nodes: decides whether this
// realization's storage can be folded along one or more dimensions, and
// if so, rewrites the bounds of those dimensions to the fold factor.
void visit(const Realize *op) {
    Stmt body = mutate(op->body);

    // Detect whether the buffer is accessed by extern or device stages,
    // which makes folding unsafe.
    IsBufferSpecial special(op->name);
    op->accept(&special);

    // Get the function associated with this realization, which
    // contains the explicit fold directives from the schedule.
    auto func_it = env.find(op->name);
    Function func = func_it != env.end() ? func_it->second : Function();

    if (special.special) {
        // Folding is impossible here, so any explicit fold directive in
        // the schedule is a user error.
        for (const StorageDim &i : func.schedule().storage_dims()) {
            user_assert(!i.fold_factor.defined())
                << "Dimension " << i.var << " of " << op->name
                << " cannot be folded because it is accessed by extern or device stages.\n";
        }

        debug(3) << "Not attempting to fold " << op->name << " because its buffer is used\n";
        if (body.same_as(op->body)) {
            stmt = op;
        } else {
            stmt = Realize::make(op->name, op->types, op->bounds, op->condition, body);
        }
    } else {
        // Don't attempt automatic storage folding if there is
        // more than one produce node for this func.
        bool explicit_only = count_producers(body, op->name) != 1;

        AttemptStorageFoldingOfFunction folder(func, explicit_only);
        debug(3) << "Attempting to fold " << op->name << "\n";
        body = folder.mutate(body);

        if (body.same_as(op->body)) {
            // Folder made no changes and the body was already untouched.
            stmt = op;
        } else if (folder.dims_folded.empty()) {
            // Body changed, but no dimension was folded: keep old bounds.
            stmt = Realize::make(op->name, op->types, op->bounds, op->condition, body);
        } else {
            // Shrink each folded dimension's bounds to [0, factor).
            Region bounds = op->bounds;

            for (size_t i = 0; i < folder.dims_folded.size(); i++) {
                int d = folder.dims_folded[i].dim;
                Expr f = folder.dims_folded[i].factor;
                internal_assert(d >= 0 && d < (int)bounds.size());
                bounds[d] = Range(0, f);
            }

            stmt = Realize::make(op->name, op->types, bounds, op->condition, body);
        }
    }
}
// Older storage-folding visitor for Realize: attempts to fold the
// storage of this realization along a single dimension, unless the
// buffer is referenced by an intrinsic (which makes folding unsafe).
void visit(const Realize *op) {
    Stmt body = mutate(op->body);

    AttemptStorageFoldingOfFunction folder(op->name);
    IsBufferSpecial special(op->name);
    op->accept(&special);

    if (special.special) {
        // Can't fold; just rebuild the Realize if the body changed.
        debug(3) << "Not attempting to fold " << op->name << " because it is referenced by an intrinsic\n";
        if (body.same_as(op->body)) {
            stmt = op;
        } else {
            stmt = Realize::make(op->name, op->types, op->bounds, body);
        }
    } else {
        debug(3) << "Attempting to fold " << op->name << "\n";
        Stmt new_body = folder.mutate(body);

        if (new_body.same_as(op->body)) {
            // Neither this mutation nor the folder changed anything.
            stmt = op;
        } else if (new_body.same_as(body)) {
            // Folder made no change; keep the earlier-mutated body.
            stmt = Realize::make(op->name, op->types, op->bounds, body);
        } else {
            // One dimension was folded; shrink its bounds to the factor.
            Region bounds = op->bounds;
            assert(folder.dim_folded >= 0 &&
                   folder.dim_folded < (int)bounds.size());
            bounds[folder.dim_folded] = Range(0, folder.fold_factor);
            stmt = Realize::make(op->name, op->types, bounds, new_body);
        }
    }
}
// Inject tracing calls into the pipeline statement. Temporarily wraps the
// statement in a dummy Realize node for the output buffer so the tracing
// mutator sees the output like any other realization, then strips the
// wrapper back off. If any tracing was injected, appends a call to
// halide_shutdown_trace so the trace stream gets flushed at the end.
Stmt inject_tracing(Stmt s, const map<string, Function> &env, Function output) {
    Stmt original = s;
    InjectTracing tracing(env, output);

    // Add a dummy realize block for the output buffers
    Region output_region;
    Parameter output_buf = output.output_buffers()[0];
    assert(output_buf.is_buffer());
    for (int i = 0; i < output.dimensions(); i++) {
        string d = int_to_string(i);
        // Bounds come from the output buffer's .min.N/.extent.N symbols.
        Expr min = Variable::make(Int(32), output_buf.name() + ".min." + d);
        Expr extent = Variable::make(Int(32), output_buf.name() + ".extent." + d);
        output_region.push_back(Range(min, extent));
    }
    s = Realize::make(output.name(), output.output_types(), output_region, s);

    // Inject tracing calls
    s = tracing.mutate(s);

    // Strip off the dummy realize block
    const Realize *r = s.as<Realize>();
    assert(r);
    s = r->body;

    // Unless tracing was a no-op, add a call to shut down the trace
    // (which flushes the output stream)
    if (!s.same_as(original)) {
        Expr flush = Call::make(Int(32), "halide_shutdown_trace", vector<Expr>(), Call::Extern);
        s = Block::make(s, AssertStmt::make(flush == 0, "Failed to flush trace", vector<Expr>()));
    }
    return s;
}
// Rewrite the body of a ProducerConsumer node, reusing the node when the
// body comes back untouched.
Stmt IRMutator2::visit(const ProducerConsumer *op) {
    Stmt new_body = mutate(op->body);
    if (!new_body.same_as(op->body)) {
        return ProducerConsumer::make(op->name, op->is_producer, std::move(new_body));
    }
    return op;
}
// Simplify trivial loops: extent-one loops become a LetStmt binding the
// loop variable, extent-zero loops disappear, and loops provably at most
// one iteration long become a guarded LetStmt.
void visit(const For *for_loop) {
    if (for_loop->device_api != DeviceAPI::None) {
        // Don't assume any device API loops are trivial.
        IRMutator::visit(for_loop);
        return;
    }
    Stmt body = mutate(for_loop->body);
    if (is_one(for_loop->extent)) {
        // Warn when a parallel or vectorized loop turns out to have a
        // single iteration -- the schedule directive is a no-op.
        if ((for_loop->for_type == ForType::Parallel) ||
            (for_loop->for_type == ForType::GPUBlock) ||
            (for_loop->for_type == ForType::GPUThread)) {
            std::cerr << "Warning: Parallel for loop over " << for_loop->name
                      << " has extent one. "
                      << "Can't do one piece of work in parallel.\n";
        } else if (for_loop->for_type == ForType::Vectorized) {
            std::cerr << "Warning: Vectorized for loop over " << for_loop->name
                      << " has extent one. "
                      << "Not vectorizing.\n";
        }
        // A single-iteration loop is just a definition of the loop var.
        stmt = LetStmt::make(for_loop->name, for_loop->min, body);
    } else if (is_zero(for_loop->extent)) {
        // The loop never runs.
        stmt = Evaluate::make(0);
    } else if (can_prove(for_loop->extent <= 1)) {
        // Loop has at most one iteration
        stmt = LetStmt::make(for_loop->name, for_loop->min, body);
        stmt = IfThenElse::make(for_loop->extent > 0, stmt, Stmt());
    } else if (body.same_as(for_loop->body)) {
        stmt = for_loop;
    } else {
        stmt = For::make(for_loop->name, for_loop->min, for_loop->extent,
                         for_loop->for_type, for_loop->device_api, body);
    }
}
void visit(const LetStmt *op) { is_impure = false; Expr value = mutate(op->value); Stmt body = op->body; bool should_pop = false; bool should_erase = false; if (!is_impure) { map<Expr, string, IRDeepCompare>::iterator iter = scope.find(value); if (iter == scope.end()) { scope[value] = op->name; should_pop = true; } else { value = Variable::make(value.type(), iter->second); rewrites[op->name] = iter->second; should_erase = true; } } body = mutate(op->body); if (should_pop) { scope.erase(value); } if (should_erase) { rewrites.erase(op->name); } if (value.same_as(op->value) && body.same_as(op->body)) { stmt = op; } else { stmt = LetStmt::make(op->name, value, body); } }
void IRMutator::visit(const Realize *op) { Region new_bounds(op->bounds.size()); bool bounds_changed = false; // Mutate the bounds for (size_t i = 0; i < op->bounds.size(); i++) { Expr old_min = op->bounds[i].min; Expr old_extent = op->bounds[i].extent; Expr new_min = mutate(old_min); Expr new_extent = mutate(old_extent); if (!new_min.same_as(old_min)) bounds_changed = true; if (!new_extent.same_as(old_extent)) bounds_changed = true; new_bounds[i] = Range(new_min, new_extent); } Stmt body = mutate(op->body); Expr condition = mutate(op->condition); if (!bounds_changed && body.same_as(op->body) && condition.same_as(op->condition)) { stmt = op; } else { stmt = Realize::make(op->name, op->types, new_bounds, condition, body); } }
virtual void visit(const For *for_loop) { // Compute the region required of each function within this loop body map<string, Region> regions = regions_required(for_loop->body); Stmt body = mutate(for_loop->body); log(3) << "Bounds inference considering loop over " << for_loop->name << '\n'; // Inject let statements defining those bounds for (size_t i = 0; i < funcs.size(); i++) { if (in_update.contains(funcs[i])) continue; const Region ®ion = regions[funcs[i]]; const Function &f = env.find(funcs[i])->second; if (region.empty()) continue; log(3) << "Injecting bounds for " << funcs[i] << '\n'; assert(region.size() == f.args().size() && "Dimensionality mismatch between function and region required"); for (size_t j = 0; j < region.size(); j++) { const string &arg_name = f.args()[j]; body = new LetStmt(f.name() + "." + arg_name + ".min", region[j].min, body); body = new LetStmt(f.name() + "." + arg_name + ".extent", region[j].extent, body); } } if (body.same_as(for_loop->body)) { stmt = for_loop; } else { stmt = new For(for_loop->name, for_loop->min, for_loop->extent, for_loop->for_type, body); } }
void IRMutator::visit(const ProducerConsumer *op) { Stmt body = mutate(op->body); if (body.same_as(op->body)) { stmt = op; } else { stmt = ProducerConsumer::make(op->name, op->is_producer, std::move(body)); } }
// Mutate a LetStmt's value then body, rebuilding only on change.
Stmt IRMutator2::visit(const LetStmt *op) {
    Expr new_value = mutate(op->value);
    Stmt new_body = mutate(op->body);
    bool unchanged = new_value.same_as(op->value) && new_body.same_as(op->body);
    if (!unchanged) {
        return LetStmt::make(op->name, std::move(new_value), std::move(new_body));
    }
    return op;
}
// Append the free statement immediately after the last use of the buffer.
// Once injected, everything else passes through untouched.
Stmt inject_marker(Stmt s) {
    if (injected) {
        return s;
    }
    if (!s.same_as(last_use)) {
        // Not the statement we're looking for; keep descending.
        return mutate(s);
    }
    // Found the last use: place the free right after it.
    injected = true;
    return Block::make(s, make_free(func, inject_device_free));
}
void IRMutator::visit(const LetStmt *op) { Expr value = mutate(op->value); Stmt body = mutate(op->body); if (value.same_as(op->value) && body.same_as(op->body)) { stmt = op; } else { stmt = LetStmt::make(op->name, std::move(value), std::move(body)); } }
void visit(const For *for_loop) { Stmt body = mutate(for_loop->body); const IntImm *extent = for_loop->extent.as<IntImm>(); if (extent && extent->value == 1) { stmt = new LetStmt(for_loop->name, for_loop->min, body); } else if (body.same_as(for_loop->body)) { stmt = for_loop; } else { stmt = new For(for_loop->name, for_loop->min, for_loop->extent, for_loop->for_type, body); } }
void visit(const LetStmt *op) { Expr value = mutate(op->value); if (value.type() == Int(32)) alignment_info.push(op->name, modulus_remainder(value, alignment_info)); Stmt body = mutate(op->body); if (value.type() == Int(32)) alignment_info.pop(op->name); if (value.same_as(op->value) && body.same_as(op->body)) { stmt = op; } else { stmt = LetStmt::make(op->name, value, body); } }
void IRMutator::visit(const For *op) { Expr min = mutate(op->min); Expr extent = mutate(op->extent); Stmt body = mutate(op->body); if (min.same_as(op->min) && extent.same_as(op->extent) && body.same_as(op->body)) { stmt = op; } else { stmt = For::make(op->name, min, extent, op->for_type, body); } }
// Mutate a For node's min, extent, then body; return the original node if
// nothing changed.
Stmt IRMutator2::visit(const For *op) {
    Expr new_min = mutate(op->min);
    Expr new_extent = mutate(op->extent);
    Stmt new_body = mutate(op->body);
    bool changed = !new_min.same_as(op->min) ||
                   !new_extent.same_as(op->extent) ||
                   !new_body.same_as(op->body);
    if (changed) {
        return For::make(op->name, std::move(new_min), std::move(new_extent),
                         op->for_type, op->device_api, std::move(new_body));
    }
    return op;
}
void visit(const Allocate *op) { allocs.push(op->name, 1); Stmt body = mutate(op->body); if (allocs.contains(op->name)) { stmt = body; allocs.pop(op->name); } else if (body.same_as(op->body)) { stmt = op; } else { stmt = Allocate::make(op->name, op->type, op->extents, op->condition, body, op->new_expr, op->free_function); } }
// Profiling instrumentation for Allocate nodes: accounts stack
// allocations into per-function peak counters at compile time, and wraps
// heap allocations with a runtime call that reports the allocation to the
// profiler.
void visit(const Allocate *op) {
    int idx = get_func_id(op->name);

    vector<Expr> new_extents;
    bool all_extents_unmodified = true;
    for (size_t i = 0; i < op->extents.size(); i++) {
        new_extents.push_back(mutate(op->extents[i]));
        all_extents_unmodified &= new_extents[i].same_as(op->extents[i]);
    }
    Expr condition = mutate(op->condition);

    bool on_stack;
    Expr size = compute_allocation_size(new_extents, condition, op->type, op->name, on_stack);
    func_alloc_sizes.push(op->name, {on_stack, size});

    // compute_allocation_size() might return a zero size, if the allocation is
    // always conditionally false. remove_dead_allocations() is called after
    // inject_profiling() so this is a possible scenario.
    if (!is_zero(size) && on_stack) {
        const int64_t *int_size = as_const_int(size);
        internal_assert(int_size != NULL); // Stack size is always a const int
        // Track the running total and the high-water mark for this func.
        func_stack_current[idx] += *int_size;
        func_stack_peak[idx] = std::max(func_stack_peak[idx], func_stack_current[idx]);
        debug(3) << "  Allocation on stack: " << op->name
                 << "(" << size << ") in pipeline " << pipeline_name
                 << "; current: " << func_stack_current[idx]
                 << "; peak: " << func_stack_peak[idx] << "\n";
    }

    Stmt body = mutate(op->body);

    Expr new_expr;
    if (op->new_expr.defined()) {
        new_expr = mutate(op->new_expr);
    }

    if (all_extents_unmodified &&
        body.same_as(op->body) &&
        condition.same_as(op->condition) &&
        new_expr.same_as(op->new_expr)) {
        stmt = op;
    } else {
        stmt = Allocate::make(op->name, op->type, new_extents, condition, body, new_expr, op->free_function);
    }

    if (!is_zero(size) && !on_stack) {
        // Heap allocation: emit a runtime profiler notification before the
        // allocation takes effect.
        Expr profiler_pipeline_state = Variable::make(Handle(), "profiler_pipeline_state");
        debug(3) << "  Allocation on heap: " << op->name << "(" << size << ") in pipeline " << pipeline_name << "\n";
        Expr set_task = Call::make(Int(32), "halide_profiler_memory_allocate",
                                   {profiler_pipeline_state, idx, size}, Call::Extern);
        stmt = Block::make(Evaluate::make(set_task), stmt);
    }
}
void visit(const Block *op) {
    /* First we dig into the block traversing down the 'first'
     * stmt until we find one that is not a block. We push all of
     * the rest stmt's into the 'rest' stmt of the top-level
     * block, and then fix up the 'rest' stmt recursively at the
     * end. The result of this mutation is an equivalent Block
     * node that does not contain any Block nodes in a 'first' stmt.
     */
    Stmt first = op->first;
    Stmt rest = op->rest;
    // Peel nested Blocks off the 'first' spine, accumulating their
    // trailing statements onto the front of 'rest'.
    while (const Block *first_block = first.as<Block>()) {
        first = first_block->first;
        if (first_block->rest.defined()) {
            rest = rest.defined() ? Block::make(first_block->rest, rest) : first_block->rest;
        }
    }
    if (first.same_as(op->first)) {
        // 'first' was already a non-Block; only the rest needs fixing up.
        rest = mutate(rest);
        stmt = rest.same_as(op->rest) ? op : Block::make(first, rest);
    } else {
        // The spine was restructured, so a new Block is always required.
        // NOTE(review): 'first' itself is not re-mutated here -- presumably
        // intentional since this pass only normalizes the Block spine.
        stmt = Block::make(first, mutate(rest));
    }
}
void visit(const LetStmt *op) { Expr value = mutate(op->value); push_name(op->name); string new_name = get_name(op->name); Stmt body = mutate(op->body); pop_name(op->name); if (new_name == op->name && body.same_as(op->body) && value.same_as(op->value)) { stmt = op; } else { stmt = LetStmt::make(new_name, value, body); } }
// Mutate a Realize node: its bounds region, then body, then condition.
Stmt IRMutator2::visit(const Realize *op) {
    // Mutate the bounds
    Region new_bounds;
    bool bounds_changed;
    std::tie(new_bounds, bounds_changed) = mutate_region(this, op->bounds);

    Stmt new_body = mutate(op->body);
    Expr new_condition = mutate(op->condition);

    bool changed = bounds_changed ||
                   !new_body.same_as(op->body) ||
                   !new_condition.same_as(op->condition);
    if (changed) {
        return Realize::make(op->name, op->types, op->memory_type, new_bounds,
                             std::move(new_condition), std::move(new_body));
    }
    return op;
}
void visit(const For *op) { Expr min = mutate(op->min); Expr extent = mutate(op->extent); push_name(op->name); string new_name = get_name(op->name); Stmt body = mutate(op->body); pop_name(op->name); if (new_name == op->name && body.same_as(op->body) && min.same_as(op->min) && extent.same_as(op->extent)) { stmt = op; } else { stmt = For::make(new_name, min, extent, op->for_type, body); } }
void IRMutator::visit(const Allocate *op) { std::vector<Expr> new_extents; bool all_extents_unmodified = true; for (size_t i = 0; i < op->extents.size(); i++) { new_extents.push_back(mutate(op->extents[i])); all_extents_unmodified &= new_extents[i].same_as(op->extents[i]); } Stmt body = mutate(op->body); Expr condition = mutate(op->condition); if (all_extents_unmodified && body.same_as(op->body) && condition.same_as(op->condition)) { stmt = op; } else { stmt = Allocate::make(op->name, op->type, new_extents, condition, body); } }
void visit(const LetStmt *op) { Expr value = simplify(mutate(op->value)); Stmt body; if (is_const(value)) { scope.push(op->name, value); body = mutate(op->body); scope.pop(op->name); } else { body = mutate(op->body); } if (body.same_as(op->body) && value.same_as(op->value)) { stmt = op; } else { stmt = LetStmt::make(op->name, value, body); } }
void visit(const LetStmt *op) { Expr value = mutate(op->value); if (value.type().is_vector()) { scope.push(op->name, value.type()); } Stmt body = mutate(op->body); if (value.type().is_vector()) { scope.pop(op->name); } if (value.same_as(op->value) && body.same_as(op->body)) { stmt = op; } else { stmt = LetStmt::make(op->name, value, body); } }
void visit(const For *for_loop) { Stmt body = mutate(for_loop->body); if (is_one(for_loop->extent) && !CodeGen_GPU_Dev::is_gpu_var(for_loop->name)) { if (for_loop->for_type == ForType::Parallel) { std::cerr << "Warning: Parallel for loop over " << for_loop->name << " has extent one. " << "Can't do one piece of work in parallel.\n"; } else if (for_loop->for_type == ForType::Vectorized) { std::cerr << "Warning: Vectorized for loop over " << for_loop->name << " has extent one. " << "Not vectorizing.\n"; } stmt = LetStmt::make(for_loop->name, for_loop->min, body); } else if (is_zero(for_loop->extent)) { stmt = Evaluate::make(0); } else if (body.same_as(for_loop->body)) { stmt = for_loop; } else { stmt = For::make(for_loop->name, for_loop->min, for_loop->extent, for_loop->for_type, for_loop->device_api, body); } }
// Mutate an Allocate node's extents, body, condition and (if present)
// new_expr; return the original node when nothing changed.
Stmt IRMutator2::visit(const Allocate *op) {
    bool changed = false;
    std::vector<Expr> mutated_extents;
    mutated_extents.reserve(op->extents.size());
    for (const Expr &extent : op->extents) {
        Expr e = mutate(extent);
        changed = changed || !e.same_as(extent);
        mutated_extents.push_back(e);
    }
    Stmt new_body = mutate(op->body);
    Expr new_condition = mutate(op->condition);
    Expr new_expr;
    if (op->new_expr.defined()) {
        new_expr = mutate(op->new_expr);
    }
    changed = changed ||
              !new_body.same_as(op->body) ||
              !new_condition.same_as(op->condition) ||
              !new_expr.same_as(op->new_expr);
    if (changed) {
        return Allocate::make(op->name, op->type, op->memory_type,
                              mutated_extents, std::move(new_condition),
                              std::move(new_body), std::move(new_expr),
                              op->free_function);
    }
    return op;
}