int main(int argc, char **argv) { // The camera pipe is specialized on the 2592x1968 images that // come in, so we'll just use an image instead of a uniform image. ImageParam input(UInt(16), 2); ImageParam matrix_3200(Float(32), 2, "m3200"), matrix_7000(Float(32), 2, "m7000"); Param<float> color_temp("color_temp"); //, 3200.0f); Param<float> gamma("gamma"); //, 1.8f); Param<float> contrast("contrast"); //, 10.0f); // shift things inwards to give us enough padding on the // boundaries so that we don't need to check bounds. We're going // to make a 2560x1920 output image, just like the FCam pipe, so // shift by 16, 12 Func shifted; shifted(x, y) = input(x+16, y+12); // Parameterized output type, because LLVM PTX (GPU) backend does not // currently allow 8-bit computations int bit_width = atoi(argv[1]); Type result_type = UInt(bit_width); // Pick a schedule schedule = atoi(argv[2]); // Build the pipeline Func processed = process(shifted, result_type, matrix_3200, matrix_7000, color_temp, gamma, contrast); // We can generate slightly better code if we know the output is a whole number of tiles. Expr out_width = processed.output_buffer().width(); Expr out_height = processed.output_buffer().height(); processed .bound(tx, 0, (out_width/32)*32) .bound(ty, 0, (out_height/32)*32); //string s = processed.serialize(); //printf("%s\n", s.c_str()); std::vector<Argument> args = {color_temp, gamma, contrast, input, matrix_3200, matrix_7000}; processed.compile_to_file("curved", args); processed.compile_to_assembly("curved.s", args); return 0; }
int main(int argc, char **argv) { Func f; Var x, y; f(x, y) = x + y; f.parallel(x); // Having more threads than tasks shouldn't hurt performance too much. double correct_time = 0; for (int t = 2; t <= 64; t *= 2) { std::ostringstream ss; ss << "HL_NUM_THREADS=" << t; std::string str = ss.str(); char buf[32] = {0}; memcpy(buf, str.c_str(), str.size()); putenv(buf); Halide::Internal::JITSharedRuntime::release_all(); f.compile_jit(); // Start the thread pool without giving any hints as to the // number of tasks we'll be using. f.realize(t, 1); double min_time = 1e20; for (int i = 0; i < 3; i++) { double t1 = current_time(); f.realize(2, 1000000); double t2 = current_time() - t1; if (t2 < min_time) min_time = t2; } printf("%d: %f ms\n", t, min_time); if (t == 2) { correct_time = min_time; } else if (min_time > correct_time * 5) { printf("Unacceptable overhead when using %d threads for 2 tasks: %f ms vs %f ms\n", t, min_time, correct_time); return -1; } } printf("Success!\n"); return 0; }
int main(int argc, char **argv) { Func f; Var x; f(x) = sin(x); f.compute_root(); const int N = 9; std::vector<Expr> exprs; for (int i = 0; i < N; i++) { exprs.push_back(f(i)); } exprs = bitonic_sort(exprs); std::cout << exprs.size() << "\n"; // Use update definitions to write them to another Func in sorted // order for inspection. Note that doing this doesn't explicitly // share work between each element - it'll generate the huge // min/max expression to extract each sorted element. llvm should // lift out common subexpressions though. Func g; g(x) = undef<float>(); for (int i = 0; i < N; i++) { g(i) = exprs[i]; } Buffer<float> result = g.realize(N); for (int i = 0; i < N; i++) { printf("%f ", result(i)); } printf("\n"); for (int i = 0; i < N-1; i++) { if (result(i) >= result(i+1)) { printf("Results were not in order\n"); return -1; } } return 0; }
int main(int argc, char **argv) { Var x, y; const int size = 32; Image<double> noise(size, size); for (int i = 0; i < size; i++) { for (int j = 0; j < size; j++) { noise(j,i) = (double)rand() / RAND_MAX; } } // Define a seam carving-esque energy // The meaning of this depends on the interleaving of the x and y // dimensions during the reduction update Func clamped; clamped(x, y) = noise(clamp(x, 0, size-1), clamp(y, 0, size-1)); Func energy; RDom ry(1, noise.height()-1); energy(x, y) = clamped(x, y); Expr xm = clamp(x-1, 0, size-1); Expr xp = clamp(x+1, 0, size-1); energy(x, ry) = clamped(x, ry) + min(min(energy(xm, ry-1), energy(x, ry-1)), energy(xp, ry-1)); Image<double> im_energy = energy.realize(size,size); Image<double> ref_energy(noise); for (int y = 1; y < size; y++) { for (int x = 0; x < size; x++) { int xm = std::max(x-1, 0); int xp = std::min(x+1, size-1); double incr = std::min(ref_energy(xm, y-1), std::min(ref_energy(x, y-1), ref_energy(xp, y-1))); ref_energy(x, y) += incr; if (ref_energy(x,y) != im_energy(x,y)) { printf("energy(%d,%d) was %f instead of %f\n", x, y, im_energy(x,y), ref_energy(x,y)); return -1; } } } printf("Success!\n"); return 0; }
int rdom_wrapper_test() { Func source("source"), g("g"), result("result"); Var x("x"), y("y"); source(x, y) = x + y; ImageParam img(Int(32), 2, "img"); Buffer<int> buf = source.realize(200, 200); img.set(buf); g(x, y) = 10; g(x, y) += 2 * img(x, x); g(x, y) += 3 * img(y, y); // Make a global wrapper on 'g', so that we can schedule initialization // and the update on the same compute level at the global wrapper Func wrapper = g.in().compute_root(); g.compute_at(wrapper, x); Func img_f = img; img_f.compute_root(); // Check the call graphs. // Expect 'wrapper' to call 'g', initialization of 'g' to call nothing // and its update to call 'img_f' and 'g', 'img_f' to call 'img' Module m = wrapper.compile_to_module({wrapper.infer_arguments()}); CheckCalls c; m.functions().front().body.accept(&c); CallGraphs expected = { {g.name(), {img_f.name(), g.name()}}, {wrapper.name(), {g.name()}}, {img_f.name(), {img.name()}}, }; if (check_call_graphs(c.calls, expected) != 0) { return -1; } Buffer<int> im = wrapper.realize(200, 200); auto func = [](int x, int y) { return 4*x + 6* y + 10; }; if (check_image(im, func)) { return -1; } return 0; }
int main(int argc, char **argv) { ImageParam input(UInt(8), 1); input.dim(0).set_bounds(0, size); { Func f; Var x; f(x) = input(x); // Output must have the same size as the input. f.output_buffer().dim(0).set_bounds(input.dim(0).min(), input.dim(0).extent()); f.add_custom_lowering_pass(new Validator); f.compile_jit(); Buffer<uint8_t> dummy(size); dummy.fill(42); input.set(dummy); Buffer<uint8_t> out = f.realize(size); if (!out.all_equal(42)) { std::cerr << "wrong output" << std::endl; exit(-1); } } { Func f; Var x; f(x) = undef(UInt(8)); RDom r(input); f(r.x) = cast<uint8_t>(42); f.add_custom_lowering_pass(new Validator); f.compile_jit(); Buffer<uint8_t> dummy(size); input.set(dummy); Buffer<uint8_t> out = f.realize(size); if (!out.all_equal(42)) { std::cerr << "wrong output" << std::endl; exit(-1); } } std::cout << "Success!" << std::endl; return 0; }
bool ShiftBilinearDataTest(bool create, int width, int height, View::Format format, const Func & f) { bool result = true; Data data(f.description); std::cout << (create ? "Create" : "Verify") << " test " << f.description << " [" << width << ", " << height << "]." << std::endl; View s(width, height, format, NULL, TEST_ALIGN(width)); View b(width, height, format, NULL, TEST_ALIGN(width)); View d1(width, height, format, NULL, TEST_ALIGN(width)); View d2(width, height, format, NULL, TEST_ALIGN(width)); const double dx = -5.3, dy = 3.7; const int crop = 3; if(create) { FillRandom(s); FillRandom(b); TEST_SAVE(s); TEST_SAVE(b); f.Call(s, b, dx, dy, crop, crop, width - crop, height - crop, d1); TEST_SAVE(d1); } else { TEST_LOAD(s); TEST_LOAD(b); TEST_LOAD(d1); f.Call(s, b, dx, dy, crop, crop, width - crop, height - crop, d2); TEST_SAVE(d2); result = result && Compare(d1, d2, 0, true, 64); } return result; }
int main(int argc, char **argv) { Param<float> reservoirConcentration; Param<float> stepTime; Param<float> layerMixConst; Param<float> layerTimeDivisor; Func sumDx; Func layerMixed; Func initialDeveloperMirrored; ImageParam devConc(type_of<float>(),2); Func dDevelConc; Func developerConcentration = lambda(x,y,devConc(x,y)); dDevelConc = calcLayerMix(developerConcentration, layerMixConst, stepTime, layerTimeDivisor, reservoirConcentration); std::vector<Argument> ddcArgs = dDevelConc.infer_arguments(); dDevelConc.compile_to_file("calcLayerMix",ddcArgs); return 0; }
// Blurs `input` by running blur_then_transpose twice: first with `height`,
// then with `width` on the result of the first call.
//
// The four IIR coefficients are derived from `sigma` (method of Young and
// Van Vliet) and stored in a compute_root Func shared by both passes.
Func blur(Func input, Expr sigma, Expr width, Expr height) {
    Func coeff;

    // q is piecewise in sigma.
    Expr q_low = 3.97156f - 4.14554f*sqrt(1 - 0.26891f*sigma);
    Expr q_high = 0.98711f*sigma - 0.96330f;
    Expr q = select(sigma < 2.5f, q_low, q_high);

    Expr denom = 1.57825f + 2.44413f*q + 1.4281f*q*q + 0.422205f*q*q*q;

    // Coefficients 1..3 are defined first because coefficient 0 is derived
    // from them.
    coeff(x) = undef<float>();
    coeff(1) = (2.44413f*q + 2.85619f*q*q + 1.26661f*q*q*q)/denom;
    coeff(2) = -(1.4281f*q*q + 1.26661f*q*q*q)/denom;
    coeff(3) = (0.422205f*q*q*q)/denom;
    coeff(0) = 1 - (coeff(1) + coeff(2) + coeff(3));
    coeff.compute_root();

    Func blurY, blurX;
    blurY = blur_then_transpose(input, coeff, height, sigma);
    blurX = blur_then_transpose(blurY, coeff, width, sigma);

    return blurX;
}
// Creates a copy of this Func bound to `cls`, optionally renamed to `name`.
//
// The copy is copy-constructed into storage from allocFuncMem, gets fresh
// prologues, an invalid func id and a zeroed profiling counter, and has its
// full name recomputed.
Func* Func::clone(Class* cls, const StringData* name) const {
  const auto paramCount = this->numParams();

  void* storage = allocFuncMem(
    m_name,
    paramCount,
    isClosureBody(),
    cls || !preClass());
  Func* copy = new (storage) Func(*this);

  copy->initPrologues(paramCount);
  copy->m_funcId = InvalidFuncId;

  if (name) {
    copy->m_name = name;
  }
  // Only write the class when it actually differs from the copied one.
  if (cls != copy->m_cls) {
    copy->m_cls = cls;
  }

  copy->setFullName(paramCount);
  copy->m_profCounter = 0;
  return copy;
}
std::vector<int> Expr::footprint(const Func& f) const { MLVal fp = footprintOfFuncInExpr(f.name(), contents->node); assert(!listEmpty(fp)); std::vector<int> footprint; for (; !listEmpty(fp); fp = listTail(fp)) { footprint.push_back(int(listHead(fp))); } return footprint; }
bool AnyToAnyAutoTest(int width, int height, View::Format srcType, View::Format dstType, const Func & f1, const Func & f2) { bool result = true; TEST_LOG_SS(Info, "Test " << f1.description << " & " << f2.description << " for size [" << width << "," << height << "]."); View src(width, height, srcType, NULL, TEST_ALIGN(width)); FillRandom(src); View dst1(width, height, dstType, NULL, TEST_ALIGN(width)); View dst2(width, height, dstType, NULL, TEST_ALIGN(width)); TEST_EXECUTE_AT_LEAST_MIN_TIME(f1.Call(src, dst1)); TEST_EXECUTE_AT_LEAST_MIN_TIME(f2.Call(src, dst2)); result = result && Compare(dst1, dst2, 0, true, 64); return result; }
int main(int argc, char **argv) { ImageParam input(Float(32), 2); Var x, y, z; RDom dom(0, input.width()*8); Func f; Expr hard_to_reason_about = cast<int>(hypot(input.width(), input.height())); f(x, y, z) = 1; f(x, y, dom / hard_to_reason_about) += 1; f.compile_jit(); Image<float> im(32, 32); input.set(im); f.realize(100, 100, 16); printf("Success!\n"); return 0; }
//Convolution Func ifft2_c2r(Func input, int W, int H) { Target target = get_target_from_environment(); Fft2dDesc fwd_desc; Fft2dDesc inv_desc; inv_desc.gain = 1.0f/(W*H); //Make complex ComplexFunc input_complex; input_complex(x, y, c) = {input(x, y, c, 0), input(x, y, c, 1)}; // Compute the inverse DFT Func res = fft2d_c2r(input_complex, W, H, target, inv_desc); //Schedule res.compute_root(); return res; }
bool StretchGrayDataTest(bool create, int width, int height, const Func & f, int stretch) { bool result = true; Data data(f.description); std::cout << (create ? "Create" : "Verify") << " test " << f.description << " [" << width << ", " << height << "]." << std::endl; const int stretchedWidth = width*stretch; const int stretchedHeight = height*stretch; View s(width, height, View::Gray8, NULL, TEST_ALIGN(width)); View d1(stretchedWidth, stretchedHeight, View::Gray8, NULL, TEST_ALIGN(stretchedWidth)); View d2(stretchedWidth, stretchedHeight, View::Gray8, NULL, TEST_ALIGN(stretchedWidth)); if(create) { FillRandom(s); TEST_SAVE(s); f.Call(s, d1); TEST_SAVE(d1); } else { TEST_LOAD(s); TEST_LOAD(d1); f.Call(s, d2); TEST_SAVE(d2); result = result && Compare(d1, d2, 0, true, 64); } return result; }
// Finds a root of `func` in [a, b] by bisection.
//
// func         object providing eval(value_type)
// a, b         interval end points; func must not have the same strict sign
//              at both (otherwise std::logic_error is thrown)
// tol          tolerance applied to both the residual |f| and half the
//              bracket width
// max_num_its  maximum number of bisection steps
//
// Returns the final midpoint. Throws std::logic_error if the tolerances are
// still not met once the iteration budget is spent.
value_type
Stokhos::KL::OneDExponentialCovarianceFunction<value_type>::
bisection(const Func& func, const value_type& a, const value_type& b,
          magnitude_type tol, int max_num_its)
{
  value_type low, hi;
  value_type fa = func.eval(a);
  value_type fb = func.eval(b);
  TEUCHOS_TEST_FOR_EXCEPTION(fa*fb > value_type(0.0), std::logic_error,
    "Bounds [" << a << "," << b << "] must bracket the root!" << std::endl <<
    "f(a) = " << fa << ", f(b) = " << fb << std::endl);

  // Orient the bracket so that f(low) <= 0 <= f(hi).
  if (fa <= 0.0) {
    low = a;
    hi = b;
  }
  else {
    low = b;
    hi = a;
  }

  int nit = 0;
  value_type u = low + (hi - low)/2.0;
  value_type f = func.eval(u);
  bool not_converged =
    Teuchos::ScalarTraits<value_type>::magnitude(hi - low) > 2.0*tol ||
    Teuchos::ScalarTraits<value_type>::magnitude(f) > tol;
  while (not_converged && nit < max_num_its) {
    //std::cout << "u = " << u << " f = " << f << std::endl;
    if (f <= 0.0)
      low = u;
    else
      hi = u;
    u = low + (hi - low)/2.0;
    f = func.eval(u);
    ++nit;
    not_converged =
      Teuchos::ScalarTraits<value_type>::magnitude(hi - low) > 2.0*tol ||
      Teuchos::ScalarTraits<value_type>::magnitude(f) > tol;
  }

  // Test the convergence criterion itself rather than the iteration count:
  // a solve that meets the tolerance exactly on its last permitted step has
  // converged and must not be reported as a failure.
  TEUCHOS_TEST_FOR_EXCEPTION(not_converged, std::logic_error,
    "Nonlinear solver did not converge!" << std::endl);

  return u;
}
int global_wrap_test() { Func f("f"), g("g"), h("h"), i("i"); Var x("x"), y("y"); f(x, y) = x + y; g(x, y) = f(x, y); h(x, y) = g(x, y) + f(x, y); Var xi("xi"), yi("yi"), t("t"); Func wrapper = f.in(); f.compute_root(); h.compute_root().tile(x, y, xi, yi, 16, 16).fuse(x, y, t).parallel(t); g.compute_at(h, yi); wrapper.compute_at(h, yi).tile(x, y, xi, yi, 8, 8).fuse(xi, yi, t).vectorize(t, 4); // Check the call graphs. // Expect 'g' to call 'wrapper', 'wrapper' to call 'f', 'f' to call nothing, // 'h' to call 'wrapper' and 'g' Module m = h.compile_to_module({}); CheckCalls c; m.functions().front().body.accept(&c); CallGraphs expected = { {h.name(), {g.name(), wrapper.name()}}, {g.name(), {wrapper.name()}}, {wrapper.name(), {f.name()}}, {f.name(), {}}, }; if (check_call_graphs(c.calls, expected) != 0) { return -1; } Image<int> im = h.realize(200, 200); auto func = [](int x, int y) { return 2*(x + y); }; if (check_image(im, func)) { return -1; } return 0; }
int main(int argc, char **argv) { Image<float> in = Tools::load_image("input.png"); Func brighter; Var x, y, c; brighter(x, y, c) = pow(in(x, y, c), 0.8f); brighter.vectorize(x, 8).parallel(y); Image<float> output(in.width(), in.height(), in.channels()); for (int i = 0; i < 10; i++) { double t1 = current_time(); brighter.realize(output); double t2 = current_time(); std::cout << "Time: " << (t2 - t1) << "\n"; } Tools::save_image(output, "output.png"); return 0; }
// Realizes select(x == y, 1, 0) over a 10x10 grid and checks every pixel:
// ones on the diagonal, zeros elsewhere.
int main(int argc, char **argv) {
    Func f;
    Var x, y;
    f(x, y) = select(x == y, 1, 0);

    Image<int> im = f.realize(10, 10);

    const int extent = 10;
    for (int row = 0; row < extent; row++) {
        for (int col = 0; col < extent; col++) {
            int correct = 0;
            if (col == row) {
                correct = 1;
            }
            if (im(col, row) != correct) {
                printf("im(%d, %d) = %d instead of %d\n",
                       col, row, im(col, row), correct);
                return -1;
            }
        }
    }

    printf("Success!\n");
    return 0;
}
// Finds a root of `func` with Newton's method, starting from the midpoint of
// [a, b].
//
// func         object providing eval(value_type) and deriv(value_type)
// a, b         only used to form the initial guess (a+b)/2
// tol          tolerance on the residual |f|
// max_num_its  maximum number of Newton steps
//
// Returns the final iterate. Throws std::logic_error if |f| is still above
// `tol` once the iteration budget is spent.
value_type
Stokhos::KL::OneDExponentialCovarianceFunction<value_type>::
newton(const Func& func, const value_type& a, const value_type& b,
       magnitude_type tol, int max_num_its)
{
  value_type u = (a+b)/2.0;
  value_type f = func.eval(u);
  int nit = 0;
  bool not_converged = Teuchos::ScalarTraits<value_type>::magnitude(f) > tol;
  while (not_converged && nit < max_num_its) {
    // Per-iteration trace left disabled, as in bisection(); a solver should
    // not write to stdout unconditionally.
    //std::cout << "u = " << u << " f = " << f << std::endl;
    value_type dfdu = func.deriv(u);
    u -= f / dfdu;
    f = func.eval(u);
    ++nit;
    not_converged = Teuchos::ScalarTraits<value_type>::magnitude(f) > tol;
  }

  // Test the residual itself rather than the iteration count: a solve that
  // meets the tolerance exactly on its last permitted step has converged and
  // must not be reported as a failure.
  TEUCHOS_TEST_FOR_EXCEPTION(not_converged, std::logic_error,
    "Nonlinear solver did not converge!" << std::endl);

  return u;
}
int rdom_wrapper_test() { Func f("f"), g("g"), result("result"); Var x("x"), y("y"); f(x, y) = x + y; g(x, y) = 10; g(x, y) += 2 * f(x, x); g(x, y) += 3 * f(y, y); // Make a global wrapper on 'g', so that we can schedule initialization // and the update on the same compute level at the global wrapper Func wrapper = g.in().compute_root(); g.compute_at(wrapper, x); f.compute_root(); // Check the call graphs. // Expect 'wrapper' to call 'g', initialization of 'g' to call nothing // and its update to call 'f' and 'g', 'f' to call nothing Module m = wrapper.compile_to_module({}); CheckCalls c; m.functions().front().body.accept(&c); CallGraphs expected = { {g.name(), {f.name(), g.name()}}, {wrapper.name(), {g.name()}}, {f.name(), {}}, }; if (check_call_graphs(c.calls, expected) != 0) { return -1; } Image<int> im = wrapper.realize(200, 200); auto func = [](int x, int y) { return 4*x + 6* y + 10; }; if (check_image(im, func)) { return -1; } return 0; }
int not_dependent_on_vectorized_var_test() { Var x("x"), y("y"), z("z"); Func f ("f"), g("g"), ref("ref"); g(x, y, z) = x + y + z; g.compute_root(); RDom r(0, 80, 0, 80, 0, 80); r.where(r.z*r.z < 47); ref(x, y, z) = 10; ref(r.x, r.y, 1) = max(g(0, 1, 2), g(r.x + 1, r.y, 2)); Image<int> im_ref = ref.realize(160, 160, 160); f(x, y, z) = 10; f(r.x, r.y, 1) = max(g(0, 1, 2), g(r.x + 1, r.y, 2)); f.update(0).allow_race_conditions(); Target target = get_jit_target_from_environment(); if (target.features_any_of({Target::HVX_64, Target::HVX_128})) { f.update(0).hexagon().vectorize(r.z, 32); } else if (target.arch == Target::X86) { f.update(0).vectorize(r.z, 32); f.add_custom_lowering_pass(new CheckPredicatedStoreLoad(false, false)); } Image<int> im = f.realize(160, 160, 160); auto func = [im_ref](int x, int y, int z) { return im_ref(x, y, z); }; if (check_image(im, func)) { return -1; } return 0; }
int scalar_store_test() { Var x("x"), y("y"); Func f ("f"), g("g"), ref("ref"); g(x, y) = x + y; g.compute_root(); RDom r(0, 80, 0, 80); r.where(r.x + r.y < 48); ref(x, y) = 10; ref(13, 13) = max(g(0, 1), g(2*r.x + 1, r.y)); Image<int> im_ref = ref.realize(160, 160); f(x, y) = 10; f(13, 13) = max(g(0, 1), g(2*r.x + 1, r.y)); f.update(0).allow_race_conditions(); Target target = get_jit_target_from_environment(); if (target.features_any_of({Target::HVX_64, Target::HVX_128})) { f.update(0).hexagon().vectorize(r.x, 32); } else if (target.arch == Target::X86) { f.update(0).vectorize(r.x, 32); f.add_custom_lowering_pass(new CheckPredicatedStoreLoad(true, true)); } Image<int> im = f.realize(160, 160); auto func = [im_ref](int x, int y, int z) { return im_ref(x, y, z); }; if (check_image(im, func)) { return -1; } return 0; }
void rename_function(const String& old_name, const String& new_name) { auto const old = old_name.get(); auto const n3w = new_name.get(); auto const oldNe = const_cast<NamedEntity*>(Unit::GetNamedEntity(old)); auto const newNe = const_cast<NamedEntity*>(Unit::GetNamedEntity(n3w)); Func* func = Unit::lookupFunc(oldNe); if (!func) { // It's the caller's responsibility to ensure that the old function // exists. not_reached(); } if (!(func->attrs() & AttrDynamicInvoke)) { // When EvalJitEnableRenameFunction is false, the translator may wire // non-DynamicInvoke Func*'s into the TC. Don't rename functions. if (RuntimeOption::EvalJit && !RuntimeOption::EvalJitEnableRenameFunction) { raise_error("You must explicitly enable fb_rename_function in the JIT " "(-v Eval.JitEnableRenameFunction=true)"); } } Func *fnew = Unit::lookupFunc(newNe); if (fnew && fnew != func) { // To match hphpc, we silently ignore functions defined in user code that // have the same name as a function defined in a separable extension if (!fnew->isAllowOverride()) { raise_error("Function already defined: %s", n3w->data()); } return; } oldNe->setCachedFunc(nullptr); newNe->m_cachedFunc.bind(); newNe->setCachedFunc(func); if (RuntimeOption::EvalJit) { JIT::invalidateForRenameFunction(old); } }
int main(int argc, char **argv) { Func mandelbrot; Var x, y; Param<float> x_min, x_max, y_min, y_max, c_real, c_imag; Param<int> w, h, iters; Complex initial(lerp(x_min, x_max, cast<float>(x)/w), lerp(y_min, y_max, cast<float>(y)/h)); Complex c(c_real, c_imag); Var z; mandelbrot(x, y, z) = initial; RDom t(1, iters); Complex current = mandelbrot(x, y, t-1); mandelbrot(x, y, t) = current*current + c; // How many iterations until something escapes a circle of radius 2? Func count; Tuple escape = argmin(magnitude(mandelbrot(x, y, t)) < 4); // If it never escapes, use the value 0 count(x, y) = select(escape[1], 0, escape[0]); Var xi, yi, xo, yo; count.tile(x, y, xo, yo, xi, yi, 8, 8); count.parallel(yo).vectorize(xi, 4).unroll(xi).unroll(yi, 2); mandelbrot.compute_at(count, xo); Argument args[] = {x_min, x_max, y_min, y_max, c_real, c_imag, iters, w, h}; count.compile_to_file("mandelbrot", std::vector<Argument>(args, args + 9)); return 0; }
int main(int argc, char **argv) { // Move this test to correctness once we can support >4d buffer_ts on the gpu if (!get_jit_target_from_environment().has_gpu_feature()) { printf("No gpu target enabled. Skipping test.\n"); // This test is currently expected to error out. printf("Error: pretending that there was an error\n"); return -1; } Func f; Var v0, v1, v2, v3, v4; f(v0, v1, v2, v3, v4) = v0 + 2*v1 + 4*v2 + 8*v3 + 16*v4; f.compute_root().gpu_blocks(v3, v4).gpu_threads(v1, v2); // Linearize into an output buffer Func g; g(v0) = f(v0 % 2, (v0 / 2) % 2, (v0 / 4) % 2, (v0 / 8) % 2, (v0 / 16) % 2); Image<int> result = g.realize(32); // Delete this code once this test works. printf("Error: I should not have successfully compiled.\n"); return -1; for (int i = 0; i < result.width(); i++) { if (i != result(i)) { printf("result(%d) = %d instead of %d\n", i, result(i), i); return -1; } } printf("Success!\n"); return 0; }
// Emits assembly for `f`, JIT-compiles it, optionally validates one
// realization against a reference, and returns the elapsed time of ten
// further realizations as measured by currentTime().
//
// The reference for pixel (x, y) is input(cx, y) * 3 + input(cx1, y), where
// cx and cx1 are x and x+1 clamped to [MIN, MAX]. A mismatch prints the
// offending pixel and exits the process with -1.
double test(Func f, bool test_correctness = true) {
    f.compile_to_assembly(f.name() + ".s", Internal::vec<Argument>(input), f.name());
    f.compile_jit();
    f.realize(output);

    if (test_correctness) {
        for (int row = 0; row < output.height(); row++) {
            for (int col = 0; col < output.width(); col++) {
                const int ix1 = std::max(std::min(col, MAX), MIN);
                const int ix2 = std::max(std::min(col+1, MAX), MIN);
                uint16_t correct = input(ix1, row) * 3 + input(ix2, row);
                if (output(col, row) == correct) {
                    continue;
                }
                printf("output(%d, %d) = %d instead of %d\n",
                       col, row, output(col, row), correct);
                exit(-1);
            }
        }
    }

    // Timed section: ten back-to-back realizations.
    const double start = currentTime();
    for (int run = 0; run < 10; run++) {
        f.realize(output);
    }
    return currentTime() - start;
}
int main(int argc, char **argv) { Func source; source.define_extern("make_data", std::vector<ExternFuncArgument>(), Float(32), 2); Func sink; Var x, y; sink(x, y) = source(x, y) - sin(x + y); Var xi, yi; sink.tile(x, y, xi, yi, 32, 32); // Compute the source per tile of sink source.compute_at(sink, x); Image<float> output = sink.realize(100, 100); // Should be all zeroes. RDom r(output); float error = evaluate<float>(sum(abs(output(r.x, r.y)))); if (error != 0) { printf("Something went wrong\n"); return -1; } printf("Success!\n"); return 0; }
// Returns a Func that reads `source` with its coordinates wrapped into the
// given bounds, so the region described by the bounds repeats outside itself.
//
// bounds[i] is a (min, extent) pair for dimension i. A pair with both
// entries undefined leaves that dimension untouched; a pair with exactly one
// entry defined is a user error. Dimensions beyond bounds.size() are passed
// through unchanged. Supplying more bounds than `source` has dimensions is a
// user error.
Func repeat_image(const Func &source,
                  const std::vector<std::pair<Expr, Expr>> &bounds) {
    std::vector<Var> args(source.args());
    // The separating space before "has" keeps the function name readable in
    // the message.
    user_assert(args.size() >= bounds.size()) <<
        "repeat_image called with more bounds (" << bounds.size() <<
        ") than dimensions (" << args.size() << ") Func " <<
        source.name() << " has.\n";

    std::vector<Expr> actuals;
    for (size_t i = 0; i < bounds.size(); i++) {
        Var arg_var = args[i];

        Expr min = bounds[i].first;
        Expr extent = bounds[i].second;

        if (min.defined() && extent.defined()) {
            Expr coord = arg_var - min;  // Enforce zero origin.
            coord = coord % extent;      // Range is 0 to w-1
            coord = coord + min;         // Restore correct min

            // Out-of-range coordinates take the wrapped value; in-range ones
            // use the clamped coordinate, marked as the likely case.
            coord = select(arg_var < min || arg_var >= min + extent, coord,
                           clamp(likely(arg_var), min, min + extent - 1));

            actuals.push_back(coord);
        } else if (!min.defined() && !extent.defined()) {
            actuals.push_back(arg_var);
        } else {
            user_error << "Partially undefined bounds for dimension " << arg_var
                       << " of Func " << source.name() << "\n";
        }
    }

    // If there were fewer bounds than dimensions, regard the ones at the end as unbounded.
    actuals.insert(actuals.end(), args.begin() + actuals.size(), args.end());

    Func bounded("repeat_image");
    bounded(args) = source(actuals);

    return bounded;
}
/* Builds a pipeline performing n unrolled iterations of the game of life on
 * a torus (neighbour coordinates wrap at the input's width and height).
 * Each earlier iteration is obtained recursively and scheduled compute_root;
 * the returned Func holds u8 values, 1 for live cells and 0 for dead ones. */
Func gameOfLife(ImageParam input, int n) {
    Var x, y;
    Func in;

    if (n != 1) {
        // Previous generation comes from n-1 iterations, fully materialized.
        in = gameOfLife(input, n-1);
        in.compute_root();
    } else {
        // First generation reads the input image directly.
        in(x, y) = input(x, y);
    }

    Expr w = input.width(), h = input.height();

    // Wrapped neighbour coordinates.
    Expr W = (x+w-1) % w;
    Expr E = (x+1) % w;
    Expr N = (y+h-1) % h;
    Expr S = (y+1) % h;

    Expr livingNeighbors = (in(W, N) + in(x, N) +
                            in(E, N) + in(W, y) +
                            in(E, y) + in(W, S) +
                            in(x, S) + in(E, S));

    Expr alive = in(x, y) != 0;
    Expr survives = alive && livingNeighbors == 2;

    Func output;
    output(x, y) = select(livingNeighbors == 3 || survives, u8(1), u8(0));

    return output;
}