int no_op_store_test() { Var x("x"), y("y"); Func f ("f"), ref("ref"); RDom r(0, 80, 0, 80); r.where(r.x + r.y < 47); ref(x, y) = x + y; ref(2*r.x + 1, r.y) = ref(2*r.x + 1, r.y); ref(2*r.x, 3*r.y) = ref(2*r.x, 3*r.y); Image<int> im_ref = ref.realize(240, 240); f(x, y) = x + y; f(2*r.x + 1, r.y) = f(2*r.x + 1, r.y); f(2*r.x, 3*r.y) = f(2*r.x, 3*r.y); Target target = get_jit_target_from_environment(); if (target.features_any_of({Target::HVX_64, Target::HVX_128})) { f.update(0).hexagon().vectorize(r.x, 32); f.update(1).hexagon().vectorize(r.y, 32); } else if (target.arch == Target::X86) { f.update(0).vectorize(r.x, 32); f.update(1).vectorize(r.y, 32); f.add_custom_lowering_pass(new CheckPredicatedStoreLoad(false, false)); } Image<int> im = f.realize(240, 240); auto func = [im_ref](int x, int y, int z) { return im_ref(x, y, z); }; if (check_image(im, func)) { return -1; } return 0; }
int vectorized_predicated_store_scalarized_predicated_load_test() { Var x("x"), y("y"); Func f ("f"), g("g"), ref("ref"); g(x, y) = x + y; g.compute_root(); RDom r(0, 100, 0, 100); r.where(r.x + r.y < r.x*r.y); ref(x, y) = 10; ref(r.x, r.y) += g(2*r.x, r.y) + g(2*r.x + 1, r.y); Image<int> im_ref = ref.realize(170, 170); f(x, y) = 10; f(r.x, r.y) += g(2*r.x, r.y) + g(2*r.x + 1, r.y); Target target = get_jit_target_from_environment(); if (target.features_any_of({Target::HVX_64, Target::HVX_128})) { f.update(0).hexagon().vectorize(r.x, 32); } else if (target.arch == Target::X86) { f.update(0).vectorize(r.x, 32); f.add_custom_lowering_pass(new CheckPredicatedStoreLoad(true, true)); } Image<int> im = f.realize(170, 170); auto func = [im_ref](int x, int y, int z) { return im_ref(x, y, z); }; if (check_image(im, func)) { return -1; } return 0; }
int not_dependent_on_vectorized_var_test() { Var x("x"), y("y"), z("z"); Func f ("f"), g("g"), ref("ref"); g(x, y, z) = x + y + z; g.compute_root(); RDom r(0, 80, 0, 80, 0, 80); r.where(r.z*r.z < 47); ref(x, y, z) = 10; ref(r.x, r.y, 1) = max(g(0, 1, 2), g(r.x + 1, r.y, 2)); Image<int> im_ref = ref.realize(160, 160, 160); f(x, y, z) = 10; f(r.x, r.y, 1) = max(g(0, 1, 2), g(r.x + 1, r.y, 2)); f.update(0).allow_race_conditions(); Target target = get_jit_target_from_environment(); if (target.features_any_of({Target::HVX_64, Target::HVX_128})) { f.update(0).hexagon().vectorize(r.z, 32); } else if (target.arch == Target::X86) { f.update(0).vectorize(r.z, 32); f.add_custom_lowering_pass(new CheckPredicatedStoreLoad(false, false)); } Image<int> im = f.realize(160, 160, 160); auto func = [im_ref](int x, int y, int z) { return im_ref(x, y, z); }; if (check_image(im, func)) { return -1; } return 0; }
int multiple_vectorized_predicate_test() { int size = 100; Var x("x"), y("y"); Func f ("f"), g("g"), ref("ref"); g(x, y) = x * y; g.compute_root(); RDom r(0, size, 0, size); r.where(r.x + r.y < 57); r.where(r.x*r.y + r.x*r.x < 490); ref(x, y) = 10; ref(r.x, r.y) = g(size-r.x, r.y) * 2 + g(67-r.x, r.y); Image<int> im_ref = ref.realize(size, size); f(x, y) = 10; f(r.x, r.y) = g(size-r.x, r.y) * 2 + g(67-r.x, r.y); Target target = get_jit_target_from_environment(); if (target.features_any_of({Target::HVX_64, Target::HVX_128})) { //TODO(psuriana): the hexagon test for this one is broken //f.update(0).hexagon().vectorize(r.x, 32); } else if (target.arch == Target::X86) { f.update(0).vectorize(r.x, 32); f.add_custom_lowering_pass(new CheckPredicatedStoreLoad(true, true)); } Image<int> im = f.realize(size, size); auto func = [&im_ref](int x, int y, int z) { return im_ref(x, y, z); }; if (check_image(im, func)) { return -1; } return 0; }
int vectorized_dense_load_with_stride_minus_one_test() { int size = 73; Var x("x"), y("y"); Func f ("f"), g("g"), ref("ref"); g(x, y) = x * y; g.compute_root(); ref(x, y) = select(x < 23, g(size-x, y) * 2 + g(20-x, y), undef<int>()); Image<int> im_ref = ref.realize(size, size); f(x, y) = select(x < 23, g(size-x, y) * 2 + g(20-x, y), undef<int>()); Target target = get_jit_target_from_environment(); if (target.features_any_of({Target::HVX_64, Target::HVX_128})) { //TODO(psuriana): the hexagon test for this one is broken //f.hexagon().vectorize(x, 16); } else if (target.arch == Target::X86) { f.vectorize(x, 32); f.add_custom_lowering_pass(new CheckPredicatedStoreLoad(true, true)); } Image<int> im = f.realize(size, size); auto func = [&im_ref, &im](int x, int y, int z) { // For x >= 23, the buffer is undef return (x < 23) ? im_ref(x, y, z) : im(x, y, z); }; if (check_image(im, func)) { return -1; } return 0; }
bool perform_test(const char *label, const Target target, Func f, int expected_nvarying, float tol, std::function<float(int x, int y, int c)> expected_val) { fprintf(stderr, "%s\n", label); Buffer<float> out(8, 8, 3); varyings.clear(); f.add_custom_lowering_pass(new CountVarying); f.realize(out, target); // Check for the correct number of varying attributes if ((int)varyings.size() != expected_nvarying) { fprintf(stderr, "%s: Error: wrong number of varying attributes: %d should be %d\n", label, (int)varyings.size(), expected_nvarying); return false; } // Check for correct result values out.copy_to_host(); if (!Testing::check_result<float>(out, tol, expected_val)) { return false; } fprintf(stderr, "%s Passed!\n", label); return true; }
int main(int argc, char **argv) { ImageParam input(UInt(8), 1); input.dim(0).set_bounds(0, size); { Func f; Var x; f(x) = input(x); // Output must have the same size as the input. f.output_buffer().dim(0).set_bounds(input.dim(0).min(), input.dim(0).extent()); f.add_custom_lowering_pass(new Validator); f.compile_jit(); Buffer<uint8_t> dummy(size); dummy.fill(42); input.set(dummy); Buffer<uint8_t> out = f.realize(size); if (!out.all_equal(42)) { std::cerr << "wrong output" << std::endl; exit(-1); } } { Func f; Var x; f(x) = undef(UInt(8)); RDom r(input); f(r.x) = cast<uint8_t>(42); f.add_custom_lowering_pass(new Validator); f.compile_jit(); Buffer<uint8_t> dummy(size); input.set(dummy); Buffer<uint8_t> out = f.realize(size); if (!out.all_equal(42)) { std::cerr << "wrong output" << std::endl; exit(-1); } } std::cout << "Success!" << std::endl; return 0; }
int main(int argc, char **argv) { if (!get_jit_target_from_environment().has_gpu_feature()) { printf("Not running test because no gpu target enabled\n"); return 0; } { Func f; Var x, y, z; // Construct a Func with lots of potential race conditions, and // then run it in thread blocks on the gpu. f(x, y) = x + 100 * y; const int passes = 10; for (int i = 0; i < passes; i++) { RDom rx(0, 10); // Flip each row, using spots 10-19 as temporary storage f(rx + 10, y) = f(9 - rx, y); f(rx, y) = f(rx + 10, y); // Flip each column the same way RDom ry(0, 8); f(x, ry + 8) = f(x, 7 - ry); f(x, ry) = f(x, ry + 8); } Func g; g(x, y) = f(0, 0)+ f(9, 7); g.gpu_tile(x, y, 16, 8); f.compute_at(g, Var::gpu_blocks()); for (int i = 0; i < passes; i++) { f.update(i*4 + 0).gpu_threads(y); f.update(i*4 + 1).gpu_threads(y); f.update(i*4 + 2).gpu_threads(x); f.update(i*4 + 3).gpu_threads(x); } Image<int> out = g.realize(100, 100); for (int y = 0; y < out.height(); y++) { for (int x = 0; x < out.width(); x++) { int correct = 7*100 + 9; if (out(x, y) != correct) { printf("out(%d, %d) = %d instead of %d\n", x, y, out(x, y), correct); return -1; } } } } { // Construct a Func with undef stages, then run it in thread // blocks and make sure the right number of syncthreads are // added. Func f; Var x, y, z; f(x, y) = undef<int>(); f(x, y) += x + 100 * y; // This next line is dubious, because it entirely masks the // effect of the previous definition. If you add an undefined // value to the previous def, then Halide can evaluate this to // whatever it likes. Currently we'll just elide this update // definition. f(x, y) += undef<int>(); f(x, y) += y * 100 + x; Func g; g(x, y) = f(0, 0) + f(7, 7); g.gpu_tile(x, y, 8, 8); f.compute_at(g, Var::gpu_blocks()); f.gpu_threads(x, y); f.update(0).gpu_threads(x, y); f.update(1).gpu_threads(x, y); f.update(2).gpu_threads(x, y); // There should be two thread barriers: one in between the // non-undef definitions, and one between f and g. g.add_custom_lowering_pass(new CheckBarrierCount(2)); Image<int> out = g.realize(100, 100); } printf("Success!\n"); return 0; }