Ejemplo n.º 1
0
int no_op_store_test() {
    Var x("x"), y("y");
    Func f ("f"), ref("ref");

    RDom r(0, 80, 0, 80);
    r.where(r.x + r.y < 47);

    ref(x, y) = x + y;
    ref(2*r.x + 1, r.y) = ref(2*r.x + 1, r.y);
    ref(2*r.x, 3*r.y) = ref(2*r.x, 3*r.y);
    Image<int> im_ref = ref.realize(240, 240);

    f(x, y) = x + y;
    f(2*r.x + 1, r.y) = f(2*r.x + 1, r.y);
    f(2*r.x, 3*r.y) = f(2*r.x, 3*r.y);

    Target target = get_jit_target_from_environment();
    if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
        f.update(0).hexagon().vectorize(r.x, 32);
        f.update(1).hexagon().vectorize(r.y, 32);
    } else if (target.arch == Target::X86) {
        f.update(0).vectorize(r.x, 32);
        f.update(1).vectorize(r.y, 32);
        f.add_custom_lowering_pass(new CheckPredicatedStoreLoad(false, false));
    }

    Image<int> im = f.realize(240, 240);
    auto func = [im_ref](int x, int y, int z) { return im_ref(x, y, z); };
    if (check_image(im, func)) {
        return -1;
    }
    return 0;
}
Ejemplo n.º 2
0
int vectorized_predicated_store_scalarized_predicated_load_test() {
    Var x("x"), y("y");
    Func f ("f"), g("g"), ref("ref");

    g(x, y) = x + y;
    g.compute_root();

    RDom r(0, 100, 0, 100);
    r.where(r.x + r.y < r.x*r.y);

    ref(x, y) = 10;
    ref(r.x, r.y) += g(2*r.x, r.y) + g(2*r.x + 1, r.y);
    Image<int> im_ref = ref.realize(170, 170);

    f(x, y) = 10;
    f(r.x, r.y) += g(2*r.x, r.y) + g(2*r.x + 1, r.y);

    Target target = get_jit_target_from_environment();
    if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
        f.update(0).hexagon().vectorize(r.x, 32);
    } else if (target.arch == Target::X86) {
        f.update(0).vectorize(r.x, 32);
        f.add_custom_lowering_pass(new CheckPredicatedStoreLoad(true, true));
    }

    Image<int> im = f.realize(170, 170);
    auto func = [im_ref](int x, int y, int z) { return im_ref(x, y, z); };
    if (check_image(im, func)) {
        return -1;
    }
    return 0;
}
Ejemplo n.º 3
0
int not_dependent_on_vectorized_var_test() {
    Var x("x"), y("y"), z("z");
    Func f ("f"), g("g"), ref("ref");

    g(x, y, z) = x + y + z;
    g.compute_root();

    RDom r(0, 80, 0, 80, 0, 80);
    r.where(r.z*r.z < 47);

    ref(x, y, z) = 10;
    ref(r.x, r.y, 1) = max(g(0, 1, 2), g(r.x + 1, r.y, 2));
    Image<int> im_ref = ref.realize(160, 160, 160);

    f(x, y, z) = 10;
    f(r.x, r.y, 1) = max(g(0, 1, 2), g(r.x + 1, r.y, 2));

    f.update(0).allow_race_conditions();

    Target target = get_jit_target_from_environment();
    if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
        f.update(0).hexagon().vectorize(r.z, 32);
    } else if (target.arch == Target::X86) {
        f.update(0).vectorize(r.z, 32);
        f.add_custom_lowering_pass(new CheckPredicatedStoreLoad(false, false));
    }

    Image<int> im = f.realize(160, 160, 160);
    auto func = [im_ref](int x, int y, int z) { return im_ref(x, y, z); };
    if (check_image(im, func)) {
        return -1;
    }
    return 0;
}
Ejemplo n.º 4
0
int multiple_vectorized_predicate_test() {
    int size = 100;
    Var x("x"), y("y");
    Func f ("f"), g("g"), ref("ref");

    g(x, y) = x * y;
    g.compute_root();

    RDom r(0, size, 0, size);
    r.where(r.x + r.y < 57);
    r.where(r.x*r.y + r.x*r.x < 490);

    ref(x, y) = 10;
    ref(r.x, r.y) = g(size-r.x, r.y) * 2 + g(67-r.x, r.y);
    Image<int> im_ref = ref.realize(size, size);

    f(x, y) = 10;
    f(r.x, r.y) = g(size-r.x, r.y) * 2 + g(67-r.x, r.y);

    Target target = get_jit_target_from_environment();
    if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
        //TODO(psuriana): the hexagon test for this one is broken
        //f.update(0).hexagon().vectorize(r.x, 32);
    } else if (target.arch == Target::X86) {
        f.update(0).vectorize(r.x, 32);
        f.add_custom_lowering_pass(new CheckPredicatedStoreLoad(true, true));
    }

    Image<int> im = f.realize(size, size);
    auto func = [&im_ref](int x, int y, int z) { return im_ref(x, y, z); };
    if (check_image(im, func)) {
        return -1;
    }
    return 0;
}
Ejemplo n.º 5
0
int vectorized_dense_load_with_stride_minus_one_test() {
    int size = 73;
    Var x("x"), y("y");
    Func f ("f"), g("g"), ref("ref");

    g(x, y) = x * y;
    g.compute_root();

    ref(x, y) = select(x < 23, g(size-x, y) * 2 + g(20-x, y), undef<int>());
    Image<int> im_ref = ref.realize(size, size);

    f(x, y) = select(x < 23, g(size-x, y) * 2 + g(20-x, y), undef<int>());

    Target target = get_jit_target_from_environment();
    if (target.features_any_of({Target::HVX_64, Target::HVX_128})) {
        //TODO(psuriana): the hexagon test for this one is broken
        //f.hexagon().vectorize(x, 16);
    } else if (target.arch == Target::X86) {
        f.vectorize(x, 32);
        f.add_custom_lowering_pass(new CheckPredicatedStoreLoad(true, true));
    }

    Image<int> im = f.realize(size, size);
    auto func = [&im_ref, &im](int x, int y, int z) {
        // For x >= 23, the buffer is undef
        return (x < 23) ? im_ref(x, y, z) : im(x, y, z);
    };
    if (check_image(im, func)) {
        return -1;
    }
    return 0;
}
Ejemplo n.º 6
0
// Runs one labelled test case: realizes `f` into an 8x8x3 float buffer on
// `target` with a CountVarying lowering pass attached, then checks both the
// number of entries recorded in `varyings` and the output values.
//
// label             - name printed to stderr before the run and on success.
// target            - target passed to realize().
// f                 - pipeline to run; a lowering pass is added to it.
// expected_nvarying - required size of `varyings` after realization.
// tol               - tolerance forwarded to Testing::check_result.
// expected_val      - expected output value for a coordinate (x, y, c).
//
// Returns true when both checks pass, false otherwise.
bool perform_test(const char *label, const Target target, Func f, int expected_nvarying, float tol, std::function<float(int x, int y, int c)> expected_val) {
    fprintf(stderr, "%s\n", label);

    Buffer<float> out(8, 8, 3);

    // Start from an empty record so that only this run is counted.
    varyings.clear();
    f.add_custom_lowering_pass(new CountVarying);
    f.realize(out, target);

    // Check for the correct number of varying attributes
    const int found_nvarying = static_cast<int>(varyings.size());
    if (found_nvarying != expected_nvarying) {
        fprintf(stderr,
                "%s: Error: wrong number of varying attributes: %d should be %d\n",
                label, found_nvarying, expected_nvarying);
        return false;
    }

    // Check for correct result values; the buffer is copied to the host
    // before it is inspected.
    out.copy_to_host();

    if (Testing::check_result<float>(out, tol, expected_val)) {
        fprintf(stderr, "%s Passed!\n", label);
        return true;
    }
    return false;
}
Ejemplo n.º 7
0
// Builds two one-dimensional pipelines around a bounded ImageParam, attaches
// a Validator lowering pass to each, and checks that every output element
// equals 42. Exits with -1 on a wrong output; otherwise prints "Success!"
// and returns 0.
int main(int argc, char **argv) {
    // Shared failure path: report the problem and terminate the process.
    const auto fail = []() {
        std::cerr << "wrong output" << std::endl;
        exit(-1);
    };

    ImageParam input(UInt(8), 1);
    input.dim(0).set_bounds(0, size);

    // Case 1: a straight copy of the input.
    {
        Func f;
        Var x;
        f(x) = input(x);
        // Output must have the same size as the input.
        f.output_buffer().dim(0).set_bounds(input.dim(0).min(), input.dim(0).extent());
        f.add_custom_lowering_pass(new Validator);
        f.compile_jit();

        Buffer<uint8_t> dummy(size);
        dummy.fill(42);
        input.set(dummy);
        Buffer<uint8_t> out = f.realize(size);
        if (!out.all_equal(42)) {
            fail();
        }
    }

    // Case 2: an undef pure definition that is filled in by an update over a
    // reduction domain built from the input. The input's contents are never
    // read here, so the bound buffer is left unfilled.
    {
        Func f;
        Var x;
        f(x) = undef(UInt(8));
        RDom r(input);
        f(r.x) = cast<uint8_t>(42);

        f.add_custom_lowering_pass(new Validator);
        f.compile_jit();

        Buffer<uint8_t> dummy(size);
        input.set(dummy);
        Buffer<uint8_t> out = f.realize(size);
        if (!out.all_equal(42)) {
            fail();
        }
    }

    std::cout << "Success!" << std::endl;

    return 0;
}
Ejemplo n.º 8
0
// GPU test with two parts:
//   1. A Func with many update stages that read and write overlapping
//      regions is computed per GPU block with its updates spread across GPU
//      threads, and the output is compared with a known value.
//   2. A Func with undef stages is scheduled the same way while a
//      CheckBarrierCount lowering pass is attached to the consumer.
// Returns 0 on success (or when no GPU feature is enabled) and -1 on a wrong
// output value.
int main(int argc, char **argv) {
    // Skip (and report success) when the JIT target has no GPU feature.
    if (!get_jit_target_from_environment().has_gpu_feature()) {
        printf("Not running test because no gpu target enabled\n");
        return 0;
    }

    {
        Func f;
        // NOTE: z is declared but not used in this scope.
        Var x, y, z;

        // Construct a Func with lots of potential race conditions, and
        // then run it in thread blocks on the gpu.

        f(x, y) = x + 100 * y;

        // Each pass appends four update definitions to f: two for the row
        // flip and two for the column flip.
        const int passes = 10;
        for (int i = 0; i < passes; i++) {
            RDom rx(0, 10);
            // Flip each row, using spots 10-19 as temporary storage
            f(rx + 10, y) = f(9 - rx, y);
            f(rx, y) = f(rx + 10, y);
            // Flip each column the same way
            RDom ry(0, 8);
            f(x, ry + 8) = f(x, 7 - ry);
            f(x, ry) = f(x, ry + 8);
        }

        // The consumer reads only two points of f: (0, 0) and (9, 7).
        Func g;
        g(x, y) = f(0, 0)+ f(9, 7);

        g.gpu_tile(x, y, 16, 8);
        f.compute_at(g, Var::gpu_blocks());

        // Update stages are indexed in definition order, four per pass: the
        // row flips (offsets 0 and 1) are spread across threads in y, the
        // column flips (offsets 2 and 3) across threads in x.
        for (int i = 0; i < passes; i++) {
            f.update(i*4 + 0).gpu_threads(y);
            f.update(i*4 + 1).gpu_threads(y);
            f.update(i*4 + 2).gpu_threads(x);
            f.update(i*4 + 3).gpu_threads(x);
        }

        Image<int> out = g.realize(100, 100);
        for (int y = 0; y < out.height(); y++) {
            for (int x = 0; x < out.width(); x++) {
                // passes is even, so every flip is undone by a later one and
                // f ends at its initial values:
                // f(0, 0) + f(9, 7) = 0 + (9 + 100 * 7).
                int correct = 7*100 + 9;
                if (out(x, y) != correct) {
                    printf("out(%d, %d) = %d instead of %d\n",
                           x, y, out(x, y), correct);
                    return -1;
                }
            }
        }

    }

    {
        // Construct a Func with undef stages, then run it in thread
        // blocks and make sure the right number of syncthreads are
        // added.

        Func f;
        // NOTE: z is declared but not used in this scope.
        Var x, y, z;
        f(x, y) = undef<int>();
        f(x, y) += x + 100 * y;
        // This next line is dubious, because it entirely masks the
        // effect of the previous definition. If you add an undefined
        // value to the previous def, then Halide can evaluate this to
        // whatever it likes. Currently we'll just elide this update
        // definition.
        f(x, y) += undef<int>();
        f(x, y) += y * 100 + x;

        // The consumer reads only two points of f: (0, 0) and (7, 7).
        Func g;
        g(x, y) = f(0, 0) + f(7, 7);

        g.gpu_tile(x, y, 8, 8);
        f.compute_at(g, Var::gpu_blocks());

        // The pure definition and all three update stages are mapped onto
        // GPU threads in both x and y.
        f.gpu_threads(x, y);
        f.update(0).gpu_threads(x, y);
        f.update(1).gpu_threads(x, y);
        f.update(2).gpu_threads(x, y);

        // There should be two thread barriers: one in between the
        // non-undef definitions, and one between f and g.
        g.add_custom_lowering_pass(new CheckBarrierCount(2));

        // The output values are not inspected here; presumably the check is
        // done by the CheckBarrierCount pass (defined elsewhere) when g is
        // compiled for this realization.
        Image<int> out = g.realize(100, 100);
    }

    printf("Success!\n");
    return 0;
}