Ejemplo n.º 1
0
// Compiles and runs the pipeline `f` into the file-level `output` image and
// returns the time taken by ten further realizations (in whatever unit
// currentTime() reports).
//
// f                 The pipeline to build and time. Its assembly is also
//                   written to "<f.name()>.s".
// test_correctness  When true, every output pixel is compared against a
//                   reference computed from the file-level `input` image;
//                   the first mismatch is printed and the process exits
//                   with status -1.
double test(Func f, bool test_correctness = true) {
    // Emit assembly for inspection, then JIT-compile and do one realization
    // before any checking or timing takes place.
    f.compile_to_assembly(f.name() + ".s", Internal::vec<Argument>(input), f.name());
    f.compile_jit();
    f.realize(output);

    if (test_correctness) {
        for (int row = 0; row < output.height(); row++) {
            for (int col = 0; col < output.width(); col++) {
                // Reference value: 3 * input at the clamped column plus the
                // input at the clamped next column (clamped to [MIN, MAX]).
                int left = std::max(std::min(col, MAX), MIN);
                int right = std::max(std::min(col+1, MAX), MIN);
                uint16_t expected = input(left, row) * 3 + input(right, row);

                if (output(col, row) == expected) {
                    continue;
                }

                printf("output(%d, %d) = %d instead of %d\n",
                       col, row, output(col, row), expected);
                exit(-1);
            }
        }
    }

    // Timed section: ten back-to-back realizations.
    double start = currentTime();
    int runs_left = 10;
    while (runs_left > 0) {
        f.realize(output);
        runs_left--;
    }
    return currentTime() - start;
}
Ejemplo n.º 2
0
int main(int argc, char **argv) {
    ImageParam src(UInt(8), 1);
    Func dst;
    Var x;
    dst(x) = src(x);


    Var xo;
    dst.split(x, xo, x, 8*4096);
    // dst.parallel(xo); speeds up halide's memcpy considerably, but doesn't seem sporting
    dst.vectorize(x, 16);

    dst.compile_to_assembly("memcpy.s", {src}, "memcpy");
    dst.compile_jit();

    const int32_t buffer_size = 12345678;
    const int iterations = 50;

    Image<uint8_t> input(buffer_size);
    Image<uint8_t> output(buffer_size);

    src.set(input);

    // Get past one-time set-up issues for the ptx backend.
    dst.realize(output);

    double halide = 0, system = 0;
    for (int i = 0; i < iterations; i++) {
        double t1 = current_time();
        dst.realize(output);
        dst.realize(output);
        dst.realize(output);
        double t2 = current_time();
        memcpy(output.data(), input.data(), input.width());
        memcpy(output.data(), input.data(), input.width());
        memcpy(output.data(), input.data(), input.width());
        double t3 = current_time();
        system += t3-t2;
        halide += t2-t1;
    }

    printf("system memcpy: %.3e byte/s\n", (buffer_size / system) * 3 * 1000 * iterations);
    printf("halide memcpy: %.3e byte/s\n", (buffer_size / halide) * 3 * 1000 * iterations);

    // memcpy will win by a little bit for large inputs because it uses streaming stores
    if (halide > system * 2) {
        printf("Halide memcpy is slower than it should be.\n");
        return -1;
    }

    printf("Success!\n");
    return 0;
}
Ejemplo n.º 3
0
// Builds the camera pipeline and writes it out as "curved" (object/header via
// compile_to_file) and "curved.s" (assembly).
//
// Command line:
//   argv[1]  bit width of the unsigned output type
//   argv[2]  schedule index, stored in the file-level `schedule` variable
//
// Returns 0 on success, or 1 if either argument is missing.
int main(int argc, char **argv) {
    // Both arguments are mandatory. Without this guard atoi() would be
    // handed a null (or out-of-range) argv entry, which is undefined
    // behaviour; fail with a non-zero status instead so a calling build
    // script notices.
    if (argc < 3) {
        return 1;
    }

    // The camera pipe is specialized on the 2592x1968 images that
    // come in, so we'll just use an image instead of a uniform image.
    ImageParam input(UInt(16), 2);
    ImageParam matrix_3200(Float(32), 2, "m3200"), matrix_7000(Float(32), 2, "m7000");
    Param<float> color_temp("color_temp"); //, 3200.0f);
    Param<float> gamma("gamma"); //, 1.8f);
    Param<float> contrast("contrast"); //, 10.0f);
    Param<int> blackLevel("blackLevel"); //, 25);
    Param<int> whiteLevel("whiteLevel"); //, 1023);

    // shift things inwards to give us enough padding on the
    // boundaries so that we don't need to check bounds. We're going
    // to make a 2560x1920 output image, just like the FCam pipe, so
    // shift by 16, 12
    Func shifted;
    shifted(x, y) = input(x+16, y+12);

    // Parameterized output type, because LLVM PTX (GPU) backend does not
    // currently allow 8-bit computations
    int bit_width = atoi(argv[1]);
    Type result_type = UInt(bit_width);

    // Pick a schedule
    schedule = atoi(argv[2]);

    // Build the pipeline
    Func processed = process(shifted, result_type, matrix_3200, matrix_7000,
                             color_temp, gamma, contrast, blackLevel, whiteLevel);

    // We can generate slightly better code if we know the output is a whole number of tiles.
    Expr out_width = processed.output_buffer().width();
    Expr out_height = processed.output_buffer().height();
    processed
        .bound(tx, 0, (out_width/32)*32)
        .bound(ty, 0, (out_height/32)*32);

    //string s = processed.serialize();
    //printf("%s\n", s.c_str());

    // Argument order here defines the signature of the generated function.
    std::vector<Argument> args = {color_temp, gamma, contrast, blackLevel, whiteLevel,
                                  input, matrix_3200, matrix_7000};
    processed.compile_to_file("curved", args);
    processed.compile_to_assembly("curved.s", args);

    return 0;
}
Ejemplo n.º 4
0
// Builds the camera pipeline for the target taken from the environment and
// writes it out as the static library "curved" plus the assembly "curved.s".
//
// Command line:
//   argv[1]  bit width of the unsigned output type
//
// Returns 0 on success, or 1 if the bit-width argument is missing.
int main(int argc, char **argv) {
    // The bit width is mandatory. Without this guard atoi() would be handed
    // a null argv[1], which is undefined behaviour; fail with a non-zero
    // status instead so a calling build script notices.
    if (argc < 2) {
        return 1;
    }

    // The camera pipe is specialized on the 2592x1968 images that
    // come in, so we'll just use an image instead of a uniform image.
    ImageParam input(UInt(16), 2);
    ImageParam matrix_3200(Float(32), 2, "m3200"), matrix_7000(Float(32), 2, "m7000");
    Param<float> color_temp("color_temp"); //, 3200.0f);
    Param<float> gamma("gamma"); //, 1.8f);
    Param<float> contrast("contrast"); //, 10.0f);
    Param<int> blackLevel("blackLevel"); //, 25);
    Param<int> whiteLevel("whiteLevel"); //, 1023);

    // shift things inwards to give us enough padding on the
    // boundaries so that we don't need to check bounds. We're going
    // to make a 2560x1920 output image, just like the FCam pipe, so
    // shift by 16, 12. We also convert it to be signed, so we can deal
    // with values that fall below 0 during processing.
    Func shifted;
    shifted(x, y) = cast<int16_t>(input(x+16, y+12));

    // Parameterized output type, because LLVM PTX (GPU) backend does not
    // currently allow 8-bit computations
    int bit_width = atoi(argv[1]);
    Type result_type = UInt(bit_width);

    // Pick a target
    target = get_target_from_environment();

    // Build the pipeline
    Func processed = process(shifted, result_type, matrix_3200, matrix_7000,
                             color_temp, gamma, contrast, blackLevel, whiteLevel);

    // Argument order here defines the signature of the generated function.
    std::vector<Argument> args = {color_temp, gamma, contrast, blackLevel, whiteLevel,
                                  input, matrix_3200, matrix_7000};
    // TODO: it would be more efficient to call compile_to() a single time with the right arguments
    processed.compile_to_static_library("curved", args, "curved", target);
    processed.compile_to_assembly("curved.s", args, target);

    return 0;
}
Ejemplo n.º 5
0
int main(int argc, char **argv) {
    Func f, g;
    Var x, y;
    ImageParam param(Int(32), 2);
    Buffer<int> image1(128, 73);
    Buffer<int> image2(144, 23);

    f(x, y) = param(x, y)*2;

    param.dim(0).set_bounds(0, 128);

    f.set_error_handler(my_error_handler);

    // This should be fine
    param.set(image1);
    error_occurred = false;
    f.realize(20, 20);

    if (error_occurred) {
        printf("Error incorrectly raised\n");
        return -1;
    }
    // This should be an error, because dimension 0 of image 2 is not from 0 to 128 like we promised
    param.set(image2);
    error_occurred = false;
    f.realize(20, 20);

    if (!error_occurred) {
        printf("Error incorrectly not raised\n");
        return -1;
    }

    // Now try constraining the output buffer of a function
    g(x, y) = x*y;
    g.set_error_handler(my_error_handler);
    g.output_buffer().dim(0).set_stride(2);
    error_occurred = false;
    g.realize(image1);
    if (!error_occurred) {
        printf("Error incorrectly not raised when constraining output buffer\n");
        return -1;
    }

    Func h;
    h(x, y) = x*y;
    h.set_error_handler(my_error_handler);
    h.output_buffer()
        .dim(0)
            .set_stride(1)
            .set_bounds(0, ((h.output_buffer().dim(0).extent())/8)*8)
        .dim(1)
            .set_bounds(0, image1.dim(1).extent());
    error_occurred = false;
    h.realize(image1);

    std::string assembly_file = Internal::get_test_tmp_dir() + "h.s";
    Internal::ensure_no_file_exists(assembly_file);

    // Also check it compiles ok without an inferred argument list
    h.compile_to_assembly(assembly_file, {image1}, "h");
    if (error_occurred) {
        printf("Error incorrectly raised when constraining output buffer\n");
        return -1;
    }

    Internal::assert_file_exists(assembly_file);

    printf("Success!\n");
    return 0;
}
Ejemplo n.º 6
0
int main(int argc, char **argv) {
    Func f, g;
    Var x, y;
    ImageParam param(Int(32), 2);
    Image<int> image1(128, 73);
    Image<int> image2(144, 23);

    f(x, y) = param(x, y)*2;

    param.set_bounds(0, 0, 128);

    f.set_error_handler(my_error_handler);

    // This should be fine
    param.set(image1);
    error_occurred = false;
    f.realize(20, 20);

    if (error_occurred) {
        printf("Error incorrectly raised\n");
        return -1;
    }
    // This should be an error, because dimension 0 of image 2 is not from 0 to 128 like we promised
    param.set(image2);
    error_occurred = false;
    f.realize(20, 20);

    if (!error_occurred) {
        printf("Error incorrectly not raised\n");
        return -1;
    }

    // Now try constraining the output buffer of a function
    g(x, y) = x*y;
    g.set_error_handler(my_error_handler);
    g.output_buffer().set_stride(0, 2);
    error_occurred = false;
    g.realize(image1);
    if (!error_occurred) {
        printf("Error incorrectly not raised when constraining output buffer\n");
        return -1;
    }

    Func h;
    h(x, y) = x*y;
    h.set_error_handler(my_error_handler);
    h.output_buffer()
        .set_stride(0, 1)
        .set_bounds(1, 0, image1.extent(1))
        .set_bounds(0, 0, ((h.output_buffer().extent(0))/8)*8);
    error_occurred = false;
    h.realize(image1);
    // Also check it compiles ok without an inferred argument list
    h.compile_to_assembly("h.s", Internal::vec<Argument>(image1), "h");
    if (error_occurred) {
        printf("Error incorrectly raised when constraining output buffer\n");
        return -1;
    }

    printf("Success!\n");
    return 0;
}