Ejemplo n.º 1
0
int main(int, char**)
{
    std::vector<std::chrono::duration<double,std::milli>> duration_vector_1;
    std::vector<std::chrono::duration<double,std::milli>> duration_vector_2;

    Halide::Buffer<uint8_t> input = Halide::Tools::load_image("./utils/images/rgb.png");

    Halide::Buffer<uint8_t> output_ref_f(input.width(), input.height(), input.channels());
    Halide::Buffer<uint8_t> output_ref_g(input.width(), input.height(), input.channels());
    Halide::Buffer<uint8_t> output_ref_h(input.width(), input.height(), input.channels());
    Halide::Buffer<uint8_t> output_ref_k(input.width(), input.height(), input.channels());
    Halide::Buffer<uint8_t> output_tiramisu_f(input.width(), input.height(), input.channels());
    Halide::Buffer<uint8_t> output_tiramisu_g(input.width(), input.height(), input.channels());
    Halide::Buffer<uint8_t> output_tiramisu_h(input.width(), input.height(), input.channels());
    Halide::Buffer<uint8_t> output_tiramisu_k(input.width(), input.height(), input.channels());

    // Warm up
    fusion_tiramisu(input.raw_buffer(), output_tiramisu_f.raw_buffer(),
		    output_tiramisu_g.raw_buffer(), output_tiramisu_h.raw_buffer(),
		    output_tiramisu_k.raw_buffer());
    fusion_ref(input.raw_buffer(), output_ref_f.raw_buffer(), output_ref_g.raw_buffer(),
	       output_ref_h.raw_buffer(), output_ref_k.raw_buffer());

    // Tiramisu
    for (int i=0; i<NB_TESTS; i++)
    {
        auto start1 = std::chrono::high_resolution_clock::now();
        fusion_tiramisu(input.raw_buffer(), output_tiramisu_f.raw_buffer(),
			output_tiramisu_g.raw_buffer(), output_tiramisu_h.raw_buffer(),
			output_tiramisu_k.raw_buffer());
        auto end1 = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double,std::milli> duration1 = end1 - start1;
        duration_vector_1.push_back(duration1);
    }

    // Reference
    for (int i=0; i<NB_TESTS; i++)
    {
        auto start2 = std::chrono::high_resolution_clock::now();
        fusion_ref(input.raw_buffer(), output_ref_f.raw_buffer(), output_ref_g.raw_buffer(),
		   output_ref_h.raw_buffer(), output_ref_k.raw_buffer());
        auto end2 = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double,std::milli> duration2 = end2 - start2;
        duration_vector_2.push_back(duration2);
    }

    print_time("performance_CPU.csv", "fusion",
               {"Tiramisu", "Halide"},
               {median(duration_vector_1), median(duration_vector_2)});

    Halide::Tools::save_image(output_tiramisu_h, "./build/fusion_h_tiramisu.png");
    Halide::Tools::save_image(output_tiramisu_k, "./build/fusion_k_tiramisu.png");
    Halide::Tools::save_image(output_ref_h, "./build/fusion_h_ref.png");
    Halide::Tools::save_image(output_ref_k, "./build/fusion_k_ref.png");

    if (CHECK_CORRECTNESS)
	compare_buffers("Fusion",  output_ref_k, output_tiramisu_k);

    return 0;
}
Ejemplo n.º 2
0
int main(int, char**)
{
    std::vector<std::chrono::duration<double, std::milli>> duration_vector;

    Halide::Buffer<uint8_t> input = Halide::Tools::load_image("../rgb.png");

    Halide::Buffer<float> kernel(3, 3);
    kernel(0,0) = 0; kernel(0,1) = 1.0f/5; kernel(0,2) = 0;
    kernel(1,0) = 1.0f/5; kernel(1,1) = 1.0f/5; kernel(1,2) = 1.0f/5;
    kernel(2,0) = 0; kernel(2,1) = 1; kernel(2,2) = 0;

    // Small size discrepancy with Halide benchmark:
    Halide::Buffer<uint8_t> output(input.width(), input.height(), input.channels());

    std::cout << "Dimensions : " << std::endl;
    std::cout << "input.extent(0): " << input.extent(0) << std::endl;  // Rows
    std::cout << "input.extent(1): " << input.extent(1) << std::endl;  // Cols
    std::cout << "input.extent(2): " << input.extent(2) << std::endl;  // Colors

    #ifdef __PROFILE_CUDA__
    cudaProfilerStop();
    #endif

    // Warm up
    pencil_convolution(input.extent(0), input.extent(1), input.extent(1),
                       (uint8_t *) input.raw_buffer()->host,
                       (float *) kernel.raw_buffer()->host,
                       (uint8_t *) output.raw_buffer()->host);

    #ifdef __PROFILE_CUDA__
    cudaProfilerStart();
    #endif

    // Tiramisu
    for (int i = 0; i < 100; i++) {
        auto start = std::chrono::high_resolution_clock::now();
        pencil_convolution(input.extent(0), input.extent(1), input.extent(1),
                           (uint8_t *) input.raw_buffer()->host,
                           (float *) kernel.raw_buffer()->host,
                           (uint8_t *) output.raw_buffer()->host);

        auto end = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double, std::milli> duration = end - start;
        duration_vector.push_back(duration);
    }

    std::cout << "time: " << median(duration_vector) << std::endl;

    return 0;
}
Ejemplo n.º 3
0
void print(Halide::Buffer<T> buf) {
    for (int j = 0; j < std::min(buf.height(), 10); j++) {
        std::stringstream oss;
        for (int i = 0; i < std::min(buf.width(), 10); i++) {
            oss << " [";
            for (int k = 0; k < buf.channels(); k++) {
                oss << std::fixed << std::setprecision(1);
                if (k > 0) {
                    oss << std::setw(4);
                }
                oss << +buf(i, j, k);
            }
            oss << "]";
        }
        LOGI("%s", oss.str().c_str());
    }
}