Exemple #1
0
void getCanonicalSize(const Halide::Buffer<>& buffer, int* width, int* height,
                      int* channels, int* batch)
{
    CV_Assert(buffer.dimensions() == 4);
    *width = buffer.extent(0);
    *height = buffer.extent(1);
    *channels = buffer.extent(2);
    *batch = buffer.extent(3);
}
int main(int, char**)
{
    std::vector<std::chrono::duration<double, std::milli>> duration_vector;

    Halide::Buffer<uint8_t> input = Halide::Tools::load_image("../rgb.png");

    Halide::Buffer<float> kernel(3, 3);
    kernel(0,0) = 0; kernel(0,1) = 1.0f/5; kernel(0,2) = 0;
    kernel(1,0) = 1.0f/5; kernel(1,1) = 1.0f/5; kernel(1,2) = 1.0f/5;
    kernel(2,0) = 0; kernel(2,1) = 1; kernel(2,2) = 0;

    // Small size discrepancy with Halide benchmark:
    Halide::Buffer<uint8_t> output(input.width(), input.height(), input.channels());

    std::cout << "Dimensions : " << std::endl;
    std::cout << "input.extent(0): " << input.extent(0) << std::endl;  // Rows
    std::cout << "input.extent(1): " << input.extent(1) << std::endl;  // Cols
    std::cout << "input.extent(2): " << input.extent(2) << std::endl;  // Colors

    #ifdef __PROFILE_CUDA__
    cudaProfilerStop();
    #endif

    // Warm up
    pencil_convolution(input.extent(0), input.extent(1), input.extent(1),
                       (uint8_t *) input.raw_buffer()->host,
                       (float *) kernel.raw_buffer()->host,
                       (uint8_t *) output.raw_buffer()->host);

    #ifdef __PROFILE_CUDA__
    cudaProfilerStart();
    #endif

    // Tiramisu
    for (int i = 0; i < 100; i++) {
        auto start = std::chrono::high_resolution_clock::now();
        pencil_convolution(input.extent(0), input.extent(1), input.extent(1),
                           (uint8_t *) input.raw_buffer()->host,
                           (float *) kernel.raw_buffer()->host,
                           (uint8_t *) output.raw_buffer()->host);

        auto end = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double, std::milli> duration = end - start;
        duration_vector.push_back(duration);
    }

    std::cout << "time: " << median(duration_vector) << std::endl;

    return 0;
}
int main(int, char**)
{
    std::vector<std::chrono::duration<double,std::milli>> duration_vector_1;
    std::vector<std::chrono::duration<double,std::milli>> duration_vector_2;
    Halide::Buffer<uint8_t> input = Halide::Tools::load_image("./utils/images/rgb.png");
    Halide::Buffer<int32_t> size(2);
    size(0) = input.extent(0);
    size(1) = input.extent(1);

    Halide::Buffer<uint8_t> output_ref_y(input.width(), input.height());
    Halide::Buffer<uint8_t> output_ref_u(input.width()/2, input.height()/2);
    Halide::Buffer<uint8_t> output_ref_v(input.width()/2, input.height()/2);

    Halide::Buffer<uint8_t> output_tiramisu_y(input.width(), input.height());
    Halide::Buffer<uint8_t> output_tiramisu_u(input.width()/2, input.height()/2);
    Halide::Buffer<uint8_t> output_tiramisu_v(input.width()/2, input.height()/2);

    std::cout << "STARTING TEST\n";
    std::cout << "y size (width, height): " << output_tiramisu_y.width() << ", " << output_tiramisu_y.height() << "\n";
    std::cout << "u size (width, height): " << output_tiramisu_u.width() << ", " << output_tiramisu_u.height() << "\n";
    std::cout << "v size (width, height): " << output_tiramisu_v.width() << ", " << output_tiramisu_v.height() << "\n";
    // Warm up
    rgbyuv420gpu_tiramisu(size.raw_buffer(), input.raw_buffer(), output_tiramisu_y.raw_buffer(), output_tiramisu_u.raw_buffer(), output_tiramisu_v.raw_buffer());
    run_halide(input, output_ref_y, output_ref_u, output_ref_v);

    // Tiramisu
    for (int i=0; i<NB_TESTS; i++)
    {
        auto start1 = std::chrono::high_resolution_clock::now();
        rgbyuv420gpu_tiramisu(size.raw_buffer(), input.raw_buffer(), output_tiramisu_y.raw_buffer(), output_tiramisu_u.raw_buffer(), output_tiramisu_v.raw_buffer());
        auto end1 = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double,std::milli> duration1 = end1 - start1;
        duration_vector_1.push_back(duration1);
    }

    // Reference
    for (int i=0; i<NB_TESTS; i++)
    {
        duration_vector_2.push_back(run_halide(input, output_ref_y, output_ref_u, output_ref_v));
    }

    print_time("performance_CPU.csv", "rgbyuv420gpu",
               {"Tiramisu", "Halide"},
               {median(duration_vector_1), median(duration_vector_2)});

    Halide::Tools::save_image(output_tiramisu_y, "./build/rgbyuv420gpu_y_tiramisu.png");
    Halide::Tools::save_image(output_tiramisu_u, "./build/rgbyuv420gpu_u_tiramisu.png");
    Halide::Tools::save_image(output_tiramisu_v, "./build/rgbyuv420gpu_v_tiramisu.png");
    Halide::Tools::save_image(output_ref_y, "./build/rgbyuv420gpu_y_ref.png");
    Halide::Tools::save_image(output_ref_u, "./build/rgbyuv420gpu_u_ref.png");
    Halide::Tools::save_image(output_ref_v, "./build/rgbyuv420gpu_v_ref.png");

    if (CHECK_CORRECTNESS)
    {
        std::cout << "Compare y buffer\n";
    	compare_buffers("benchmark_rgbyuv420gpu", output_tiramisu_y, output_ref_y);
        std::cout << "Compare u buffer\n";
    	compare_buffers("benchmark_rgbyuv420gpu", output_tiramisu_u, output_ref_u);
        std::cout << "Compare v buffer\n";
    	compare_buffers("benchmark_rgbyuv420gpu", output_tiramisu_y, output_ref_y);
    }

    return 0;
}