/**
 * Copies the four extents of a 4-dimensional buffer into the out-parameters.
 *
 * The buffer must report exactly four dimensions (checked with CV_Assert).
 *
 * @param buffer   Buffer whose extents are read.
 * @param width    Receives buffer.extent(0).
 * @param height   Receives buffer.extent(1).
 * @param channels Receives buffer.extent(2).
 * @param batch    Receives buffer.extent(3).
 */
void getCanonicalSize(const Halide::Buffer<>& buffer, int* width, int* height,
                      int* channels, int* batch)
{
    CV_Assert(buffer.dimensions() == 4);

    // Destination for each dimension index, in the order 0..3.
    int* const destinations[4] = {width, height, channels, batch};
    for (int dim = 0; dim < 4; ++dim)
    {
        *destinations[dim] = buffer.extent(dim);
    }
}
int main(int, char**) { std::vector<std::chrono::duration<double, std::milli>> duration_vector; Halide::Buffer<uint8_t> input = Halide::Tools::load_image("../rgb.png"); Halide::Buffer<float> kernel(3, 3); kernel(0,0) = 0; kernel(0,1) = 1.0f/5; kernel(0,2) = 0; kernel(1,0) = 1.0f/5; kernel(1,1) = 1.0f/5; kernel(1,2) = 1.0f/5; kernel(2,0) = 0; kernel(2,1) = 1; kernel(2,2) = 0; // Small size discrepancy with Halide benchmark: Halide::Buffer<uint8_t> output(input.width(), input.height(), input.channels()); std::cout << "Dimensions : " << std::endl; std::cout << "input.extent(0): " << input.extent(0) << std::endl; // Rows std::cout << "input.extent(1): " << input.extent(1) << std::endl; // Cols std::cout << "input.extent(2): " << input.extent(2) << std::endl; // Colors #ifdef __PROFILE_CUDA__ cudaProfilerStop(); #endif // Warm up pencil_convolution(input.extent(0), input.extent(1), input.extent(1), (uint8_t *) input.raw_buffer()->host, (float *) kernel.raw_buffer()->host, (uint8_t *) output.raw_buffer()->host); #ifdef __PROFILE_CUDA__ cudaProfilerStart(); #endif // Tiramisu for (int i = 0; i < 100; i++) { auto start = std::chrono::high_resolution_clock::now(); pencil_convolution(input.extent(0), input.extent(1), input.extent(1), (uint8_t *) input.raw_buffer()->host, (float *) kernel.raw_buffer()->host, (uint8_t *) output.raw_buffer()->host); auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration<double, std::milli> duration = end - start; duration_vector.push_back(duration); } std::cout << "time: " << median(duration_vector) << std::endl; return 0; }
int main(int, char**) { std::vector<std::chrono::duration<double,std::milli>> duration_vector_1; std::vector<std::chrono::duration<double,std::milli>> duration_vector_2; Halide::Buffer<uint8_t> input = Halide::Tools::load_image("./utils/images/rgb.png"); Halide::Buffer<int32_t> size(2); size(0) = input.extent(0); size(1) = input.extent(1); Halide::Buffer<uint8_t> output_ref_y(input.width(), input.height()); Halide::Buffer<uint8_t> output_ref_u(input.width()/2, input.height()/2); Halide::Buffer<uint8_t> output_ref_v(input.width()/2, input.height()/2); Halide::Buffer<uint8_t> output_tiramisu_y(input.width(), input.height()); Halide::Buffer<uint8_t> output_tiramisu_u(input.width()/2, input.height()/2); Halide::Buffer<uint8_t> output_tiramisu_v(input.width()/2, input.height()/2); std::cout << "STARTING TEST\n"; std::cout << "y size (width, height): " << output_tiramisu_y.width() << ", " << output_tiramisu_y.height() << "\n"; std::cout << "u size (width, height): " << output_tiramisu_u.width() << ", " << output_tiramisu_u.height() << "\n"; std::cout << "v size (width, height): " << output_tiramisu_v.width() << ", " << output_tiramisu_v.height() << "\n"; // Warm up rgbyuv420gpu_tiramisu(size.raw_buffer(), input.raw_buffer(), output_tiramisu_y.raw_buffer(), output_tiramisu_u.raw_buffer(), output_tiramisu_v.raw_buffer()); run_halide(input, output_ref_y, output_ref_u, output_ref_v); // Tiramisu for (int i=0; i<NB_TESTS; i++) { auto start1 = std::chrono::high_resolution_clock::now(); rgbyuv420gpu_tiramisu(size.raw_buffer(), input.raw_buffer(), output_tiramisu_y.raw_buffer(), output_tiramisu_u.raw_buffer(), output_tiramisu_v.raw_buffer()); auto end1 = std::chrono::high_resolution_clock::now(); std::chrono::duration<double,std::milli> duration1 = end1 - start1; duration_vector_1.push_back(duration1); } // Reference for (int i=0; i<NB_TESTS; i++) { duration_vector_2.push_back(run_halide(input, output_ref_y, output_ref_u, output_ref_v)); } print_time("performance_CPU.csv", 
"rgbyuv420gpu", {"Tiramisu", "Halide"}, {median(duration_vector_1), median(duration_vector_2)}); Halide::Tools::save_image(output_tiramisu_y, "./build/rgbyuv420gpu_y_tiramisu.png"); Halide::Tools::save_image(output_tiramisu_u, "./build/rgbyuv420gpu_u_tiramisu.png"); Halide::Tools::save_image(output_tiramisu_v, "./build/rgbyuv420gpu_v_tiramisu.png"); Halide::Tools::save_image(output_ref_y, "./build/rgbyuv420gpu_y_ref.png"); Halide::Tools::save_image(output_ref_u, "./build/rgbyuv420gpu_u_ref.png"); Halide::Tools::save_image(output_ref_v, "./build/rgbyuv420gpu_v_ref.png"); if (CHECK_CORRECTNESS) { std::cout << "Compare y buffer\n"; compare_buffers("benchmark_rgbyuv420gpu", output_tiramisu_y, output_ref_y); std::cout << "Compare u buffer\n"; compare_buffers("benchmark_rgbyuv420gpu", output_tiramisu_u, output_ref_u); std::cout << "Compare v buffer\n"; compare_buffers("benchmark_rgbyuv420gpu", output_tiramisu_y, output_ref_y); } return 0; }