int main(int, char**) { std::vector<std::chrono::duration<double,std::milli>> duration_vector_1; std::vector<std::chrono::duration<double,std::milli>> duration_vector_2; Halide::Buffer<uint8_t> input = Halide::Tools::load_image("./utils/images/rgb.png"); Halide::Buffer<uint8_t> output_ref_f(input.width(), input.height(), input.channels()); Halide::Buffer<uint8_t> output_ref_g(input.width(), input.height(), input.channels()); Halide::Buffer<uint8_t> output_ref_h(input.width(), input.height(), input.channels()); Halide::Buffer<uint8_t> output_ref_k(input.width(), input.height(), input.channels()); Halide::Buffer<uint8_t> output_tiramisu_f(input.width(), input.height(), input.channels()); Halide::Buffer<uint8_t> output_tiramisu_g(input.width(), input.height(), input.channels()); Halide::Buffer<uint8_t> output_tiramisu_h(input.width(), input.height(), input.channels()); Halide::Buffer<uint8_t> output_tiramisu_k(input.width(), input.height(), input.channels()); // Warm up fusion_tiramisu(input.raw_buffer(), output_tiramisu_f.raw_buffer(), output_tiramisu_g.raw_buffer(), output_tiramisu_h.raw_buffer(), output_tiramisu_k.raw_buffer()); fusion_ref(input.raw_buffer(), output_ref_f.raw_buffer(), output_ref_g.raw_buffer(), output_ref_h.raw_buffer(), output_ref_k.raw_buffer()); // Tiramisu for (int i=0; i<NB_TESTS; i++) { auto start1 = std::chrono::high_resolution_clock::now(); fusion_tiramisu(input.raw_buffer(), output_tiramisu_f.raw_buffer(), output_tiramisu_g.raw_buffer(), output_tiramisu_h.raw_buffer(), output_tiramisu_k.raw_buffer()); auto end1 = std::chrono::high_resolution_clock::now(); std::chrono::duration<double,std::milli> duration1 = end1 - start1; duration_vector_1.push_back(duration1); } // Reference for (int i=0; i<NB_TESTS; i++) { auto start2 = std::chrono::high_resolution_clock::now(); fusion_ref(input.raw_buffer(), output_ref_f.raw_buffer(), output_ref_g.raw_buffer(), output_ref_h.raw_buffer(), output_ref_k.raw_buffer()); auto end2 = 
std::chrono::high_resolution_clock::now(); std::chrono::duration<double,std::milli> duration2 = end2 - start2; duration_vector_2.push_back(duration2); } print_time("performance_CPU.csv", "fusion", {"Tiramisu", "Halide"}, {median(duration_vector_1), median(duration_vector_2)}); Halide::Tools::save_image(output_tiramisu_h, "./build/fusion_h_tiramisu.png"); Halide::Tools::save_image(output_tiramisu_k, "./build/fusion_k_tiramisu.png"); Halide::Tools::save_image(output_ref_h, "./build/fusion_h_ref.png"); Halide::Tools::save_image(output_ref_k, "./build/fusion_k_ref.png"); if (CHECK_CORRECTNESS) compare_buffers("Fusion", output_ref_k, output_tiramisu_k); return 0; }
int main(int, char**) { std::vector<std::chrono::duration<double, std::milli>> duration_vector; Halide::Buffer<uint8_t> input = Halide::Tools::load_image("../rgb.png"); Halide::Buffer<float> kernel(3, 3); kernel(0,0) = 0; kernel(0,1) = 1.0f/5; kernel(0,2) = 0; kernel(1,0) = 1.0f/5; kernel(1,1) = 1.0f/5; kernel(1,2) = 1.0f/5; kernel(2,0) = 0; kernel(2,1) = 1; kernel(2,2) = 0; // Small size discrepancy with Halide benchmark: Halide::Buffer<uint8_t> output(input.width(), input.height(), input.channels()); std::cout << "Dimensions : " << std::endl; std::cout << "input.extent(0): " << input.extent(0) << std::endl; // Rows std::cout << "input.extent(1): " << input.extent(1) << std::endl; // Cols std::cout << "input.extent(2): " << input.extent(2) << std::endl; // Colors #ifdef __PROFILE_CUDA__ cudaProfilerStop(); #endif // Warm up pencil_convolution(input.extent(0), input.extent(1), input.extent(1), (uint8_t *) input.raw_buffer()->host, (float *) kernel.raw_buffer()->host, (uint8_t *) output.raw_buffer()->host); #ifdef __PROFILE_CUDA__ cudaProfilerStart(); #endif // Tiramisu for (int i = 0; i < 100; i++) { auto start = std::chrono::high_resolution_clock::now(); pencil_convolution(input.extent(0), input.extent(1), input.extent(1), (uint8_t *) input.raw_buffer()->host, (float *) kernel.raw_buffer()->host, (uint8_t *) output.raw_buffer()->host); auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration<double, std::milli> duration = end - start; duration_vector.push_back(duration); } std::cout << "time: " << median(duration_vector) << std::endl; return 0; }
/**
 * Logs the top-left corner of a buffer through LOGI, one log line per row.
 *
 * At most the first 10 rows and the first 10 columns are printed. Each pixel
 * is rendered as " [c0 c1 ...]" with all of its channels; values are
 * written in fixed notation with one decimal digit, and every channel after
 * the first is right-aligned in a field of width 4.
 *
 * @param buf Buffer to dump; indexed as buf(x, y, channel).
 */
void print(Halide::Buffer<T> buf)
{
    const int rows = std::min(buf.height(), 10);
    const int cols = std::min(buf.width(), 10);
    const int channels = buf.channels();

    for (int y = 0; y < rows; ++y)
    {
        std::stringstream line;
        // These flags are sticky, so setting them once per row is enough.
        line << std::fixed << std::setprecision(1);

        for (int x = 0; x < cols; ++x)
        {
            line << " [";
            for (int c = 0; c < channels; ++c)
            {
                // setw only affects the next insertion, so it is re-applied
                // for every channel except the first.
                if (c != 0)
                {
                    line << std::setw(4);
                }
                // Unary plus promotes narrow integer types so they print as
                // numbers rather than characters.
                line << +buf(x, y, c);
            }
            line << "]";
        }

        LOGI("%s", line.str().c_str());
    }
}