// Compile `f`, optionally verify its result, and time it.
//
// The function is written out as assembly to "<name>.s", JIT-compiled, and
// realized once into `output` (declared outside this function). When
// `test_correctness` is set, every pixel of `output` is compared against
//     input(cx0, y) * 3 + input(cx1, y)
// where cx0 and cx1 are x and x + 1 clamped via MIN/MAX. The first mismatch
// is printed and the process exits with status -1.
//
// Returns the difference between two currentTime() readings taken around
// ten further realizations of `f`.
double test(Func f, bool test_correctness = true) {
    f.compile_to_assembly(f.name() + ".s", Internal::vec<Argument>(input), f.name());
    f.compile_jit();
    f.realize(output);

    if (test_correctness) {
        for (int row = 0; row < output.height(); row++) {
            for (int col = 0; col < output.width(); col++) {
                // Clamp both sample columns before reading the input.
                const int left = std::max(std::min(col, MAX), MIN);
                const int right = std::max(std::min(col + 1, MAX), MIN);
                const uint16_t expected = input(left, row) * 3 + input(right, row);

                if (output(col, row) == expected) {
                    continue;
                }

                printf("output(%d, %d) = %d instead of %d\n", col, row, output(col, row), expected);
                exit(-1);
            }
        }
    }

    // Timed section: ten back-to-back realizations.
    const double start = currentTime();
    for (int run = 0; run < 10; run++) {
        f.realize(output);
    }
    return currentTime() - start;
}
int main(int argc, char **argv) { ImageParam src(UInt(8), 1); Func dst; Var x; dst(x) = src(x); Var xo; dst.split(x, xo, x, 8*4096); // dst.parallel(xo); speeds up halide's memcpy considerably, but doesn't seem sporting dst.vectorize(x, 16); dst.compile_to_assembly("memcpy.s", {src}, "memcpy"); dst.compile_jit(); const int32_t buffer_size = 12345678; const int iterations = 50; Image<uint8_t> input(buffer_size); Image<uint8_t> output(buffer_size); src.set(input); // Get past one-time set-up issues for the ptx backend. dst.realize(output); double halide = 0, system = 0; for (int i = 0; i < iterations; i++) { double t1 = current_time(); dst.realize(output); dst.realize(output); dst.realize(output); double t2 = current_time(); memcpy(output.data(), input.data(), input.width()); memcpy(output.data(), input.data(), input.width()); memcpy(output.data(), input.data(), input.width()); double t3 = current_time(); system += t3-t2; halide += t2-t1; } printf("system memcpy: %.3e byte/s\n", (buffer_size / system) * 3 * 1000 * iterations); printf("halide memcpy: %.3e byte/s\n", (buffer_size / halide) * 3 * 1000 * iterations); // memcpy will win by a little bit for large inputs because it uses streaming stores if (halide > system * 2) { printf("Halide memcpy is slower than it should be.\n"); return -1; } printf("Success!\n"); return 0; }
int main(int argc, char **argv) { // The camera pipe is specialized on the 2592x1968 images that // come in, so we'll just use an image instead of a uniform image. ImageParam input(UInt(16), 2); ImageParam matrix_3200(Float(32), 2, "m3200"), matrix_7000(Float(32), 2, "m7000"); Param<float> color_temp("color_temp"); //, 3200.0f); Param<float> gamma("gamma"); //, 1.8f); Param<float> contrast("contrast"); //, 10.0f); Param<int> blackLevel("blackLevel"); //, 25); Param<int> whiteLevel("whiteLevel"); //, 1023); // shift things inwards to give us enough padding on the // boundaries so that we don't need to check bounds. We're going // to make a 2560x1920 output image, just like the FCam pipe, so // shift by 16, 12 Func shifted; shifted(x, y) = input(x+16, y+12); // Parameterized output type, because LLVM PTX (GPU) backend does not // currently allow 8-bit computations int bit_width = atoi(argv[1]); Type result_type = UInt(bit_width); // Pick a schedule schedule = atoi(argv[2]); // Build the pipeline Func processed = process(shifted, result_type, matrix_3200, matrix_7000, color_temp, gamma, contrast, blackLevel, whiteLevel); // We can generate slightly better code if we know the output is a whole number of tiles. Expr out_width = processed.output_buffer().width(); Expr out_height = processed.output_buffer().height(); processed .bound(tx, 0, (out_width/32)*32) .bound(ty, 0, (out_height/32)*32); //string s = processed.serialize(); //printf("%s\n", s.c_str()); std::vector<Argument> args = {color_temp, gamma, contrast, blackLevel, whiteLevel, input, matrix_3200, matrix_7000}; processed.compile_to_file("curved", args); processed.compile_to_assembly("curved.s", args); return 0; }
int main(int argc, char **argv) { // The camera pipe is specialized on the 2592x1968 images that // come in, so we'll just use an image instead of a uniform image. ImageParam input(UInt(16), 2); ImageParam matrix_3200(Float(32), 2, "m3200"), matrix_7000(Float(32), 2, "m7000"); Param<float> color_temp("color_temp"); //, 3200.0f); Param<float> gamma("gamma"); //, 1.8f); Param<float> contrast("contrast"); //, 10.0f); Param<int> blackLevel("blackLevel"); //, 25); Param<int> whiteLevel("whiteLevel"); //, 1023); // shift things inwards to give us enough padding on the // boundaries so that we don't need to check bounds. We're going // to make a 2560x1920 output image, just like the FCam pipe, so // shift by 16, 12. We also convert it to be signed, so we can deal // with values that fall below 0 during processing. Func shifted; shifted(x, y) = cast<int16_t>(input(x+16, y+12)); // Parameterized output type, because LLVM PTX (GPU) backend does not // currently allow 8-bit computations int bit_width = atoi(argv[1]); Type result_type = UInt(bit_width); // Pick a target target = get_target_from_environment(); // Build the pipeline Func processed = process(shifted, result_type, matrix_3200, matrix_7000, color_temp, gamma, contrast, blackLevel, whiteLevel); std::vector<Argument> args = {color_temp, gamma, contrast, blackLevel, whiteLevel, input, matrix_3200, matrix_7000}; // TODO: it would be more efficient to call compile_to() a single time with the right arguments processed.compile_to_static_library("curved", args, "curved", target); processed.compile_to_assembly("curved.s", args, target); return 0; }
int main(int argc, char **argv) { Func f, g; Var x, y; ImageParam param(Int(32), 2); Buffer<int> image1(128, 73); Buffer<int> image2(144, 23); f(x, y) = param(x, y)*2; param.dim(0).set_bounds(0, 128); f.set_error_handler(my_error_handler); // This should be fine param.set(image1); error_occurred = false; f.realize(20, 20); if (error_occurred) { printf("Error incorrectly raised\n"); return -1; } // This should be an error, because dimension 0 of image 2 is not from 0 to 128 like we promised param.set(image2); error_occurred = false; f.realize(20, 20); if (!error_occurred) { printf("Error incorrectly not raised\n"); return -1; } // Now try constraining the output buffer of a function g(x, y) = x*y; g.set_error_handler(my_error_handler); g.output_buffer().dim(0).set_stride(2); error_occurred = false; g.realize(image1); if (!error_occurred) { printf("Error incorrectly not raised when constraining output buffer\n"); return -1; } Func h; h(x, y) = x*y; h.set_error_handler(my_error_handler); h.output_buffer() .dim(0) .set_stride(1) .set_bounds(0, ((h.output_buffer().dim(0).extent())/8)*8) .dim(1) .set_bounds(0, image1.dim(1).extent()); error_occurred = false; h.realize(image1); std::string assembly_file = Internal::get_test_tmp_dir() + "h.s"; Internal::ensure_no_file_exists(assembly_file); // Also check it compiles ok without an inferred argument list h.compile_to_assembly(assembly_file, {image1}, "h"); if (error_occurred) { printf("Error incorrectly raised when constraining output buffer\n"); return -1; } Internal::assert_file_exists(assembly_file); printf("Success!\n"); return 0; }
int main(int argc, char **argv) { Func f, g; Var x, y; ImageParam param(Int(32), 2); Image<int> image1(128, 73); Image<int> image2(144, 23); f(x, y) = param(x, y)*2; param.set_bounds(0, 0, 128); f.set_error_handler(my_error_handler); // This should be fine param.set(image1); error_occurred = false; f.realize(20, 20); if (error_occurred) { printf("Error incorrectly raised\n"); return -1; } // This should be an error, because dimension 0 of image 2 is not from 0 to 128 like we promised param.set(image2); error_occurred = false; f.realize(20, 20); if (!error_occurred) { printf("Error incorrectly not raised\n"); return -1; } // Now try constraining the output buffer of a function g(x, y) = x*y; g.set_error_handler(my_error_handler); g.output_buffer().set_stride(0, 2); error_occurred = false; g.realize(image1); if (!error_occurred) { printf("Error incorrectly not raised when constraining output buffer\n"); return -1; } Func h; h(x, y) = x*y; h.set_error_handler(my_error_handler); h.output_buffer() .set_stride(0, 1) .set_bounds(1, 0, image1.extent(1)) .set_bounds(0, 0, ((h.output_buffer().extent(0))/8)*8); error_occurred = false; h.realize(image1); // Also check it compiles ok without an inferred argument list h.compile_to_assembly("h.s", Internal::vec<Argument>(image1), "h"); if (error_occurred) { printf("Error incorrectly raised when constraining output buffer\n"); return -1; } printf("Success!\n"); return 0; }