void prog01() { Halide::Var x; Halide::Func init_cond; init_cond(x) = 1.0f*x; Halide::Image<float_t> input, output; input=init_cond.realize(NX); Halide::ImageParam inPar(Halide::Float(32), 1, "inPar"); Halide::Func cell; cell(x)=inPar(x)+1; { std::vector<Halide::Argument> arg_vect; arg_vect.push_back(Halide::Argument("inPar", true, Halide::Int(32))); cell.compile_to_bitcode("stencil-fusion-01.bc", arg_vect, "blur"); } for (int t =0; t < 100000; ++t) { inPar.set(input); output = cell.realize(NX); swap(output,input); } for (int i = 0; i < NX; ++i) cout << input(i) << " "; cout << endl; }
// Builds a convolution -> max-pool -> tanh(x + bias) stage on top of `input`
// and returns the scheduled result.
//
//   input        - read as input(x, y, layer, w)
//   weights      - read as weights(fx, fy, layer, z)
//   bias         - read as bias(z, 0)
//   filter_size  - extent of the filter window in both x and y
//   input_layers - extent of the reduction over the layer coordinate
//   pool_size    - extent (and stride) of the pooling window in both x and y
Halide::Func convolution_layer(Halide::Func input, Halide::Func weights, Halide::Func bias,
                               int filter_size, int input_layers, int pool_size)
{
    // Convolution: sum of weights * input over the filter window and all
    // input layers.
    Halide::Func conv;
    Halide::Var x, y, z, w;
    Halide::RDom window(0, filter_size, 0, filter_size, 0, input_layers);
    conv(x, y, z, w) = 0.0f;
    conv(x, y, z, w) += weights(window.x, window.y, window.z, z) *
                        input(x + window.x, y + window.y, window.z, w);

    // Max pool: running maximum over a pool_size x pool_size window. The
    // running value starts at 0.0f, so the pooled result is never below zero.
    Halide::Func pooled;
    Halide::RDom pool(0, pool_size, 0, pool_size);
    pooled(x, y, z, w) = 0.0f;
    pooled(x, y, z, w) = Halide::max(
        conv(pool_size * x + pool.x, pool_size * y + pool.y, z, w),
        pooled(x, y, z, w));

    // Non-linearity: add the per-z bias and squash with tanh.
    Halide::Func activated;
    activated(x, y, z, w) = tanh(pooled(x, y, z, w) + bias(z, 0));

    // Schedule: parallel over w, tiled VECTORS x 2 in (x, y), vectorized along
    // the inner x and unrolled along the inner y.
    Halide::Var x_inner, x_outer, y_inner, y_outer;
    activated.parallel(w)
             .tile(x, y, x_outer, y_outer, x_inner, y_inner, VECTORS, 2)
             .vectorize(x_inner)
             .unroll(y_inner);

    return activated;
}
int main(int argc, char **argv) { int i, j; Halide::Func black; Halide::Func white; Halide::Var x, y; black(x, y) = 0; white(x, y) = 254; Halide::Image<int32_t> output1 = black.realize(800, 600); Halide::Image<int32_t> output2 = white.realize(800, 600); // Save the output for inspection. It should look like a bright parrot. save(output1, "input1.png"); save(output2, "input2.png"); //Check to see everything is copacetic for( i = 0; i < 800; i ++ ) { for( j = 0; j < 600; j ++ ) if (output2(i, j) != 254 || output1(i, j) != 0) { printf("Failure! Failed at (%d, %d)\n", i, j); return 1; } } printf("Success!\n"); return 0; }
// Compiles the Func returned by getFunction() to an object file.
//
//   argv[1] - output path without extension (".o" is appended)
//   argv[2] - name given to the compiled function
//
// Returns 0 after compiling, or 1 when fewer than two arguments were supplied.
int main(int argc, char **argv) {
    Halide::Func theFunc = getFunction();

    if (argc < 3) {
        return 1;
    }

    std::vector<Halide::Argument> arguments = theFunc.infer_arguments();

    // Start from the environment's target and enable the UserContext feature.
    Halide::Target target = Halide::get_target_from_environment();
    target.set_feature(Halide::Target::Feature::UserContext);

    const std::string object_path = argv[1] + std::string(".o");
    theFunc.compile_to_object(object_path, arguments, argv[2], target);
    return 0;
}
// JIT-compiles `func`, infers input bounds for a 1024x1024 output, realizes it
// three times, prints the fastest wall-clock time (in seconds) as a one-line
// JSON object, and terminates the process with exit status 0.
inline void _autotune_timing_stub(Halide::Func& func) {
    func.compile_jit();
    func.infer_input_bounds(1024, 1024);

    double best = 0;
    for (int trial = 0; trial < 3; ++trial) {
        timeval start, stop;
        gettimeofday(&start, NULL);
        func.realize(1024, 1024);
        gettimeofday(&stop, NULL);

        const double elapsed =
            (stop.tv_sec - start.tv_sec) + (stop.tv_usec - start.tv_usec) / 1000000.0;

        // Keep the minimum; the first trial always seeds it.
        const bool first_trial = (trial == 0);
        if (first_trial || elapsed < best) {
            best = elapsed;
        }
    }

    printf("{\"time\": %.10f}\n", best);
    exit(0);
}
// Builds a fully connected layer: for each output x, sums
// weights(k, x) * input(k, y, z) over k in [0, size), adds bias(x, 0) and
// applies tanh. The result is vectorized along x by VECTORS.
Halide::Func fully_connected_layer(Halide::Func input, Halide::Func weights, Halide::Func bias,
                                   int size)
{
    Halide::Func layer;
    Halide::Var x, y, z;
    Halide::RDom k(0, size);

    // Note carried over from the original author: only y = 0 should be used.
    layer(x, y, z) = 0.0f;
    layer(x, y, z) += weights(k.x, x) * input(k.x, y, z);

    // Second update stage: bias plus non-linearity applied to the sum.
    layer(x, y, z) = tanh(layer(x, y, z) + bias(x, 0));

    layer.vectorize(x, VECTORS);
    return layer;
}
// Returns a printable representation of `func` of the form
// "<halide.Func 'NAME'>", where NAME is func.name().
std::string func_repr(const h::Func &func)
{
    return boost::str(boost::format("<halide.Func '%s'>") % func.name());
}
// Forwards to h::Func::compile_to_file(filename_prefix, args, target).
// `target` defaults to the target described by the environment.
void func_compile_to_file0(h::Func &that,
                           const std::string &filename_prefix,
                           const std::vector<h::Argument> &args,
                           const h::Target &target = h::get_target_from_environment())
{
    that.compile_to_file(filename_prefix, args, target);
}
// Copies a two-dimensional 8-bit image into a single-channel cv::Mat via a
// Halide pipeline and shows it in the window called `name`.
void NamedWindow::showImage2D(Halide::Image<uint8_t> im)
{
    // The copy pipeline is built once and reused by every later call.
    static Halide::Func to_mat("convertToMat2D");
    static Halide::ImageParam source(Halide::UInt(8), 2);
    static Halide::Var x, y;

    if (!to_mat.defined()) {
        to_mat(x, y) = source(x, y);
        to_mat.vectorize(x, 4).parallel(y, 4);
    }

    source.set(im);

    const int width = im.width();
    const int height = im.height();

    // Realize straight into the Mat's pixel storage.
    cv::Mat mat(height, width, CV_8UC1, cv::Scalar(0));
    Halide::Buffer dst(Halide::UInt(8), width, height, 0, 0, mat.data);
    to_mat.realize(dst);

    cv::imshow(name, mat);
}
// Converts a three-dimensional float image into a 3-channel 8-bit cv::Mat via
// a Halide pipeline and shows it in the window called `name`.
void NamedWindow::showImage3D(Halide::Image<float> im)
{
    // The conversion pipeline is built once and reused by every later call.
    static Halide::Func to_mat("convertToMat3D");
    static Halide::ImageParam source(Halide::Float(32), 3);
    static Halide::Var x, y, c;

    if (!to_mat.defined()) {
        // Output is indexed (c, x, y); the channel index is reversed (2 - c)
        // and each value is scaled by 255 before the cast to uint8_t.
        // NOTE(review): presumably RGB -> BGR for OpenCV, with input values
        // expected in [0, 1] -- confirm against callers.
        to_mat(c, x, y) = Halide::cast<uint8_t>(source(x, y, 2 - c) * 255);
        to_mat.vectorize(x, 4).parallel(y, 4);
    }

    source.set(im);

    const int width = im.width();
    const int height = im.height();

    // Realize straight into the Mat's pixel storage.
    cv::Mat mat(height, width, CV_8UC3, cv::Scalar(0));
    Halide::Buffer dst(Halide::UInt(8), im.channels(), width, height, 0, mat.data);
    to_mat.realize(dst);

    cv::imshow(name, mat);
}
// Forwards to h::Func::compile_to_header(filename, args, fn_name, target).
// `fn_name` defaults to the empty string and `target` to the target described
// by the environment.
void func_compile_to_header0(h::Func &that,
                             const std::string &filename,
                             const std::vector<h::Argument> &args,
                             const std::string fn_name = "",
                             const h::Target &target = h::get_target_from_environment())
{
    that.compile_to_header(filename, args, fn_name, target);
}
// Forwards to h::Func::compile_to_lowered_stmt(filename, args, fmt, target).
// `fmt` defaults to h::Text and `target` to the target described by the
// environment.
void func_compile_to_lowered_stmt0(h::Func &that,
                                   const std::string &filename,
                                   const std::vector<h::Argument> &args,
                                   h::StmtOutputFormat fmt = h::Text,
                                   const h::Target &target = h::get_target_from_environment())
{
    that.compile_to_lowered_stmt(filename, args, fmt, target);
}
// JIT-compiles `func`, infers input bounds for an AUTOTUNE_N-sized output,
// realizes it AUTOTUNE_TRIALS times, prints the fastest wall-clock time (in
// seconds) as a one-line JSON object, and terminates the process with exit
// status 0.
inline void _autotune_timing_stub(Halide::Func& func) {
    func.compile_jit();
    func.infer_input_bounds(AUTOTUNE_N);

    double best = 0;

    // Arm an alarm of AUTOTUNE_LIMIT seconds. It is cancelled at the end of
    // the first loop iteration, so it only bounds the first trial.
    const unsigned int time_limit = AUTOTUNE_LIMIT;
    alarm(time_limit);

    for (int trial = 0; trial < AUTOTUNE_TRIALS; ++trial) {
        timeval start, stop;
        gettimeofday(&start, NULL);
        func.realize(AUTOTUNE_N);
        gettimeofday(&stop, NULL);
        alarm(0); // disable alarm

        const double elapsed =
            (stop.tv_sec - start.tv_sec) + (stop.tv_usec - start.tv_usec) / 1000000.0;

        // Keep the minimum; the first trial always seeds it.
        const bool first_trial = (trial == 0);
        if (first_trial || elapsed < best) {
            best = elapsed;
        }
    }

    printf("{\"time\": %.10f}\n", best);
    exit(0);
}
// Assembles the classification network on top of `layer0` and realizes it.
//
//   layer0  - input Func fed to the first convolution layer
//   weights - array of Funcs; elements 0..3 are read
//   bias    - array of Funcs; elements 0..3 are read
//
// The final stage is realized into a 1 x 1 x NUM_IMAGES integer image that is
// local to this function.
void classify(Halide::Func layer0, Halide::Func *weights, Halide::Func *bias)
{
    // Layers 1 and 2 -- convolution
    Halide::Func conv1 = convolution_layer(layer0, weights[0], bias[0],
                                           FILTER_SIZE, LAYER0_NODES, POOL_SIZE);
    Halide::Func conv2 = convolution_layer(conv1, weights[1], bias[1],
                                           FILTER_SIZE, LAYER1_NODES, POOL_SIZE);

    // Flatten many feature maps onto a single level for the later layers
    Halide::Func flat = flatten(conv2, REDUCE_IMAGE_SIZE);

    // Layers 3 and 4 -- fully connected hidden layers
    Halide::Func hidden1 = fully_connected_layer(
        flat, weights[2], bias[2],
        LAYER2_NODES * REDUCE_IMAGE_SIZE * REDUCE_IMAGE_SIZE);
    Halide::Func hidden2 = fully_connected_layer(hidden1, weights[3], bias[3], LAYER3_NODES);

    // Layer 5 -- logistic softmax / classification
    Halide::Func classes = classification(hidden2, LAYER4_NODES);

    // Every stage before the final one is computed at root.
    layer0.compute_root();
    conv1.compute_root();
    conv2.compute_root();
    flat.compute_root();
    hidden1.compute_root();
    hidden2.compute_root();

    // Realize to perform the computation
    Halide::Image<int> output(1, 1, NUM_IMAGES);
    classes.realize(output);
}
// JIT-compiles `func`, sizes an output buffer through two rounds of input
// bounds inference, realizes into it AUTOTUNE_TRIALS times, prints the fastest
// wall-clock time (in seconds) as a one-line JSON object, and terminates the
// process with exit status 0.
inline void _autotune_timing_stub(Halide::Func& func) {
    func.compile_jit();

    // TODO: this assumes scalar/non-Tuple outputs - should generalize to a Realization
    Halide::Type elem_type = func.output_types()[0];

    buffer_t shape_raw;
    {
        // Let the Buffer constructor fill in the buffer_t for an
        // AUTOTUNE_N-sized output, then drop the host pointer from our copy
        // because that allocation is not wanted.
        Halide::Buffer scratch(elem_type, AUTOTUNE_N);
        shape_raw = *scratch.raw_buffer();
        shape_raw.host = NULL;
    }

    Halide::Buffer shape_probe(elem_type, &shape_raw);
    assert(shape_probe.host_ptr() == NULL); // make sure we don't have an allocation
    func.infer_input_bounds(shape_probe);

    // Allocate the real output using the inferred mins + extents.
    Halide::Buffer result(elem_type,
                          shape_probe.extent(0),
                          shape_probe.extent(1),
                          shape_probe.extent(2),
                          shape_probe.extent(3),
                          NULL,
                          "output");
    result.set_min(shape_probe.min(0),
                   shape_probe.min(1),
                   shape_probe.min(2),
                   shape_probe.min(3));

    // Re-run input inference on the enlarged output buffer.
    func.unbind_image_params(); // TODO: iterate to convergence
    func.infer_input_bounds(result);

    double best = 0;

    // Arm an alarm of AUTOTUNE_LIMIT seconds. It is cancelled at the end of
    // the first loop iteration, so it only bounds the first trial.
    const unsigned int time_limit = AUTOTUNE_LIMIT;
    alarm(time_limit);

    for (int trial = 0; trial < AUTOTUNE_TRIALS; ++trial) {
        timeval start, stop;
        gettimeofday(&start, NULL);
        func.realize(result);
        gettimeofday(&stop, NULL);
        alarm(0); // disable alarm

        const double elapsed =
            (stop.tv_sec - start.tv_sec) + (stop.tv_usec - start.tv_usec) / 1000000.0;

        // Keep the minimum; the first trial always seeds it.
        const bool first_trial = (trial == 0);
        if (first_trial || elapsed < best) {
            best = elapsed;
        }
    }

    printf("{\"time\": %.10f}\n", best);
    exit(0);
}
// Forwards to h::Func::compile_jit(target). `target` defaults to the target
// described by the environment.
void func_compile_jit1(h::Func &that,
                       const h::Target &target = h::get_target_from_environment())
{
    that.compile_jit(target);
}
// Forwards to h::Func::realize(dst, target), realizing `that` into the given
// buffer. `target` defaults to a default-constructed h::Target.
void func_realize3(h::Func &that,
                   h::Buffer dst,
                   const h::Target &target = h::Target())
{
    that.realize(dst, target);
}
// Forwards to h::Func::realize(dst, target), realizing `that` into the given
// Realization. `target` defaults to a default-constructed h::Target.
void func_realize2(h::Func &that,
                   h::Realization dst,
                   const h::Target &target = h::Target())
{
    that.realize(dst, target);
}
// Forwards to h::Func::define_extern(function_name, params, output_types,
// dimensionality).
//
//   that           - the Func being defined as an extern stage
//   function_name  - name passed through as the extern function's name
//   params         - arguments passed through to the extern function
//   output_types   - output types passed through to define_extern
//   dimensionality - dimensionality passed through to define_extern
void func_define_extern1(h::Func &that,
                         const std::string &function_name,
                         const std::vector<h::ExternFuncArgument> &params,
                         const std::vector<h::Type> &output_types,
                         int dimensionality)
{
    that.define_extern(function_name, params, output_types, dimensionality);
}
int resize_with_halide() { Halide::ImageParam input {Halide::type_of<uint8_t>(), 3}; //_/_/_/ load a source image and repeat its edges Halide::Func src_image {}; src_image = Halide::BoundaryConditions::repeat_edge(input); //_/_/_/ describe algorithm Halide::Param<float> src_rows {}; Halide::Param<float> src_cols {}; Halide::Param<float> dst_rows {}; Halide::Param<float> dst_cols {}; // const float sc = 500.0f/4999;//static_cast<float>(src_cols.get()) / dst_cols.get(); // const float sr = 350.0f/3499;//static_cast<float>(src_rows.get()) / dst_rows.get(); const auto sc = src_cols / dst_cols; const auto sr = src_rows / dst_rows; Halide::Var i {}; Halide::Var j {}; Halide::Var c {}; auto fj = j * sr; auto cj0 = Halide::cast<int>(fj); auto cj1 = cj0 + 1; auto dj = fj - cj0; auto fi = i * sc; auto ci0 = Halide::cast<int>(fi); auto ci1 = ci0 + 1; auto di = fi - ci0; const auto c0 = (1.0f - dj) * (1.0f - di); const auto c1 = (1.0f - dj) * di; const auto c2 = dj * (1.0f - di); const auto c3 = dj * di; const auto& src_pixel0 = src_image(ci0, cj0, c); const auto& src_pixel1 = src_image(ci1, cj0, c); const auto& src_pixel2 = src_image(ci0, cj1, c); const auto& src_pixel3 = src_image(ci1, cj1, c); Halide::Func resize {}; resize(i, j, c) = Halide::saturating_cast<uint8_t>(c0 * src_pixel0 + c1 * src_pixel1 + c2 * src_pixel2 + c3 * src_pixel3); //_/_/_/ describe scheduling Halide::Var i_inner, j_inner; auto x_vector_size = 64; resize.compute_root(); resize.tile(i, j, i_inner, j_inner, x_vector_size, 4).vectorize(i_inner, 16).parallel(j); //_/_/_/ save a static library const auto path = "/Users/kumada/Projects/cct_blog/halide/sample_4/sample_4/resize"; resize.compile_to_static_library( path, {input, src_rows, src_cols, dst_rows, dst_cols}, "resize"); return 1; }
// Forwards to h::Func::realize(x_size, y_size, z_size, w_size, target) and
// returns the resulting Realization. Each size defaults to 0 and `target` to
// a default-constructed h::Target.
h::Realization func_realize1(h::Func &that,
                             int x_size = 0,
                             int y_size = 0,
                             int z_size = 0,
                             int w_size = 0,
                             const h::Target &target = h::Target())
{
    return that.realize(x_size, y_size, z_size, w_size, target);
}
// Forwards to h::Func::realize(sizes, target) and returns the resulting
// Realization. `target` defaults to a default-constructed h::Target.
h::Realization func_realize0(h::Func &that,
                             std::vector<int32_t> sizes,
                             const h::Target &target = h::Target())
{
    return that.realize(sizes, target);
}
int main(int argc, char **argv) { // This program defines a single-stage imaging pipeline that // brightens an image. // First we'll load the input image we wish to brighten. Halide::Image<uint8_t> input = load<uint8_t>("../apps/images/rgb.png"); // Next we define our Func object that represents our one pipeline // stage. Halide::Func brighter; // Our Func will have three arguments, representing the position // in the image and the color channel. Halide treats color // channels as an extra dimension of the image. Halide::Var x, y, c; // Normally we'd probably write the whole function definition on // one line. Here we'll break it apart so we can explain what // we're doing at every step. // For each pixel of the input image. Halide::Expr value = input(x, y, c); // Cast it to a floating point value. value = Halide::cast<float>(value); // Multiply it by 1.5 to brighten it. Halide represents real // numbers as floats, not doubles, so we stick an 'f' on the end // of our constant. value = value * 1.5f; // Clamp it to be less than 255, so we don't get overflow when we // cast it back to an 8-bit unsigned int. value = Halide::min(value, 255.0f); // Cast it back to an 8-bit unsigned integer. value = Halide::cast<uint8_t>(value); // Define the function. brighter(x, y, c) = value; // The equivalent one-liner to all of the above is: // // brighter(x, y, c) = Halide::cast<uint8_t>(min(input(x, y, c) * 1.5f, 255)); // // In the shorter version: // - I skipped the cast to float, because multiplying by 1.5f does // that automatically. // - I also used integer constants in clamp, because they get cast // to match the type of the first argument. // - I left the Halide:: off clamp. It's unnecessary due to Koenig // lookup. // Remember. All we've done so far is build a representation of a // Halide program in memory. We haven't actually processed any // pixels yet. We haven't even compiled that Halide program yet. // So now we'll realize the Func. 
The size of the output image // should match the size of the input image. If we just wanted to // brighten a portion of the input image we could request a // smaller size. If we request a larger size Halide will throw an // error at runtime telling us we're trying to read out of bounds // on the input image. Halide::Image<uint8_t> output = brighter.realize(input.width(), input.height(), input.channels()); // Save the output for inspection. It should look like a bright parrot. save(output, "brighter.png"); printf("Success!\n"); return 0; }
// Forwards to h::Func::compile_jit() with no arguments.
void func_compile_jit0(h::Func &that)
{
    that.compile_jit();
}
int main(int argc, char **argv) { // This program defines a single-stage imaging pipeline that // outputs a grayscale diagonal gradient. // A 'Func' object represents a pipeline stage. It's a pure // function that defines what value each pixel should have. You // can think of it as a computed image. Halide::Func gradient; // Var objects are names to use as variables in the definition of // a Func. They have no meaning by themselves. Halide::Var x, y; // We typically use Vars named 'x' and 'y' to correspond to the x // and y axes of an image, and we write them in that order. If // you're used to thinking of images as having rows and columns, // then x is the column index, and y is the row index. // Funcs are defined at any integer coordinate of its variables as // an Expr in terms of those variables and other functions. // Here, we'll define an Expr which has the value x + y. Vars have // appropriate operator overloading so that expressions like // 'x + y' become 'Expr' objects. Halide::Expr e = x + y; // Now we'll add a definition for the Func object. At pixel x, y, // the image will have the value of the Expr e. On the left hand // side we have the Func we're defining and some Vars. On the right // hand side we have some Expr object that uses those same Vars. gradient(x, y) = e; // This is the same as writing: // // gradient(x, y) = x + y; // // which is the more common form, but we are showing the // intermediate Expr here for completeness. // That line of code defined the Func, but it didn't actually // compute the output image yet. At this stage it's just Funcs, // Exprs, and Vars in memory, representing the structure of our // imaging pipeline. We're meta-programming. This C++ program is // constructing a Halide program in memory. Actually computing // pixel data comes next. // Now we 'realize' the Func, which JIT compiles some code that // implements the pipeline we've defined, and then runs it. 
We // also need to tell Halide the domain over which to evaluate the // Func, which determines the range of x and y above, and the // resolution of the output image. Halide.h also provides a basic // templatized Image type we can use. We'll make an 800 x 600 // image. Halide::Image<int32_t> output = gradient.realize(800, 600); // Halide does type inference for you. Var objects represent // 32-bit integers, so the Expr object 'x + y' also represents a // 32-bit integer, and so 'gradient' defines a 32-bit image, and // so we got a 32-bit signed integer image out when we call // 'realize'. Halide types and type-casting rules are equivalent // to C. // Let's check everything worked, and we got the output we were // expecting: for (int j = 0; j < output.height(); j++) { for (int i = 0; i < output.width(); i++) { // We can access a pixel of an Image object using similar // syntax to defining and using functions. if (output(i, j) != i + j) { printf("Something went wrong!\n" "Pixel %d, %d was supposed to be %d, but instead it's %d\n", i, j, i+j, output(i, j)); return -1; } } } // Everything worked! We defined a Func, then called 'realize' on // it to generate and run machine code that produced an Image. printf("Success!\n"); return 0; }