示例#1
0
void copy_float(std::string suffix, ImageParam input8, const int channels) {
    Var x, y, c;
    Func input;
    input(x, y, c) = input8(clamp(x, input8.left(), input8.right()),
                            clamp(y, input8.top(), input8.bottom()), c);

    Func result("result");
    result(x, y, c) = input(x, y, c);
    result.bound(c, 0, channels);

    // Unset default constraints so that specialization works.
    result.output_buffer().set_stride(0, Expr());

    Expr interleaved =
        (result.output_buffer().stride(0) == channels &&
         result.output_buffer().stride(2) == 1);

    if (suffix == "_rs") {
        result.shader(x, y, c, DeviceAPI::Renderscript);
        result.specialize(interleaved).vectorize(c);
    } else {
        result.reorder(c, x, y)
            .parallel(y)
            .unroll(c)
            .vectorize(x, 4)
            .specialize(interleaved);
    }
    // non-specialized version is planar

    std::vector<Argument> args;
    args.push_back(input8);
    std::string fn_name = "generated_copy" + suffix + "_float";
    result.compile_to_file(fn_name, args, fn_name);
}
示例#2
0
void blur_uint8(std::string suffix, ImageParam input8, const int channels) {
    Var x, y, c;
    Func input;
    input(x, y, c) = input8(clamp(x, input8.left(), input8.right()),
                            clamp(y, input8.top(), input8.bottom()), c);

    Func blur_x("blur_x");

    blur_x(x, y, c) = cast<uint8_t>(
        (cast<uint16_t>(input(x, y, c)) +
        input(x + 1, y, c) +
        input(x + 2, y, c)) / 3);

    Func result("result");
    result(x, y, c) = cast<uint8_t>(
        (cast<uint16_t>(blur_x(x, y, c)) +
        blur_x(x, y + 1, c) +
        blur_x(x, y + 2, c)) / 3);

    // Unset default constraints so that specialization works.
    result.output_buffer().set_stride(0, Expr());

    result.bound(c, 0, channels);

    Expr interleaved =
        (result.output_buffer().stride(0) == channels &&
         result.output_buffer().stride(2) == 1);
    Expr planar = result.output_buffer().stride(0) == 1;

    if (suffix == "_rs") {
        result.shader(x, y, c, DeviceAPI::Renderscript);
        result.specialize(interleaved).vectorize(c);
        // non-specialized version is planar
    } else {
        Var yi;
        result
            .reorder(c, x, y)
            .unroll(c)
            .split(y, y, yi, 32)
            .parallel(y)
            .vectorize(x, 8);
        result.specialize(interleaved);
        result.specialize(planar);
        // blur_x is compute at result, so it's included in result's
        // specializations.
        blur_x.store_at(result, y)
            .compute_at(result, yi)
            .reorder(c, x, y)
            .unroll(c)
            .vectorize(x, 8);
    }

    std::vector<Argument> args;
    args.push_back(input8);
    std::string fn_name = "generated_blur" + suffix + "_uint8";
    result.compile_to_file(fn_name, args, fn_name);
}
示例#3
0
文件: RDom.cpp 项目: bleibig/Halide
// Constructs a reduction domain with one reduction variable per dimension
// of `p`. Each variable is named "<param name>$<axis>" and takes its min
// and extent from the corresponding dimension of `p`.
RDom::RDom(ImageParam p) {
    static string axis_names[] = {"x", "y", "z", "w"};

    // NOTE(review): axis_names has four entries and the loop bound is
    // p.dimensions(); nothing here guards against more than four
    // dimensions -- confirm callers never pass such a parameter.
    std::vector<ReductionVariable> rvars;
    const int dim_count = p.dimensions();
    for (int d = 0; d < dim_count; ++d) {
        const ReductionVariable rv = {
            p.name() + "$" + axis_names[d],
            p.dim(d).min(),
            p.dim(d).extent()
        };
        rvars.push_back(rv);
    }

    dom = ReductionDomain(rvars);
    init_vars(p.name());
}
示例#4
0
    // Wraps `input` in a Func, passes it through K_grad_mat together with
    // the input's width and height, and returns the resulting Func with
    // the dimension-0 stride constraints removed.
    Func build() {
        Expr in_width = input.width();
        Expr in_height = input.height();

        // Func view of the input image.
        Func input_func("in");
        input_func(x, y, c) = input(x, y, c);

        // Apply K_grad_mat to the wrapped input.
        Func grad_out = K_grad_mat(input_func, in_width, in_height);

        // Unset the stride constraint on dimension 0 of both the input and
        // the output buffer so that arbitrary strides are accepted.
        input.set_stride(0, Expr());
        grad_out.output_buffer().set_stride(0, Expr());

        return grad_out;
    }
示例#5
0
    // Produces the blurred image by applying blur_cols_transpose twice:
    // once with the input's height and once with its width, both using
    // `alpha`.
    Func build() override {
        Expr in_width = input.width();
        Expr in_height = input.height();

        // blur_cols_transpose expects a Func, while our input is an
        // ImageParam, so route the input through a trivial wrapper Func.
        Func input_func;
        input_func(x, y, c) = input(x, y, c);

        // Pass 1: blur along the columns of the input.
        Func first_pass = blur_cols_transpose(input_func, in_height, alpha);

        // Pass 2: blur along the columns again, which are the rows of the
        // original image.
        Func second_pass = blur_cols_transpose(first_pass, in_width, alpha);

        // No scheduling here; blur_cols_transpose takes care of it.

        return second_pass;
    }
示例#6
0
    // Wraps the `input` image and the kernel `K` in Funcs, passes both
    // (with their widths and heights) to A_conv, and returns the resulting
    // Func with the dimension-0 stride constraints removed.
    Func build() {
        Expr in_width = input.width();
        Expr in_height = input.height();

        Expr kernel_width = K.width();
        Expr kernel_height = K.height();

        // Func view of the input image.
        Func input_func("in");
        input_func(x, y, c) = input(x, y, c);

        // Func view of the kernel K.
        Func K_func("K");
        K_func(i, j, c) = K(i, j, c);

        // Apply A_conv to the wrapped input and kernel.
        Func conv_out = A_conv(input_func, in_width, in_height,
                               K_func, kernel_width, kernel_height);

        // Unset the stride constraint on dimension 0 of the input, the
        // kernel and the output buffer so arbitrary strides are accepted.
        input.set_stride(0, Expr());
        K.set_stride(0, Expr());
        conv_out.output_buffer().set_stride(0, Expr());

        return conv_out;
    }
示例#7
0
/* Do n unrolled iterations of game of life on a torus.
 *
 * input - image holding the current generation; its width and height
 *         define the torus that neighbor coordinates wrap around.
 * n     - number of generations to advance. Values below 1 are treated
 *         as 1 so that the recursion always terminates.
 *
 * Returns a Func that yields u8(1) for cells alive after n generations
 * and u8(0) otherwise. Every intermediate generation is scheduled
 * compute_root; the returned Func itself is left unscheduled.
 */
Func gameOfLife(ImageParam input, int n) {
    Var x, y;
    Func in;
    // Base case. Using <= rather than == keeps a zero or negative n from
    // recursing without bound.
    if (n <= 1) {
        in(x, y) = input(x, y);
    } else {
        in = gameOfLife(input, n-1);
        in.compute_root();
    }

    // Neighbor coordinates, wrapped modulo the image size. w-1 and h-1 are
    // added before taking the remainder so the left operand of % is not
    // negative at x == 0 or y == 0.
    Expr w = input.width(), h = input.height();
    Expr W = (x+w-1) % w, E = (x+1) % w, N = (y+h-1) % h, S = (y+1) % h;
    Expr livingNeighbors = (in(W, N) + in(x, N) +
                            in(E, N) + in(W, y) +
                            in(E, y) + in(W, S) +
                            in(x, S) + in(E, S));
    Expr alive = in(x, y) != 0;

    // A cell is alive in the next generation if it has exactly three living
    // neighbors, or if it is currently alive and has exactly two.
    Func output;
    output(x, y) = select(livingNeighbors == 3 || (alive && livingNeighbors == 2), u8(1), u8(0));

    return output;
}
示例#8
0
    // Wraps the `input` image and `H` in Funcs, passes them to
    // A_warpHomography together with the input size and H's channel count,
    // and returns the resulting Func with the dimension-0 stride
    // constraints removed.
    Func build() {

        Expr in_width = input.width();
        Expr in_height = input.height();
        Expr homography_count = H.channels();

        // Func view of the input image.
        Func input_func("in");
        input_func(x, y, c) = input(x, y, c);

        // Func view of H.
        Func H_func("H");
        H_func(i, j, g) = H(i, j, g);

        // Apply A_warpHomography to the wrapped input and H.
        Func warped = A_warpHomography(input_func, in_width, in_height,
                                       H_func, homography_count);

        // Unset the stride constraint on dimension 0 of the input, H and
        // the output buffer so arbitrary strides are accepted.
        input.set_stride(0, Expr());
        H.set_stride(0, Expr());
        warped.output_buffer().set_stride(0, Expr());

        return warped;
    }
示例#9
0
    // Wraps the four-dimensional `input` in a Func, passes it to ifft2_c2r
    // with WTARGET and HTARGET, and returns the resulting Func with the
    // dimension-0 stride constraints removed.
    Func build() {

        // Func view of the input image.
        Func input_func("in");
        input_func(x, y, c, k) = input(x, y, c, k);

        // Apply ifft2_c2r to the wrapped input.
        Func ifft_result = ifft2_c2r(input_func, WTARGET, HTARGET);

        // Unset the stride constraint on dimension 0 of both the input and
        // the output buffer so that arbitrary strides are accepted.
        input.set_stride(0, Expr());
        ifft_result.output_buffer().set_stride(0, Expr());

        return ifft_result;
    }
示例#10
0
void blur(std::string suffix, ImageParam input) {
    input.dim(2).set_bounds(0, 4).set_stride(1).dim(0).set_stride(4);

    Var x("x"), y("y"), c("c");

    Func clamped("clamped");
    clamped = BoundaryConditions::repeat_edge(input);

    Func blur_x("blur_x");
    blur_x(x, y, c) = (clamped(x - 1, y, c) +
                       clamped(x, y, c) +
                       clamped(x + 1, y, c)) / 3;

    Func result("avg_filter");
    result(x, y, c) = (blur_x(x, y - 1, c) +
                       blur_x(x, y, c) +
                       blur_x(x, y + 1, c)) / 3;

    result.output_buffer().dim(2).set_bounds(0, 4).set_stride(1).dim(0).set_stride(4);

    Target target = get_target_from_environment();
    result.bound(c, 0, 4)
          .reorder_storage(c, x, y)
          .reorder(c, x, y);
    if (target.has_gpu_feature() || target.has_feature(Target::OpenGLCompute)) {
        Var xi("xi"), yi("yi");
        result.unroll(c)
              .gpu_tile(x, y, xi, yi, 64, 64);
    } else {
        Var yi("yi");
        result
            .unroll(c)
            .split(y, y, yi, 32)
            .parallel(y)
            .vectorize(x, 4);
        blur_x.store_at(result, y)
            .compute_at(result, yi)
            .reorder(c, x, y)
            .unroll(c)
            .vectorize(x, 4);
    }

    std::string fn_name = std::string("avg_filter") + suffix;
    result.compile_to_file(fn_name, {input}, fn_name);
}
示例#11
0
文件: RDom.cpp 项目: JoeyJAL/Halide
// Constructs a reduction domain over `p`: each of the first (up to four)
// dimensions of `p` gets a reduction variable that starts at 0 and spans
// that dimension's extent. Slots for dimensions `p` does not have are
// passed to build_domain as default-constructed Exprs, and the matching
// x/y/z/w members are left unassigned.
RDom::RDom(ImageParam p) {
    const int used_dims = p.dimensions();

    Expr lower[4], span[4];
    for (int d = 0; d < 4 && d < used_dims; ++d) {
        lower[d] = 0;
        span[d] = p.extent(d);
    }

    const string base = p.name();
    string names[] = {base + ".x$r", base + ".y$r", base + ".z$r", base + ".w$r"};

    dom = build_domain(names[0], lower[0], span[0],
                       names[1], lower[1], span[1],
                       names[2], lower[2], span[2],
                       names[3], lower[3], span[3]);

    // Populate only the member RVars that correspond to real dimensions.
    RVar *members[] = {&x, &y, &z, &w};
    for (int d = 0; d < 4 && d < used_dims; ++d) {
        *members[d] = RVar(names[d], lower[d], span[d], dom);
    }
}
示例#12
0
// Forwards `b` to `a.set`, binding the buffer to the image parameter.
void set(ImageParam &a, const Buffer &b) {
    a.set(b);
}
示例#13
0
// Sets the host alignment of `i` to `align` and records that value in `m`
// under the key "<param name>.host". An entry already stored under that
// key is left untouched, because std::map::insert does not overwrite.
void set_alignment_host_ptr(ImageParam &i, int align, std::map<string, int> &m) {
    i.set_host_alignment(align);

    const string key = i.name() + ".host";
    m.insert(std::pair<string, int>(key, align));
}
示例#14
0
    // Builds the `brighter` Func, which adds `offset` to every sample of
    // `input`, vectorizes it by 16 along x, and then applies the stride and
    // bounds constraints (and any extra scheduling) selected by the
    // `layout` generator param. Returns the scheduled Func.
    Func build() {
        // Define the Func.
        Func brighter("brighter");
        brighter(x, y, c) = input(x, y, c) + offset;

        // Schedule it.
        brighter.vectorize(x, 16);

        // We will compile this pipeline to handle memory layouts in
        // several different ways, depending on the 'layout' generator
        // param.
        if (layout == Layout::Planar) {
            // This pipeline as written will only work with images in
            // which each scanline is a densely-packed single color
            // channel. In terms of the strides described in lesson
            // 10, Halide assumes and asserts that the stride in x is
            // one.

            // This constraint permits planar images, where the red,
            // green, and blue channels are laid out in memory like
            // this:

            // RRRRRRRR
            // RRRRRRRR
            // RRRRRRRR
            // RRRRRRRR
            // GGGGGGGG
            // GGGGGGGG
            // GGGGGGGG
            // GGGGGGGG
            // BBBBBBBB
            // BBBBBBBB
            // BBBBBBBB
            // BBBBBBBB

            // It also works with the less-commonly used line-by-line
            // layout, in which scanlines of red, green, and blue
            // alternate.

            // RRRRRRRR
            // GGGGGGGG
            // BBBBBBBB
            // RRRRRRRR
            // GGGGGGGG
            // BBBBBBBB
            // RRRRRRRR
            // GGGGGGGG
            // BBBBBBBB
            // RRRRRRRR
            // GGGGGGGG
            // BBBBBBBB

        } else if (layout == Layout::Interleaved) {
            // Another common format is 'interleaved', in which the
            // red, green, and blue values for each pixel occur next
            // to each other in memory:

            // RGBRGBRGBRGBRGBRGBRGBRGB
            // RGBRGBRGBRGBRGBRGBRGBRGB
            // RGBRGBRGBRGBRGBRGBRGBRGB
            // RGBRGBRGBRGBRGBRGBRGBRGB

            // In this case the stride in x is three, the stride in y
            // is three times the width of the image, and the stride
            // in c is one. We can tell Halide to assume (and assert)
            // that this is the case for the input and output like so:

            input
                .set_stride(0, 3) // stride in dimension 0 (x) is three
                .set_stride(2, 1); // stride in dimension 2 (c) is one

            brighter.output_buffer()
                .set_stride(0, 3)
                .set_stride(2, 1);

            // For interleaved layout, you may want to use a different
            // schedule. We'll tell Halide to additionally assume and
            // assert that there are three color channels, then
            // exploit this fact to make the loop over 'c' innermost
            // and unrolled.

            input.set_bounds(2, 0, 3); // Dimension 2 (c) starts at 0 and has extent 3.
            brighter.output_buffer().set_bounds(2, 0, 3);

            // Move the loop over color channels innermost and unroll
            // it.
            brighter.reorder(c, x, y).unroll(c);

            // Note that if we were dealing with an image with an
            // alpha channel (RGBA), then the stride in x and the
            // bounds of the channels dimension would both be four
            // instead of three.

        } else if (layout == Layout::Either) {
            // We can also remove all constraints and compile a
            // pipeline that will work with any memory layout. It will
            // probably be slow, because all vector loads become
            // gathers, and all vector stores become scatters.
            input.set_stride(0, Expr()); // Use a default-constructed
                                         // undefined Expr to mean
                                         // there is no constraint.

            brighter.output_buffer().set_stride(0, Expr());

        } else if (layout == Layout::Specialized) {
            // We can accept any memory layout with good performance
            // by telling Halide to inspect the memory layout at
            // runtime, and branch to different code depending on the
            // strides it finds. First we relax the default constraint
            // that stride(0) == 1:

            input.set_stride(0, Expr()); // Use an undefined Expr to
                                         // mean there is no
                                         // constraint.

            brighter.output_buffer().set_stride(0, Expr());

            // Then we construct boolean Exprs that detect at runtime
            // whether we're planar or interleaved. The conditions
            // should check for all the facts we want to exploit in
            // each case.
            Expr input_is_planar =
                (input.stride(0) == 1);
            Expr input_is_interleaved =
                (input.stride(0) == 3 &&
                 input.stride(2) == 1 &&
                 input.extent(2) == 3);

            Expr output_is_planar =
                (brighter.output_buffer().stride(0) == 1);
            Expr output_is_interleaved =
                (brighter.output_buffer().stride(0) == 3 &&
                 brighter.output_buffer().stride(2) == 1 &&
                 brighter.output_buffer().extent(2) == 3);

            // We can then use Func::specialize to write a schedule
            // that switches at runtime to specialized code based on a
            // boolean Expr. That code will exploit the fact that the
            // Expr is known to be true.
            brighter.specialize(input_is_planar && output_is_planar);

            // We've already vectorized brighter, and our two
            // specializations will inherit that scheduling
            // directive. We can also add additional scheduling
            // directives that apply to a single specialization
            // only. We'll tell Halide to make a specialized version
            // of the code for interleaved layouts, and to reorder and
            // unroll that specialized code.
            brighter.specialize(input_is_interleaved && output_is_interleaved)
                .reorder(c, x, y).unroll(c);

            // We could also add specializations for if the input is
            // interleaved and the output is planar, and vice versa,
            // but two specializations is enough to demonstrate the
            // feature. A later tutorial will explore more creative
            // uses of Func::specialize.

            // Adding specializations can improve performance
            // substantially for the cases they apply to, but it also
            // increases the amount of code to compile and ship. If
            // binary sizes are a concern and the input and output
            // memory layouts are known, you probably want to use
            // set_stride and set_extent instead.
        }

        return brighter;
    }