コード例 #1
0
ファイル: camera_pipe.cpp プロジェクト: iitaku/Halide
// Assembles the complete pipeline on top of `raw` and applies the schedule
// selected by the file-level `schedule` value.
//
// Stage order: hot_pixel_suppression -> deinterleave -> demosaic ->
// color_correct -> apply_curve. The returned Func is defined over
// (tx, ty, c) and simply forwards to the curved result.
//
// Parameters are passed straight through to the stage builders:
//   raw                        - input handed to hot_pixel_suppression
//   result_type, gamma,
//   contrast                   - forwarded to apply_curve
//   matrix_3200, matrix_7000,
//   color_temp                 - forwarded to color_correct
Func process(Func raw, Type result_type,
             UniformImage matrix_3200, UniformImage matrix_7000, Uniform<float> color_temp, 
             Uniform<float> gamma, Uniform<float> contrast) {

    Func processed("processed");
    Var xi, yi;

    // Build the stages. The demosaiced stage is never scheduled explicitly
    // below, so it does not need a name of its own here.
    Func denoised = hot_pixel_suppression(raw);
    Func deinterleaved = deinterleave(denoised);
    Func corrected = color_correct(demosaic(deinterleaved),
                                   matrix_3200, matrix_7000, color_temp);
    Func curved = apply_curve(corrected, result_type, gamma, contrast);

    // Output definition. Unless USE_CI_HACK is defined, `ci` is just an
    // alias for the channel variable `c`.
    Var co, ci;
    //#define USE_CI_HACK
    #ifndef USE_CI_HACK
    ci = c;
    #endif
    processed(tx, ty, c) = curved(tx, ty, ci);

    // Restrict the color loop to the range 0-3: either by splitting `c`
    // by a factor of 3 (the hack) or by bounding it directly.
    #ifdef USE_CI_HACK
    processed.split(c, co, ci, 3);
    #else
    processed.bound(c, 0, 3);
    #endif

    // Fallback schedule: every scheduled stage is computed at root.
    const bool tiled = (schedule == 0) || (schedule == 1);
    if (!tiled) {
        denoised.root();
        deinterleaved.root();
        corrected.root();
        processed.root();
        return processed;
    }

    // Tiled schedules: the intermediate stages are chunked over tx.
    int tile_size;
    if (schedule == 0) {
        // Vectorized variant: width 8 for the first two stages, width 4
        // for the color-corrected stage; 32x32 output tiles.
        denoised.chunk(tx).vectorize(x, 8);
        deinterleaved.chunk(tx).vectorize(x, 8);
        corrected.chunk(tx).vectorize(x, 4);
        tile_size = 32;
    } else {
        // Scalar variant with 128x128 output tiles (original note: sse is
        // bad at interleaved 16-bit ops, so vectorization is skipped).
        denoised.chunk(tx);
        deinterleaved.chunk(tx);
        corrected.chunk(tx);
        tile_size = 128;
    }

    // Both tiled variants share the same output loop nest, parallel over ty.
    processed.tile(tx, ty, xi, yi, tile_size, tile_size).reorder(xi, yi, ci, tx, ty);
    processed.parallel(ty);

    return processed;
}
コード例 #2
0
ファイル: camera_pipe.cpp プロジェクト: iitaku/Halide
// Reconstructs full r, g and b planes from the four-plane `deinterleaved`
// input and returns a Func over (x, y, c) with c == 0 -> r, c == 1 -> g and
// any other c -> b. The schedule applied to the intermediate Funcs is chosen
// by the file-level `schedule` value.
//
// Naming: <a>_<b> is the value of channel <a> at an input site of channel
// <b>; "gr" is a green site in a red row, "gb" a green site in a blue row.
Func demosaic(Func deinterleaved) {
    // Known samples: one Func per plane of the input.
    Func r_r, g_gr, g_gb, b_b;
    g_gr(x, y) = deinterleaved(x, y, 0);
    r_r(x, y)  = deinterleaved(x, y, 1);
    b_b(x, y)  = deinterleaved(x, y, 2);
    g_gb(x, y) = deinterleaved(x, y, 3);

    // Samples that have to be interpolated.
    Func b_r, g_r, b_gr, r_gr, b_gb, r_gb, r_b, g_b;

    // ---- Green at red and blue sites ----
    // Form a vertical and a horizontal two-tap average together with the
    // absolute difference of the two taps, then keep the average whose
    // taps differ least (vertical wins ties).
    Expr g_r_vert       =    (g_gb(x, y-1) + g_gb(x, y))/2;
    Expr g_r_vert_diff  = abs(g_gb(x, y-1) - g_gb(x, y));
    Expr g_r_horiz      =    (g_gr(x+1, y) + g_gr(x, y))/2;
    Expr g_r_horiz_diff = abs(g_gr(x+1, y) - g_gr(x, y));

    g_r(x, y)  = select(g_r_horiz_diff < g_r_vert_diff, g_r_horiz, g_r_vert);

    Expr g_b_vert       =    (g_gr(x, y+1) + g_gr(x, y))/2;
    Expr g_b_vert_diff  = abs(g_gr(x, y+1) - g_gr(x, y));
    Expr g_b_horiz      =    (g_gb(x-1, y) + g_gb(x, y))/2;
    Expr g_b_horiz_diff = abs(g_gb(x-1, y) - g_gb(x, y));

    g_b(x, y)  = select(g_b_horiz_diff < g_b_vert_diff, g_b_horiz, g_b_vert);

    // ---- Red and blue at green sites ----
    // Each value is a two-tap average of the wanted channel plus a
    // correction: the known green minus the same two-tap average taken
    // over the interpolated green.
    Expr fix_r_gr = g_gr(x, y) - (g_r(x, y) + g_r(x-1, y))/2;
    r_gr(x, y) = fix_r_gr + (r_r(x-1, y) + r_r(x, y))/2;

    Expr fix_b_gr = g_gr(x, y) - (g_b(x, y) + g_b(x, y-1))/2;
    b_gr(x, y) = fix_b_gr + (b_b(x, y) + b_b(x, y-1))/2;

    Expr fix_r_gb = g_gb(x, y) - (g_r(x, y) + g_r(x, y+1))/2;
    r_gb(x, y) = fix_r_gb + (r_r(x, y) + r_r(x, y+1))/2;

    Expr fix_b_gb = g_gb(x, y) - (g_b(x, y) + g_b(x+1, y))/2;
    b_gb(x, y) = fix_b_gb + (b_b(x, y) + b_b(x+1, y))/2;

    // ---- Red at blue sites and blue at red sites ----
    // Combines both ideas above: two candidate averages (labelled "pos"
    // and "neg" after the two diagonals), each green-corrected, and the
    // candidate whose taps differ least is kept ("neg" wins ties).
    Expr fix_r_b_pos  = g_b(x, y)  - (g_r(x, y) + g_r(x-1, y+1))/2;
    Expr r_b_pos      = fix_r_b_pos + (r_r(x, y) + r_r(x-1, y+1))/2;
    Expr r_b_pos_diff = abs(r_r(x, y) - r_r(x-1, y+1));

    Expr fix_r_b_neg  = g_b(x, y)  - (g_r(x-1, y) + g_r(x, y+1))/2;
    Expr r_b_neg      = fix_r_b_neg + (r_r(x-1, y) + r_r(x, y+1))/2;
    Expr r_b_neg_diff = abs(r_r(x-1, y) - r_r(x, y+1));

    r_b(x, y)  = select(r_b_pos_diff < r_b_neg_diff, r_b_pos, r_b_neg);

    Expr fix_b_r_pos  = g_r(x, y)  - (g_b(x, y) + g_b(x+1, y-1))/2;
    Expr b_r_pos      = fix_b_r_pos + (b_b(x, y) + b_b(x+1, y-1))/2;
    Expr b_r_pos_diff = abs(b_b(x, y) - b_b(x+1, y-1));

    Expr fix_b_r_neg  = g_r(x, y)  - (g_b(x+1, y) + g_b(x, y-1))/2;
    Expr b_r_neg      = fix_b_r_neg + (b_b(x+1, y) + b_b(x, y-1))/2;
    Expr b_r_neg_diff = abs(b_b(x+1, y) - b_b(x, y-1));

    b_r(x, y)  =  select(b_r_pos_diff < b_r_neg_diff, b_r_pos, b_r_neg);

    // ---- Reassemble full planes ----
    Func r = interleave_y(interleave_x(r_gr, r_r),
                          interleave_x(r_b, r_gb));
    Func g = interleave_y(interleave_x(g_gr, g_r),
                          interleave_x(g_b, g_gb));
    Func b = interleave_y(interleave_x(b_gr, b_r),
                          interleave_x(b_b, b_gb));

    // Stack the three planes along c.
    Func rgb;
    rgb(x, y, c) = select(c == 0, r(x, y),
                          select(c == 1, g(x, y), b(x, y)));

    // ---- Schedule ----
    // The interpolated Funcs and the reassembled planes are listed once
    // and scheduled in this order for every schedule.
    Func *interpolated[] = { &g_r, &g_b, &r_gr, &b_gr, &r_gb, &b_gb, &r_b, &b_r };
    Func *planes[] = { &r, &g, &b };
    const int num_interpolated = sizeof(interpolated) / sizeof(interpolated[0]);
    const int num_planes = sizeof(planes) / sizeof(planes[0]);

    if (schedule == 0) {
        // Original note: optimized for ARM. Everything is chunked over
        // tx and vectorized by 8.
        for (int k = 0; k < num_interpolated; k++) {
            interpolated[k]->chunk(tx).vectorize(x, 8);
        }
        // The planes interleave in y, so they are also unrolled by 2 in y.
        for (int k = 0; k < num_planes; k++) {
            planes[k]->chunk(tx).vectorize(x, 8).unroll(y, 2);
        }
    } else if (schedule == 1) {
        // Original note: optimized for X86; no vectorization because sse
        // is bad at 16-bit interleaving.
        for (int k = 0; k < num_interpolated; k++) {
            interpolated[k]->chunk(tx);
        }
        // The planes interleave in x and y, so they are unrolled by 2 in both.
        for (int k = 0; k < num_planes; k++) {
            planes[k]->chunk(tx).unroll(x, 2).unroll(y, 2);
        }
    } else {
        // Naive schedule: compute everything at root.
        for (int k = 0; k < num_interpolated; k++) {
            interpolated[k]->root();
        }
        for (int k = 0; k < num_planes; k++) {
            planes[k]->root();
        }
    }

    return rgb;
}