int main(int argc, char **argv) { Func mandelbrot; Var x, y; Param<float> x_min, x_max, y_min, y_max, c_real, c_imag; Param<int> w, h, iters; Complex initial(lerp(x_min, x_max, cast<float>(x)/w), lerp(y_min, y_max, cast<float>(y)/h)); Complex c(c_real, c_imag); Var z; mandelbrot(x, y, z) = initial; RDom t(1, iters); Complex current = mandelbrot(x, y, t-1); mandelbrot(x, y, t) = current*current + c; // How many iterations until something escapes a circle of radius 2? Func count; Tuple escape = argmin(magnitude(mandelbrot(x, y, t)) < 4); // If it never escapes, use the value 0 count(x, y) = select(escape[1], 0, escape[0]); Var xi, yi, xo, yo; count.tile(x, y, xo, yo, xi, yi, 8, 8); count.parallel(yo).vectorize(xi, 4).unroll(xi).unroll(yi, 2); mandelbrot.compute_at(count, xo); Argument args[] = {x_min, x_max, y_min, y_max, c_real, c_imag, iters, w, h}; count.compile_to_file("mandelbrot", std::vector<Argument>(args, args + 9)); return 0; }
int main(int argc, char **argv) { // We'll define the simple one-stage pipeline that we used in lesson 10. Func brighter; Var x, y; // Declare the arguments. Param<uint8_t> offset; ImageParam input(type_of<uint8_t>(), 2); std::vector<Argument> args(2); args[0] = input; args[1] = offset; // Define the Func. brighter(x, y) = input(x, y) + offset; // Schedule it. brighter.vectorize(x, 16).parallel(y); // The following line is what we did in lesson 10. It compiles an // object file suitable for the system that you're running this // program on. For example, if you compile and run this file on // 64-bit linux on an x86 cpu with sse4.1, then the generated code // will be suitable for 64-bit linux on x86 with sse4.1. brighter.compile_to_file("h_brightness_compiled", args); printf("Success!\n"); return 0; }
int main(int argc, char **argv) { // The camera pipe is specialized on the 2592x1968 images that // come in, so we'll just use an image instead of a uniform image. ImageParam input(UInt(16), 2); ImageParam matrix_3200(Float(32), 2, "m3200"), matrix_7000(Float(32), 2, "m7000"); Param<float> color_temp("color_temp"); //, 3200.0f); Param<float> gamma("gamma"); //, 1.8f); Param<float> contrast("contrast"); //, 10.0f); Param<int> blackLevel("blackLevel"); //, 25); Param<int> whiteLevel("whiteLevel"); //, 1023); // shift things inwards to give us enough padding on the // boundaries so that we don't need to check bounds. We're going // to make a 2560x1920 output image, just like the FCam pipe, so // shift by 16, 12 Func shifted; shifted(x, y) = input(x+16, y+12); // Parameterized output type, because LLVM PTX (GPU) backend does not // currently allow 8-bit computations int bit_width = atoi(argv[1]); Type result_type = UInt(bit_width); // Pick a schedule schedule = atoi(argv[2]); // Build the pipeline Func processed = process(shifted, result_type, matrix_3200, matrix_7000, color_temp, gamma, contrast, blackLevel, whiteLevel); // We can generate slightly better code if we know the output is a whole number of tiles. Expr out_width = processed.output_buffer().width(); Expr out_height = processed.output_buffer().height(); processed .bound(tx, 0, (out_width/32)*32) .bound(ty, 0, (out_height/32)*32); //string s = processed.serialize(); //printf("%s\n", s.c_str()); std::vector<Argument> args = {color_temp, gamma, contrast, blackLevel, whiteLevel, input, matrix_3200, matrix_7000}; processed.compile_to_file("curved", args); processed.compile_to_assembly("curved.s", args); return 0; }
int main(int argc, char **argv) { ImageParam input(Float(32), 2); Var x, y; Func g; g(x, y) = input(x, y) * 2; g.compute_root(); Func f; f(x, y) = g(x, y); f.parallel(y); f.trace_stores(); f.compile_to_file("user_context_insanity", input, user_context_param()); return 0; }
int main(int argc, char **argv){ ImageParam input(type_of<float>(),3); Func in = lambda(x,y,c,input(x,y,c)); Func outputImage; outputImage(x,y,c) = undef<uint8_t>(); outputImage(x,y,0) = cast<uint8_t>(min((1000.0f*256.0f*pow(in(x,y,CRYSTAL_RAD_R),2)* in(x,y,ACTIVE_CRYSTALS_R)),255.0f)); outputImage(x,y,1) = cast<uint8_t>(min((1000.0f*256.0f*pow(in(x,y,CRYSTAL_RAD_G),2)* in(x,y,ACTIVE_CRYSTALS_G)),255.0f)); outputImage(x,y,2) = cast<uint8_t>(min((1000.0f*256.0f*pow(in(x,y,CRYSTAL_RAD_B),2)* in(x,y,ACTIVE_CRYSTALS_B)),255.0f)); std::vector<Argument> args(1); args[0] = input; outputImage.compile_to_file("generateFilmulatedImage",args); return 0; }
int main(int argc, char **argv) { Param<float> reservoirConcentration; Param<float> stepTime; Param<float> layerMixConst; Param<float> layerTimeDivisor; Func sumDx; Func layerMixed; Func initialDeveloperMirrored; ImageParam devConc(type_of<float>(),2); Func dDevelConc; Func developerConcentration = lambda(x,y,devConc(x,y)); dDevelConc = calcLayerMix(developerConcentration, layerMixConst, stepTime, layerTimeDivisor, reservoirConcentration); std::vector<Argument> ddcArgs = dDevelConc.infer_arguments(); dDevelConc.compile_to_file("calcLayerMix",ddcArgs); return 0; }
int main(int argc, char **argv) { Param<float> time; const float pi = 3.1415926536; Var x, y, c; Func result; Expr kx, ky; Expr xx, yy; kx = x / 150.0f; ky = y / 150.0f; xx = kx + sin(time/3.0f); yy = ky + sin(time/2.0f); Expr angle; angle = 2 * pi * sin(time/20.0f); kx = kx * cos(angle) - ky * sin(angle); ky = kx * sin(angle) + ky * cos(angle); Expr v = 0.0f; v += sin((ky + time) / 2.0f); v += sin((kx + ky + time) / 2.0f); v += sin(sqrt(xx * xx + yy * yy + 1.0f) + time); result(x, y, c) = cast<uint8_t>( select(c == 0, 32, select(c == 1, cos(pi * v), sin(pi * v)) * 80 + (255 - 80))); result.output_buffer().set_stride(0, 4); result.bound(c, 0, 4); result.glsl(x, y, c); result.compile_to_file("halide_gl_filter", {time}, "halide_gl_filter"); return 0; }
int main(int argc, char **argv) { // Make sure it's possible to generate object files for lots of // targets. This provides early warning that you may have broken // Halide on some other platform. Func f; Var x; f(x) = x; std::string targets[] = { "x86-64-linux", "x86-32-linux", "x86-64-osx", "x86-32-osx", "x86-64-windows", "x86-32-windows", "arm-64-ios", "arm-32-ios", "arm-64-android", "arm-32-android", "mips-32-android" }; for (const std::string &t : targets) { Target target = parse_target_string(t); f.compile_to_file("test_object_" + t, std::vector<Argument>(), target); #ifndef _MSC_VER std::string object_name = "test_object_" + t + ".o"; if ((target.os == Target::Windows) && (!target.has_feature(Target::MinGW))) object_name += "bj"; assert(access(object_name.c_str(), F_OK) == 0 && "Output file not created."); #endif } printf("Success!\n"); return 0; }
int main(int argc, char **argv) { // First define the function that gives the initial state. { Param<float> cx, cy; Func initial; // The initial state is a quantity of three chemicals present // at each pixel near the boundaries Expr dx = (x - cx), dy = (y - cy); Expr r = dx * dx + dy * dy; Expr mask = r < 200 * 200; initial(x, y, c) = random_float();// * select(mask, 1.0f, 0.001f); initial.compile_to_file("reaction_diffusion_2_init", {cx, cy}, "reaction_diffusion_2_init"); } // Then the function that updates the state. Also depends on user input. { ImageParam state(Float(32), 3); Param<int> mouse_x, mouse_y; Param<float> cx, cy; Param<int> frame; Func clamped = BoundaryConditions::repeat_edge(state); Func blur_x, blur_y, blur; blur_x(x, y, c) = (clamped(x-3, y, c) + clamped(x-1, y, c) + clamped(x, y, c) + clamped(x+1, y, c) + clamped(x+3, y, c)); blur_y(x, y, c) = (clamped(x, y-3, c) + clamped(x, y-1, c) + clamped(x, y, c) + clamped(x, y+1, c) + clamped(x, y+3, c)); blur(x, y, c) = (blur_x(x, y, c) + blur_y(x, y, c))/10; Expr R = blur(x, y, 0); Expr G = blur(x, y, 1); Expr B = blur(x, y, 2); // Push the colors outwards with a sigmoid Expr s = 0.5f; R *= (1 - s) + s * R * (3 - 2 * R); G *= (1 - s) + s * G * (3 - 2 * G); B *= (1 - s) + s * B * (3 - 2 * B); // Reaction Expr dR = B * (1 - R - G); Expr dG = (1 - B) * (R - G); Expr dB = 1 - B + 2 * G * R - R - G; Expr bump = (frame % 1024) / 1024.0f; bump *= 1 - bump; Expr alpha = lerp(0.3f, 0.7f, bump); dR = select(dR > 0, dR*alpha, dR); Expr t = 0.1f; R += t * dR; G += t * dG; B += t * dB; R = clamp(R, 0.0f, 1.0f); G = clamp(G, 0.0f, 1.0f); B = clamp(B, 0.0f, 1.0f); Func new_state; new_state(x, y, c) = select(c == 0, R, select(c == 1, G, B)); // Noise at the edges new_state(x, state.top(), c) = random_float(frame)*0.2f; new_state(x, state.bottom(), c) = random_float(frame)*0.2f; new_state(state.left(), y, c) = random_float(frame)*0.2f; new_state(state.right(), y, c) = random_float(frame)*0.2f; // Add some white where the mouse is Expr min_x = clamp(mouse_x - 20, 0, state.width()-1); Expr max_x = clamp(mouse_x + 20, 0, state.width()-1); Expr min_y = clamp(mouse_y - 20, 0, state.height()-1); Expr max_y = clamp(mouse_y + 20, 0, state.height()-1); RDom clobber(min_x, max_x - min_x + 1, min_y, max_y - min_y + 1); Expr dx = clobber.x - mouse_x; Expr dy = clobber.y - mouse_y; Expr radius = dx * dx + dy * dy; new_state(clobber.x, clobber.y, c) = select(radius < 400.0f, 1.0f, new_state(clobber.x, clobber.y, c)); new_state.reorder(c, x, y).bound(c, 0, 3).unroll(c); Var yi; new_state.split(y, y, yi, 64).parallel(y); //blur_x.store_at(new_state, y).compute_at(new_state, yi); blur.compute_at(new_state, yi); clamped.store_at(new_state, y).compute_at(new_state, yi); new_state.vectorize(x, 4); blur.vectorize(x, 4); state.set_bounds(2, 0, 3); std::vector<Argument> args(6); args[0] = state; args[1] = mouse_x; args[2] = mouse_y; args[3] = cx; args[4] = cy; args[5] = frame; new_state.compile_to_file("reaction_diffusion_2_update", args, "reaction_diffusion_2_update"); } // Now the function that converts the state into an argb image. { ImageParam state(Float(32), 3); Func contour; contour(x, y, c) = pow(state(x, y, c) * (1 - state(x, y, c)) * 4, 8); Expr c0 = contour(x, y, 0), c1 = contour(x, y, 1), c2 = contour(x, y, 2); Expr R = min(c0, max(c1, c2)); Expr G = (c0 + c1 + c2)/3; Expr B = max(c0, max(c1, c2)); Expr alpha = 255 << 24; Expr red = cast<int32_t>(R * 255) * (1 << 0); Expr green = cast<int32_t>(G * 255) * (1 << 8); Expr blue = cast<int32_t>(B * 255) * (1 << 16); Func render; render(x, y) = alpha + red + green + blue; render.vectorize(x, 4); Var yi; render.split(y, y, yi, 64).parallel(y); render.compile_to_file("reaction_diffusion_2_render", {state}, "reaction_diffusion_2_render"); } return 0; }
int main(int argc, char **argv) { bool can_vectorize = (get_target_from_environment().arch != Target::PNaCl); Expr random_bit = cast<uint8_t>(random_float() > 0.5f); // First define the function that gives the initial state of the // game board { Func initial; initial(x, y, c) = random_bit; initial.compile_to_file("game_of_life_init"); } // Then the function that updates the state. Also depends on user input. { ImageParam state(UInt(8), 3); Param<int> mouse_x, mouse_y; Expr xm = max(x-1, 0), xp = min(x+1, state.width()-1); Expr ym = max(y-1, 0), yp = min(y+1, state.height()-1); // Count the number of live neighbors. Expr count = (state(xm, ym, c) + state(x, ym, c) + state(xp, ym, c) + state(xm, y, c) + state(xp, y, c) + state(xm, yp, c) + state(x, yp, c) + state(xp, yp, c)); // Was this pixel alive in the previous generation? Expr alive_before = state(x, y, c) != 0; // We're alive in the next generation if we have two neighbors and // were alive before, or if we have three neighbors. Expr alive_now = (count == 2 && alive_before) || count == 3; Expr alive = cast<uint8_t>(1); Expr dead = cast<uint8_t>(0); Func output; output(x, y, c) = select(alive_now, alive, dead); // Clobber part of the output around where the mouse is with random junk Expr min_x = clamp(mouse_x - 10, 0, state.width()-1); Expr max_x = clamp(mouse_x + 10, 0, state.width()-1); Expr min_y = clamp(mouse_y - 10, 0, state.height()-1); Expr max_y = clamp(mouse_y + 10, 0, state.height()-1); RDom clobber(min_x, max_x - min_x + 1, min_y, max_y - min_y + 1); Expr dx = clobber.x - mouse_x; Expr dy = clobber.y - mouse_y; Expr r = dx*dx + dy*dy; output(clobber.x, clobber.y, c) = select(r < 100, cast<uint8_t>(random_float() < 0.25f), output(clobber.x, clobber.y, c)); if (can_vectorize) { output.vectorize(x, 4); } Var yi; output.split(y, y, yi, 16).reorder(x, yi, c, y).parallel(y); output.compile_to_file("game_of_life_update", state, mouse_x, mouse_y); } // Now the function that converts the state into an argb image. { ImageParam state(UInt(8), 3); Func render; Expr r = select(state(x, y, 0) == 1, 255, 0); Expr g = select(state(x, y, 1) == 1, 255, 0); Expr b = select(state(x, y, 2) == 1, 255, 0); render(x, y) = (255 << 24) + (r << 16) + (g << 8) + b; if (can_vectorize) { render.vectorize(x, 4); } Var yi; render.split(y, y, yi, 16).parallel(y); render.compile_to_file("game_of_life_render", state); } return 0; }
int main(int argc, char **argv) { // First define the function that gives the initial state. { Func initial; // The state is just a counter initial() = 0; initial.compile_to_file("julia_init"); } // Then the function that updates the state. Also depends on user input. { ImageParam state(Int(32), 0); Param<int> mouse_x, mouse_y; Func new_state; // Increment the counter new_state() = state() + 1; new_state.compile_to_file("julia_update", state, mouse_x, mouse_y); } // Now the function that converts the state into an argb image. { ImageParam state(Int(32), 0); Expr c_real = cos(state() / 30.0f); Expr c_imag = sin(state() / 30.0f); Expr r_adjust = (cos(state() / 43.0f) + 1.5f) * 0.5f; c_real *= r_adjust; c_imag *= r_adjust; Func julia; julia(x, y, c) = Tuple((x - 511.5f)/256.0f, (y - 511.5f)/256.0f); const int iters = 20; RDom t(1, iters); Expr old_real = julia(x, y, t-1)[0]; Expr old_imag = julia(x, y, t-1)[1]; Expr new_real = old_real * old_real - old_imag * old_imag + c_real; Expr new_imag = 2 * old_real * old_imag + c_imag; julia(x, y, t) = Tuple(new_real, new_imag); // How many iterations until something escapes a circle of radius 2? new_real = julia(x, y, t)[0]; new_imag = julia(x, y, t)[1]; Expr mag = new_real * new_real + new_imag * new_imag; Expr escape = argmin(select(mag < 4, 1, 0))[0]; // Now pick a color based on the number of escape iterations. Expr r_scale = 128; Expr g_scale = 200; Expr b_scale = 256; Func color_map; Expr escape_f = sqrt(cast<float>(x) / (iters + 1)); Expr r = cast<int32_t>(escape_f * r_scale); Expr g = cast<int32_t>(escape_f * g_scale); Expr b = cast<int32_t>(escape_f * b_scale); color_map(x) = (255 << 24) | (r << 16) | (g << 8) | b; Func render; render(x, y) = color_map(escape); Var yi; // The julia set has rotational symmetry, so we just render // the top half and then flip it for the bottom half. Func final; Expr y_up = min(y, 511); Expr y_down = max(y, 512); final(x, y) = select(y < 512, render(x, y_up), render(1023 - x, 1023 - y_down)); Var yo; final.bound(x, 0, 1024).bound(y, 0, 1024);
int main(int argc, char **argv) { // First define the function that gives the initial state. { Func initial; // The state is just a counter initial() = 0; initial.compile_to_file("julia_init"); } // Then the function that updates the state. Also depends on user input. { ImageParam state(Int(32), 0); Param<int> mouse_x, mouse_y; Func new_state; // Increment the counter new_state() = state() + 1; new_state.compile_to_file("julia_update", state, mouse_x, mouse_y); } // Now the function that converts the state into an argb image. { ImageParam state(Int(32), 0); Expr c_real = cos(state() / 60.0f); Expr c_imag = sin(state() / 43.0f); Expr r_adjust = (cos(state() / 86.0f) + 2.0f) * 0.25f; c_real *= r_adjust; c_imag *= r_adjust; Func julia; julia(x, y, c) = Tuple((x - 511.5f)/350.0f, (y - 511.5f)/350.0f); const int iters = 20; RDom t(1, iters); Expr old_real = julia(x, y, t-1)[0]; Expr old_imag = julia(x, y, t-1)[1]; Expr new_real = old_real * old_real - old_imag * old_imag + c_real; Expr new_imag = 2 * old_real * old_imag + c_imag; Expr mag = new_real * new_real + new_imag * new_imag; new_real = select(mag > 1e20f, old_real, new_real); new_imag = select(mag > 1e20f, old_imag, new_imag); julia(x, y, t) = Tuple(new_real, new_imag); // Define some arbitrary measure on the complex plane, and // compute the minimum of that measure over the orbit of each // point. new_real = julia(x, y, t)[0]; new_imag = julia(x, y, t)[1]; mag = new_real * c_real - new_imag * new_imag * c_imag; Expr measure = minimum(abs(mag - 0.1f)); // Now pick a color based on that Expr r_f = 16 * sqrt(2.0f/(measure + 0.01f)); Expr b_f = 512 * measure * fast_exp(-measure*measure); Expr g_f = (r_f + b_f)/2; Expr min_c = min(r_f, min(b_f, g_f)); r_f -= min_c; b_f -= min_c; g_f -= min_c; Expr r = cast<int32_t>(min(r_f, 255)); Expr g = cast<int32_t>(min(g_f, 255)); Expr b = cast<int32_t>(min(b_f, 255)); Expr color = (255 << 24) | (r << 16) | (g << 8) | b; Func render; render(x, y) = color; Var yi; // The julia set has rotational symmetry, so we just render // the top half and then flip it for the bottom half. Func final; Expr y_up = min(y, 511); Expr y_down = max(y, 512); final(x, y) = select(y < 512, render(x, y_up), render(1023 - x, 1023 - y_down)); Var yo; final.bound(x, 0, 1024).bound(y, 0, 1024);
int main(int argc, char **argv) { // We'll define the simple one-stage pipeline that we used in lesson 10. Func brighter; Var x, y; // Declare the arguments. Param<uint8_t> offset; ImageParam input(type_of<uint8_t>(), 2); std::vector<Argument> args(2); args[0] = input; args[1] = offset; // Define the Func. brighter(x, y) = input(x, y) + offset; // Schedule it. brighter.vectorize(x, 16).parallel(y); // The following line is what we did in lesson 10. It compiles an // object file suitable for the system that you're running this // program on. For example, if you compile and run this file on // 64-bit linux on an x86 cpu with sse4.1, then the generated code // will be suitable for 64-bit linux on x86 with sse4.1. brighter.compile_to_file("lesson_11_host", args); // We can also compile object files suitable for other cpus and // operating systems. You do this with an optional third argument // to compile_to_file which specifies the target to compile for. // Let's use this to compile a 32-bit arm android version of this code: Target target; target.os = Target::Android; // The operating system target.arch = Target::ARM; // The CPU architecture target.bits = 32; // The bit-width of the architecture std::vector<Target::Feature> arm_features; // A list of features to set target.set_features(arm_features); brighter.compile_to_file("lesson_11_arm_32_android", args, target); // Pass the target as the last argument. // And now a Windows object file for 64-bit x86 with AVX and SSE 4.1: target.os = Target::Windows; target.arch = Target::X86; target.bits = 64; std::vector<Target::Feature> x86_features; x86_features.push_back(Target::AVX); x86_features.push_back(Target::SSE41); target.set_features(x86_features); brighter.compile_to_file("lesson_11_x86_64_windows", args, target); // And finally an iOS mach-o object file for one of Apple's 32-bit // ARM processors - the A6. It's used in the iPhone 5. The A6 uses // a slightly modified ARM architecture called ARMv7s. We specify // this using the target features field. Support for Apple's // 64-bit ARM processors is very new in llvm, and still somewhat // flaky. target.os = Target::IOS; target.arch = Target::ARM; target.bits = 32; std::vector<Target::Feature> armv7s_features; armv7s_features.push_back(Target::ARMv7s); target.set_features(armv7s_features); brighter.compile_to_file("lesson_11_arm_32_ios", args, target); // Now let's check these files are what they claim, by examining // their first few bytes. // 32-arm android object files start with the magic bytes: uint8_t arm_32_android_magic[] = {0x7f, 'E', 'L', 'F', // ELF format 1, // 32-bit 1, // 2's complement little-endian 1, // Current version of elf 3, // Linux 0, 0, 0, 0, 0, 0, 0, 0, // 8 unused bytes 1, 0, // Relocatable 0x28, 0}; // ARM FILE *f = fopen("lesson_11_arm_32_android.o", "rb"); uint8_t header[32]; if (!f || fread(header, 32, 1, f) != 1) { printf("Object file not generated\n"); return -1; } fclose(f); if (memcmp(header, arm_32_android_magic, sizeof(arm_32_android_magic))) { printf("Unexpected header bytes in 32-bit arm object file.\n"); return -1; } // 64-bit windows object files start with the magic 16-bit value 0x8664 // (presumably referring to x86-64) uint8_t win_64_magic[] = {0x64, 0x86}; f = fopen("lesson_11_x86_64_windows.o", "rb"); if (!f || fread(header, 32, 1, f) != 1) { printf("Object file not generated\n"); return -1; } fclose(f); if (memcmp(header, win_64_magic, sizeof(win_64_magic))) { printf("Unexpected header bytes in 64-bit windows object file.\n"); return -1; } // 32-bit arm iOS mach-o files start with the following magic bytes: uint32_t arm_32_ios_magic[] = {0xfeedface, // Mach-o magic bytes 12, // CPU type is ARM 11, // CPU subtype is ARMv7s 1}; // It's a relocatable object file. f = fopen("lesson_11_arm_32_ios.o", "rb"); if (!f || fread(header, 32, 1, f) != 1) { printf("Object file not generated\n"); return -1; } fclose(f); if (memcmp(header, arm_32_ios_magic, sizeof(arm_32_ios_magic))) { printf("Unexpected header bytes in 32-bit arm ios object file.\n"); return -1; } // It looks like the object files we produced are plausible for // those targets. We'll count that as a success for the purposes // of this tutorial. For a real application you'd then need to // figure out how to integrate Halide into your cross-compilation // toolchain. There are several small examples of this in the // Halide repository under the apps folder. See HelloAndroid and // HelloiOS here: // https://github.com/halide/Halide/tree/master/apps/ printf("Success!\n"); return 0; }
int main(int argc, char **argv) { // First define the function that gives the initial state. { Func initial; // The initial state is a quantity of two chemicals present at each pixel initial(x, y, c) = random_float(); initial.compile_to_file("reaction_diffusion_init", {}); } // Then the function that updates the state. Also depends on user input. { ImageParam state(Float(32), 3); Param<int> mouse_x, mouse_y; Expr a = state(x, y, 0), b = state(x, y, 1); Func clamped = BoundaryConditions::repeat_edge(state); RDom kernel(-2, 5); Func g, gaussian; g(x) = exp(-x*x*0.3f); gaussian(x) = g(x) / sum(g(kernel)); gaussian.compute_root(); Func blur_x, blur_y; blur_x(x, y, c) = sum(gaussian(kernel) * clamped(x + kernel, y, c)); blur_y(x, y, c) = sum(gaussian(kernel) * blur_x(x, y + kernel, c)); // Diffusion Expr new_a = blur_y(x, y, 0); Expr new_b = blur_y(x, y, 1); // Reaction Expr f = 0.08f; Expr k = 0.16f; new_a += 0.4f * (a - a*a*a - b + k); new_b += 0.4f * f * (a - b); new_a = clamp(new_a, 0.0f, 1.0f); new_b = clamp(new_b, 0.0f, 1.0f); Func new_state; new_state(x, y, c) = select(c == 0, new_a, new_b); // Finally add some noise at the edges to keep things moving Expr r = lerp(-0.05f, 0.05f, random_float()); new_state(x, 0, c) += r; new_state(x, 1023, c) += r; new_state(0, y, c) += r; new_state(1023, y, c) += r; new_state .vectorize(x, 4) .bound(c, 0, 2).unroll(c); Var yi; new_state.split(y, y, yi, 16).parallel(y); blur_x.store_at(new_state, y).compute_at(new_state, yi); blur_x.vectorize(x, 4); clamped.store_at(new_state, y).compute_at(new_state, yi); new_state.compile_to_file("reaction_diffusion_update", {state, mouse_x, mouse_y}); } // Now the function that converts the state into an argb image. { ImageParam state(Float(32), 3); Expr a = state(x, y, 0), b = state(x, y, 1); Expr alpha = 255 << 24; Expr red = cast<int32_t>(a * 255) * (1 << 16); Expr green = 0; Expr blue = cast<int32_t>(b * 255); Func render; render(x, y) = alpha + red + green + blue; render.vectorize(x, 4); Var yi; render.split(y, y, yi, 16).parallel(y); render.compile_to_file("reaction_diffusion_render", {state}); } return 0; }