void profile_for_type(const T& elem, const std::string& type, const int add_flops, const int multiply_flops) { std::cout << "Profiling for array type " << type << "." << std::endl; unsigned int n = 100; pyQCD::LexicoLayout layout(std::vector<unsigned int>{n}); pyQCD::Lattice<T> lattice1(layout, elem); decltype(lattice1) lattice2(layout, elem); decltype(lattice1) lattice3(layout, elem); decltype(lattice1) result(layout, elem); std::cout << "Profiling f(x, y, z) = x + y + z:" << std::endl; benchmark([&] () { result = lattice1 + lattice2 + lattice3; }, 2 * add_flops * n, 1000000); std::cout << "Profiling f(x, y) = 5.0 * x + y:" << std::endl; benchmark([&] () { result = 5.0 * lattice1 + lattice2; }, 2 * add_flops * n, 1000000); std::cout << "Profiling f(x, y, z) = x * y + z:" << std::endl; benchmark([&] () { result = lattice1 * lattice2 + lattice3; }, (add_flops + multiply_flops) * n, 1000000); std::cout << std::endl; }
float noise_gen(const struct noise *noise, float *f) { int n[MAX_DIMENSIONS]; float r[MAX_DIMENSIONS]; float w[MAX_DIMENSIONS]; for(int i = 0; i < noise->ndim; i++) { n[i] = floor(f[i]); r[i] = f[i] - n[i]; w[i] = cubic(r[i]); } float value; switch(noise->ndim) { case 1: value = lerp(lattice1(noise, n[0] , r[0] ), lattice1(noise, n[0]+1, r[0]+1), w[0]); break; case 2: value = lerp(lerp(lattice2(noise, n[0], r[0], n[1], r[1]), lattice2(noise, n[0]+1, r[0]-1, n[1], r[1]), w[0]), lerp(lattice2(noise, n[0], r[0], n[1]+1, r[1]-1), lattice2(noise, n[0]+1, r[0]-1, n[1]+1, r[1]-1), w[0]), w[1]); break; case 3: //printf("noise3 start\n"); value = lerp(lerp(lerp(lattice3(noise, n[0] , r[0] , n[1], r[1], n[2], r[2]), lattice3(noise, n[0]+1, r[0]-1, n[1], r[1], n[2], r[2]), w[0]), lerp(lattice3(noise, n[0] , r[0] , n[1]+1, r[1]-1, n[2], r[2]), lattice3(noise, n[0]+1, r[0]-1, n[1]+1, r[1]-1, n[2], r[2]), w[0]), w[1]), lerp(lerp(lattice3(noise, n[0] , r[0] , n[1], r[1], n[2]+1, r[2]-1), lattice3(noise, n[0]+1, r[0]-1, n[1], r[1], n[2]+1, r[2]-1), w[0]), lerp(lattice3(noise, n[0] , r[0] , n[1]+1, r[1]-1, n[2]+1, r[2]-1), lattice3(noise, n[0]+1, r[0]-1, n[1]+1, r[1]-1, n[2]+1, r[2]-1), w[0]), w[1]), w[2]); //printf("noise3=%g\n", value); break; case 4: value = lerp(lerp(lerp(lerp(lattice4(noise, n[0], r[0], n[1], r[1], n[2], r[2], n[3], r[3]), lattice4(noise, n[0]+1, r[0]-1, n[1], r[1], n[2], r[2], n[3], r[3]), w[0]), lerp(lattice4(noise, n[0], r[0], n[1]+1, r[1]-1, n[2], r[2], n[3], r[3]), lattice4(noise, n[0]+1, r[0]-1, n[1]+1, r[1]-1, n[2], r[2], n[3], r[3]), w[0]), w[1]), lerp(lerp(lattice4(noise, n[0], r[0], n[1], r[1], n[2]+1, r[2]-1, n[3], r[3]), lattice4(noise, n[0]+1, r[0]-1, n[1], r[1], n[2]+1, r[2]-1, n[3], r[3]), w[0]), lerp(lattice4(noise, n[0], r[0], n[1]+1, r[1]-1, n[2]+1, r[2]-1, n[3], r[3]), lattice4(noise, n[0]+1, r[0]-1, n[1]+1, r[1]-1, n[2]+1, r[2]-1, n[3], r[3]), w[0]), w[1]), w[2]), lerp(lerp(lerp(lattice4(noise, n[0], r[0], n[1], r[1], n[2], r[2], n[3]+1, r[3]-1), lattice4(noise, n[0]+1, r[0]-1, n[1], r[1], n[2], r[2], n[3]+1, r[3]-1), w[0]), lerp(lattice4(noise, n[0], r[0], n[1]+1, r[1]-1, n[2], r[2], n[3]+1, r[3]-1), lattice4(noise, n[0]+1, r[0]-1, n[1]+1, r[1]-1, n[2], r[2], n[3]+1, r[3]-1), w[0]), w[1]), lerp(lerp(lattice4(noise, n[0], r[0], n[1], r[1], n[2]+1, r[2]-1, n[3]+1, r[3]-1), lattice4(noise, n[0]+1, r[0]-1, n[1], r[1], n[2]+1, r[2]-1, n[3]+1, r[3]-1), w[0]), lerp(lattice4(noise, n[0], r[0], n[1]+1, r[1]-1, n[2]+1, r[2]-1, n[3]+1, r[3]-1), lattice4(noise, n[0]+1, r[0]-1, n[1]+1, r[1]-1, n[2]+1, r[2]-1, n[3]+1, r[3]-1), w[0]), w[1]), w[2]), w[3]); break; default: abort(); } return clamp(-0.99999f, 0.99999f, value); }