void SPHSystem::animation() { if(sys_running != 1) { return; } set_parameters(hParam); calc_hash(dHash, dIndex,dMem, hParam->num_particle); sort_particles(dHash, dIndex, hParam->num_particle); find_start_end(dStart, dEnd, dHash, dIndex, hParam->num_particle, hParam->tot_cell); integrate_velocity(dMem, hParam->num_particle); compute(dMem, dHash, dIndex, dStart, dEnd, hParam->num_particle, hParam->tot_cell); copy_buffer(dMem, dPoints, hParam->num_particle); copy_array(hPoints, dPoints, sizeof(float2)*hParam->num_particle, CUDA_DEV_TO_HOST); copy_array(hMem, dMem, sizeof(Particle)*hParam->num_particle, CUDA_DEV_TO_HOST); }
/**
 * Simulates a single frame on the OpenCL device.
 *
 * Steps, in order:
 *   1. upload the input particles,
 *   2. recompute the padded bounding box and grid dimensions on the host,
 *   3. locate particles in the grid and sort them (via sort_particles),
 *   4. run the density/pressure, forces and advection/collision kernels,
 *   5. read the final particle state back.
 *
 * Side effects: updates parameters.min_point, parameters.max_point,
 * parameters.grid_size_{x,y,z} and parameters.grid_cell_count.
 *
 * @param in_particles   Host array of parameters.particles_count particles
 *                       describing the state at the start of the frame.
 * @param out_particles  Host array of parameters.particles_count particles.
 *                       It is used as scratch space while sorting and holds
 *                       the end-of-frame state on return.
 */
void sph_simulation::simulate_single_frame(particle* in_particles,
                                           particle* out_particles) {
  // Calculate the optimal size for workgroups.
  // Start the group size at its maximum and halve it until it divides the
  // particle count. Optimally parameters.particles_count should be divisible
  // by CL_DEVICE_MAX_WORK_GROUP_SIZE.
  // Refer to CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE.
  unsigned int size_of_groups =
      running_device->getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
  while (parameters.particles_count % size_of_groups != 0) {
    size_of_groups /= 2;
  }

  // Make sure that the workgroups are small enough and that the particle data
  // will fit in local memory.
  assert(size_of_groups <=
         running_device->getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>());
  assert(size_of_groups * sizeof(particle) <=
         running_device->getInfo<CL_DEVICE_LOCAL_MEM_SIZE>());

  // Initial transfer to the GPU.
  check_cl_error(queue_.enqueueWriteBuffer(
      front_buffer_, CL_TRUE, 0, sizeof(particle) * parameters.particles_count,
      in_particles));

  // Recalculate the boundaries of the grid since the particles probably moved
  // since the last frame.
  cl_float min_x, max_x, min_y, max_y, min_z, max_z;
  cl_float grid_cell_side_length = (parameters.h * 2);

  // Seed the running extrema with the float range itself. lowest() is the
  // most negative finite value; min() would be the smallest positive one.
  min_x = min_y = min_z = std::numeric_limits<cl_float>::max();
  max_x = max_y = max_z = std::numeric_limits<cl_float>::lowest();

  for (size_t i = 0; i < parameters.particles_count; ++i) {
    cl_float3 pos = in_particles[i].position;

    if (pos.s[0] < min_x) min_x = pos.s[0];
    if (pos.s[1] < min_y) min_y = pos.s[1];
    if (pos.s[2] < min_z) min_z = pos.s[2];

    if (pos.s[0] > max_x) max_x = pos.s[0];
    if (pos.s[1] > max_y) max_y = pos.s[1];
    if (pos.s[2] > max_z) max_z = pos.s[2];
  }

  // Pad every side of the box by two cell lengths.
  // This simplifies calculations further down the line.
  min_x -= grid_cell_side_length * 2;
  min_y -= grid_cell_side_length * 2;
  min_z -= grid_cell_side_length * 2;

  max_x += grid_cell_side_length * 2;
  max_y += grid_cell_side_length * 2;
  max_z += grid_cell_side_length * 2;

  parameters.min_point.s[0] = min_x;
  parameters.min_point.s[1] = min_y;
  parameters.min_point.s[2] = min_z;

  parameters.max_point.s[0] = max_x;
  parameters.max_point.s[1] = max_y;
  parameters.max_point.s[2] = max_z;

  parameters.grid_size_x =
      static_cast<cl_uint>((max_x - min_x) / grid_cell_side_length);
  parameters.grid_size_y =
      static_cast<cl_uint>((max_y - min_y) / grid_cell_side_length);
  parameters.grid_size_z =
      static_cast<cl_uint>((max_z - min_z) / grid_cell_side_length);

  // The Z-curve uses interleaving of bits in a uint to calculate the index.
  // This means we have floor(32/dimension_count) bits to represent each
  // dimension.
  assert(parameters.grid_size_x < 1024);
  assert(parameters.grid_size_y < 1024);
  assert(parameters.grid_size_z < 1024);

  parameters.grid_cell_count = get_grid_index_z_curve(
      parameters.grid_size_x, parameters.grid_size_y, parameters.grid_size_z);

  // Locate each particle in the grid and build the grid count table.
  // The table lives in a vector so its storage is released on every exit path
  // from this function; the raw pointer is only a non-owning view of it.
  std::vector<unsigned int> cell_table_storage(parameters.grid_cell_count);
  unsigned int* cell_table = cell_table_storage.data();

  set_kernel_args(kernel_locate_in_grid_, front_buffer_, back_buffer_,
                  parameters);

  check_cl_error(queue_.enqueueNDRangeKernel(
      kernel_locate_in_grid_, cl::NullRange,
      cl::NDRange(parameters.particles_count), cl::NDRange(size_of_groups)));

  check_cl_error(queue_.enqueueReadBuffer(
      back_buffer_, CL_TRUE, 0, sizeof(particle) * parameters.particles_count,
      out_particles));

  sort_particles(out_particles, back_buffer_, front_buffer_, cell_table);

  cl::Buffer cell_table_buffer(
      context_, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
      sizeof(unsigned int) * parameters.grid_cell_count);

  check_cl_error(queue_.enqueueWriteBuffer(
      cell_table_buffer, CL_TRUE, 0,
      sizeof(unsigned int) * parameters.grid_cell_count, cell_table));

  check_cl_error(queue_.enqueueWriteBuffer(
      front_buffer_, CL_TRUE, 0, sizeof(particle) * parameters.particles_count,
      out_particles));

  // Compute the density and the pressure term at every particle.
  check_cl_error(kernel_density_pressure_.setArg(0, front_buffer_));
  check_cl_error(kernel_density_pressure_.setArg(
      1, size_of_groups * sizeof(particle),
      nullptr));  // Declare local memory in arguments
  check_cl_error(kernel_density_pressure_.setArg(2, back_buffer_));
  check_cl_error(kernel_density_pressure_.setArg(3, parameters));
  check_cl_error(kernel_density_pressure_.setArg(4, precomputed_terms));
  check_cl_error(kernel_density_pressure_.setArg(5, cell_table_buffer));

  check_cl_error(queue_.enqueueNDRangeKernel(
      kernel_density_pressure_, cl::NullRange,
      cl::NDRange(parameters.particles_count), cl::NDRange(size_of_groups)));

  // Upload the scene geometry used by the collision step.
  cl::Buffer face_normals_buffer(
      context_, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
      sizeof(float) * current_scene.face_normals.size());
  cl::Buffer vertices_buffer(context_,
                             CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
                             sizeof(float) * current_scene.vertices.size());
  cl::Buffer indices_buffer(
      context_, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
      sizeof(unsigned int) * current_scene.indices.size());

  check_cl_error(queue_.enqueueWriteBuffer(
      face_normals_buffer, CL_TRUE, 0,
      sizeof(float) * current_scene.face_normals.size(),
      current_scene.face_normals.data()));
  check_cl_error(
      queue_.enqueueWriteBuffer(vertices_buffer, CL_TRUE, 0,
                                sizeof(float) * current_scene.vertices.size(),
                                current_scene.vertices.data()));
  check_cl_error(queue_.enqueueWriteBuffer(
      indices_buffer, CL_TRUE, 0,
      sizeof(unsigned int) * current_scene.indices.size(),
      current_scene.indices.data()));

  // Compute the density-forces at every particle.
  set_kernel_args(kernel_forces_, back_buffer_, front_buffer_, parameters,
                  precomputed_terms, cell_table_buffer);

  check_cl_error(queue_.enqueueNDRangeKernel(
      kernel_forces_, cl::NullRange, cl::NDRange(parameters.particles_count),
      cl::NDRange(size_of_groups)));

  // Advect particles and resolve collisions with scene geometry.
  set_kernel_args(kernel_advection_collision_, front_buffer_, back_buffer_,
                  parameters, precomputed_terms, cell_table_buffer,
                  face_normals_buffer, vertices_buffer, indices_buffer,
                  current_scene.face_count);

  check_cl_error(queue_.enqueueNDRangeKernel(
      kernel_advection_collision_, cl::NullRange,
      cl::NDRange(parameters.particles_count), cl::NDRange(size_of_groups)));

  // Blocking read of the final particle state for this frame.
  check_cl_error(queue_.enqueueReadBuffer(
      back_buffer_, CL_TRUE, 0, sizeof(particle) * parameters.particles_count,
      out_particles));
}
void particles_render(void) { extern int lights_on; int i; float diffuse[4] = { 1, 1, 1, 0 }; float mag; sort_particles(); #ifdef NO_SMOKELIGHT glDisable(GL_LIGHTING); #else if ( lights_on ) { glDisable(GL_LIGHT0); glEnable(GL_LIGHT3); } apply_mat(MAT_SMOKE); #endif glEnable(GL_BLEND); glBlendFunc (GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); glEnable(GL_TEXTURE_2D); glBindTexture(GL_TEXTURE_2D, texid); glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE); glBegin(GL_QUADS); for(i=0; i<PARTICLE_COUNT; i++) { if ( particles[i].lifespan <= 0 ) continue; #ifndef NO_SMOKELIGHT mag = sqrt( particles[i].pos.x*particles[i].pos.x + particles[i].pos.y*particles[i].pos.y + particles[i].pos.z*particles[i].pos.z); mag /= 2; glNormal3f( -particles[i].pos.x/mag, -particles[i].pos.y/mag, -particles[i].pos.z/mag ); diffuse[0] = colors[sm_color][0]; diffuse[1] = colors[sm_color][1]; diffuse[2] = colors[sm_color][2]; diffuse[3] = 0.5 * (float)particles[i].lifespan / ((float)MAX_LIFESPAN); glMaterialfv(GL_FRONT, GL_AMBIENT, diffuse); glMaterialfv(GL_FRONT, GL_DIFFUSE, diffuse); #else glColor4f( colors[sm_color][0], colors[sm_color][1], colors[sm_color][2], 0.5 * (float)particles[i].lifespan / ((float)MAX_LIFESPAN) ); #endif glTexCoord2f(0,1); glVertex3f( particles[i].pos.x - PARTICLE_WIDTH, particles[i].pos.y + PARTICLE_WIDTH, particles[i].pos.z ); glTexCoord2f(0,0); glVertex3f( particles[i].pos.x -PARTICLE_WIDTH, particles[i].pos.y - PARTICLE_WIDTH, particles[i].pos.z ); glTexCoord2f(1,0); glVertex3f( particles[i].pos.x + PARTICLE_WIDTH, particles[i].pos.y - PARTICLE_WIDTH, particles[i].pos.z ); glTexCoord2f(1,1); glVertex3f( particles[i].pos.x + PARTICLE_WIDTH, particles[i].pos.y + PARTICLE_WIDTH, particles[i].pos.z ); } glEnd(); glDisable(GL_COLOR_MATERIAL); glDisable(GL_TEXTURE_2D); glDisable(GL_BLEND); if ( lights_on ) { glEnable(GL_LIGHT0); glDisable(GL_LIGHT3); } }