/**
 * Reads the face lump `l` (located at mod_base + l->fileofs) and fills
 * `surfaces` / `numsurfaces` with one msurface_s per on-disk face record.
 *
 * For every face it resolves the plane and texinfo pointers, gathers the face
 * vertices through `surfedges` / `edges` / `vertices`, computes the diffuse
 * texture coordinates and, for faces that get a lightmap, the lightmap
 * coordinates. The per-face data is appended to three item buffers which are
 * handed to the renderer once all faces are processed (vertbuf_model,
 * texcoord1_model, texcoord2_model).
 *
 * Calls Sys_Error when the lump size is not a multiple of the record size,
 * when a texinfo index is out of range, or when a face has more vertices than
 * the scratch arrays can hold.
 *
 * All multi-byte fields are passed through Swap_LittleShort / Swap_LittleLong
 * once and only the converted values are used afterwards.
 *
 * @param l  lump descriptor giving the offset and length of the face data.
 */
void dxModel::LoadFaces(lump_s *l)
{
    typedef dxItemBuffer<dxVec2> dxVec2Buffer;
    typedef dxItemBuffer<dxVec3> dxVec3Buffer;

    // Capacity of the per-face scratch arrays below.
    const int MAX_SURFACE_VERTS = 32;

    dxVec3Buffer vertices_buffer(512 * 1024);
    dxVec2Buffer txcoord1_buffer(512 * 1024);
    dxVec2Buffer txcoord2_buffer(512 * 1024);

    // Layout of one face record as stored in the lump.
    struct dface_s
    {
        short planenum;
        short side;

        int firstedge; // we must support > 64k edges
        short numedges;
        short texinfo;

        // lighting info
        byte styles[MAXLIGHTMAPS];
        int lightofs; // start of [numstyles*surfsize] samples
    };

    dface_s * in = (dface_s*)(mod_base + l->fileofs);
    if (l->filelen % sizeof(*in))
    {
        Sys_Error("bad face count");
    }

    int count = l->filelen / sizeof(*in);
    surfaces = (msurface_s*)DX_MEM_ALLOC(sizeof(msurface_s) * count);
    numsurfaces = count;
    msurface_s * out = surfaces;

    // Scratch storage for the face currently being processed.
    dxVec3 temp_vertices[MAX_SURFACE_VERTS];
    dxVec2 temp_txcoord1[MAX_SURFACE_VERTS];
    dxVec2 temp_txcoord2[MAX_SURFACE_VERTS];

    for (int i = 0; i < count; i++, in++, out++)
    {
        out->firstedge = Swap_LittleLong(in->firstedge);
        // numedges is a 16-bit field, so it needs the 16-bit swap.
        out->numedges = Swap_LittleShort(in->numedges);
        out->flags = SURF_NORMAL;
        out->lightmap = NULL;
        out->light_s = 0;
        out->light_t = 0;

        short planenum = Swap_LittleShort(in->planenum);
        short side = Swap_LittleShort(in->side);
        if (side)
        {
            out->flags |= SURF_PLANEBACK;
        }
        out->plane = planes + planenum;

        // Reject indices on either side of the texinfo array.
        short texinfo_index = Swap_LittleShort(in->texinfo);
        if (texinfo_index < 0 || texinfo_index >= numtexinfo)
        {
            Sys_Error("bad texinfo index");
        }
        out->texinfo = texinfo + texinfo_index;

        // Use the converted edge count, not the raw on-disk value.
        out->numverts = out->numedges;
        out->vet_offset = vertices_buffer.GetCount();

        // Texture size defaults to 1x1 so the divisions below stay defined
        // when the texinfo has no texture attached.
        int tex_w = 1;
        int tex_h = 1;
        float * v1 = out->texinfo->vecs[0];
        float * v2 = out->texinfo->vecs[1];
        if (out->texinfo->texture)
        {
            tex_w = out->texinfo->texture->width;
            tex_h = out->texinfo->texture->height;
        }

        if (out->numverts > MAX_SURFACE_VERTS)
        {
            Sys_Error("too many surface vertices");
        }

        for (int j = 0; j < out->numverts; j++)
        {
            // A non-negative surfedge selects the first vertex of the edge,
            // a negative one selects the second vertex of edge -e.
            int e = surfedges[out->firstedge + j];
            if (e >= 0)
            {
                temp_vertices[j] = vertices[edges[e].v[0]];
            }
            else
            {
                temp_vertices[j] = vertices[edges[-e].v[1]];
            }

            if (out->texinfo->texture)
            {
                float * vts = temp_vertices[j];
                float s = v1[0] * vts[0] + v1[1] * vts[1] + v1[2] * vts[2] + v1[3];
                float t = v2[0] * vts[0] + v2[1] * vts[1] + v2[2] * vts[2] + v2[3];
                temp_txcoord1[j].x = s / tex_w;
                temp_txcoord1[j].y = t / tex_h;
            }
            else
            {
                temp_txcoord1[j].x = 0;
                temp_txcoord1[j].y = 0;
            }

            // Lightmap coordinates stay zero unless a lightmap is created below.
            temp_txcoord2[j].x = 0;
            temp_txcoord2[j].y = 0;
        }

        CalcSurfaceExtents(out);

        // lighting info
        for (int j = 0; j < MAXLIGHTMAPS; j++)
        {
            out->styles[j] = in->styles[j];
        }

        int lightofs = Swap_LittleLong(in->lightofs);
        if (-1 == lightofs)
        {
            out->samples = NULL;
        }
        else
        {
            out->samples = lightdata + lightofs;
        }

        // Faces without a texture are not appended to the vertex buffers.
        // NOTE(review): out->vet_offset / out->numverts are still set for
        // them - confirm the renderer never draws such faces.
        if (!out->texinfo->texture)
        {
            continue;
        }

        // set the drawing flags; only ordinary faces receive a lightmap
        const char * tex_name = out->texinfo->texture->name;
        if (!strncmp(tex_name, "sky", 3))
        {
            out->flags |= (SURF_DRAWSKY | SURF_DRAWTILED);
        }
        else if (!strncmp(tex_name, "black", 5)
              || !strncmp(tex_name, "hint", 4)
              || !strncmp(tex_name, "aaatrigger", 10))
        {
            // Replaces (does not OR into) the flags set so far.
            out->flags = SURF_NODRAW;
        }
        else if (!strncmp(tex_name, "*", 1)) // turbulent
        {
            out->flags |= (SURF_DRAWTURB | SURF_DRAWTILED);
            for (int k = 0; k < 2; k++)
            {
                out->extents[k] = 16384;
                out->texturemins[k] = -8192;
            }
            //GL_SubdivideSurface (out); // cut up polygon for warps
        }
        else
        {
            // load lightmap
            CreateSurfaceLightmap(out);

            // set lightmap texture coordinate
            for (int j = 0; j < out->numverts; j++)
            {
                float s = Vec3DotProduct(temp_vertices[j], out->texinfo->vecs[0]) + out->texinfo->vecs[0][3];
                s -= out->texturemins[0];
                s += out->light_s * 16;
                s += 8;
                s /= BLOCK_WIDTH * 16;

                float t = Vec3DotProduct(temp_vertices[j], out->texinfo->vecs[1]) + out->texinfo->vecs[1][3];
                t -= out->texturemins[1];
                t += out->light_t * 16;
                t += 8;
                t /= BLOCK_HEIGHT * 16;

                temp_txcoord2[j].x = s;
                temp_txcoord2[j].y = t;
            }
        }

        // Append this face's vertices and both coordinate sets.
        for (int j = 0; j < out->numverts; j++)
        {
            vertices_buffer.Add(temp_vertices[j]);
            txcoord1_buffer.Add(temp_txcoord1[j]);
            txcoord2_buffer.Add(temp_txcoord2[j]);
        }
    }

    renderer.LoadLightmaps();
    vertbuf_model = renderer.CreateVertexBufferVec3(vertices_buffer.GetBuffer(), vertices_buffer.GetCount());
    texcoord1_model = renderer.CreateVertexBufferVec2(txcoord1_buffer.GetBuffer(), txcoord1_buffer.GetCount());
    texcoord2_model = renderer.CreateVertexBufferVec2(txcoord2_buffer.GetBuffer(), txcoord2_buffer.GetCount());
}
/**
 * Advances the simulation by one frame.
 *
 * Uploads `in_particles` to the device, recomputes the grid bounds from the
 * particle positions, locates and sorts the particles in the grid, then runs
 * the density/pressure, forces and advection/collision kernels in that order.
 * The resulting particles are read back into `out_particles`.
 *
 * Both arrays must hold at least parameters.particles_count elements.
 * `out_particles` is also used as intermediate host storage for the sort step.
 *
 * @param in_particles   particle state at the start of the frame.
 * @param out_particles  receives the particle state at the end of the frame.
 */
void sph_simulation::simulate_single_frame(particle* in_particles,
                                           particle* out_particles) {
  // Owns the host-side cell table so that it is released on every exit path,
  // including when one of the calls below throws.
  struct cell_table_owner {
    explicit cell_table_owner(size_t count) : data(new unsigned int[count]) {}
    ~cell_table_owner() { delete[] data; }
    cell_table_owner(const cell_table_owner&) = delete;
    cell_table_owner& operator=(const cell_table_owner&) = delete;
    unsigned int* data;
  };

  // Calculate the optimal size for workgroups
  // Start groups size at their maximum, make them smaller if necessary
  // Optimally parameters.particles_count should be divisible by
  // CL_DEVICE_MAX_WORK_GROUP_SIZE
  // Refer to CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE
  unsigned int size_of_groups =
      running_device->getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
  while (parameters.particles_count % size_of_groups != 0) {
    size_of_groups /= 2;
  }

  // Make sure that the workgroups are small enough and that the particle data
  // will fit in local memory
  assert(size_of_groups <=
         running_device->getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>());
  assert(size_of_groups * sizeof(particle) <=
         running_device->getInfo<CL_DEVICE_LOCAL_MEM_SIZE>());

  // Initial transfer to the GPU
  check_cl_error(queue_.enqueueWriteBuffer(
      front_buffer_, CL_TRUE, 0, sizeof(particle) * parameters.particles_count,
      in_particles));

  // Recalculate the boundaries of the grid since the particles probably moved
  // since the last frame.
  cl_float min_x, max_x, min_y, max_y, min_z, max_z;
  cl_float grid_cell_side_length = (parameters.h * 2);

  // Seed the bounds with the extreme float values so that the first particle
  // always replaces them (lowest(), not min(): min() is the smallest positive
  // value for floating-point types).
  min_x = min_y = min_z = std::numeric_limits<cl_float>::max();
  max_x = max_y = max_z = std::numeric_limits<cl_float>::lowest();

  for (size_t i = 0; i < parameters.particles_count; ++i) {
    cl_float3 pos = in_particles[i].position;

    if (pos.s[0] < min_x) min_x = pos.s[0];
    if (pos.s[1] < min_y) min_y = pos.s[1];
    if (pos.s[2] < min_z) min_z = pos.s[2];

    if (pos.s[0] > max_x) max_x = pos.s[0];
    if (pos.s[1] > max_y) max_y = pos.s[1];
    if (pos.s[2] > max_z) max_z = pos.s[2];
  }

  // Extend the bounds by two cell lengths on every side to create a padding
  // layer. This simplifies calculations further down the line.
  const cl_float padding = grid_cell_side_length * 2;
  min_x -= padding;
  min_y -= padding;
  min_z -= padding;
  max_x += padding;
  max_y += padding;
  max_z += padding;

  parameters.min_point.s[0] = min_x;
  parameters.min_point.s[1] = min_y;
  parameters.min_point.s[2] = min_z;
  parameters.max_point.s[0] = max_x;
  parameters.max_point.s[1] = max_y;
  parameters.max_point.s[2] = max_z;

  parameters.grid_size_x =
      static_cast<cl_uint>((max_x - min_x) / grid_cell_side_length);
  parameters.grid_size_y =
      static_cast<cl_uint>((max_y - min_y) / grid_cell_side_length);
  parameters.grid_size_z =
      static_cast<cl_uint>((max_z - min_z) / grid_cell_side_length);

  // The Z-curve uses interleaving of bits in a uint to calculate the index.
  // This means we have floor(32/dimension_count) bits to represent each
  // dimension.
  assert(parameters.grid_size_x < 1024);
  assert(parameters.grid_size_y < 1024);
  assert(parameters.grid_size_z < 1024);

  parameters.grid_cell_count = get_grid_index_z_curve(
      parameters.grid_size_x, parameters.grid_size_y, parameters.grid_size_z);

  // Locate each particle in the grid and build the grid count table
  cell_table_owner cell_table(parameters.grid_cell_count);

  set_kernel_args(kernel_locate_in_grid_, front_buffer_, back_buffer_,
                  parameters);

  check_cl_error(queue_.enqueueNDRangeKernel(
      kernel_locate_in_grid_, cl::NullRange,
      cl::NDRange(parameters.particles_count), cl::NDRange(size_of_groups)));

  check_cl_error(queue_.enqueueReadBuffer(
      back_buffer_, CL_TRUE, 0, sizeof(particle) * parameters.particles_count,
      out_particles));

  sort_particles(out_particles, back_buffer_, front_buffer_, cell_table.data);

  cl::Buffer cell_table_buffer(
      context_, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
      sizeof(unsigned int) * parameters.grid_cell_count);

  check_cl_error(queue_.enqueueWriteBuffer(
      cell_table_buffer, CL_TRUE, 0,
      sizeof(unsigned int) * parameters.grid_cell_count, cell_table.data));

  check_cl_error(queue_.enqueueWriteBuffer(
      front_buffer_, CL_TRUE, 0, sizeof(particle) * parameters.particles_count,
      out_particles));

  // Compute the density and the pressure term at every particle.
  check_cl_error(kernel_density_pressure_.setArg(0, front_buffer_));
  check_cl_error(kernel_density_pressure_.setArg(
      1, size_of_groups * sizeof(particle),
      nullptr));  // Declare local memory in arguments
  check_cl_error(kernel_density_pressure_.setArg(2, back_buffer_));
  check_cl_error(kernel_density_pressure_.setArg(3, parameters));
  check_cl_error(kernel_density_pressure_.setArg(4, precomputed_terms));
  check_cl_error(kernel_density_pressure_.setArg(5, cell_table_buffer));

  check_cl_error(queue_.enqueueNDRangeKernel(
      kernel_density_pressure_, cl::NullRange,
      cl::NDRange(parameters.particles_count), cl::NDRange(size_of_groups)));

  // Upload the scene geometry used by the collision step.
  cl::Buffer face_normals_buffer(
      context_, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
      sizeof(float) * current_scene.face_normals.size());
  cl::Buffer vertices_buffer(context_,
                             CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
                             sizeof(float) * current_scene.vertices.size());
  cl::Buffer indices_buffer(
      context_, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
      sizeof(unsigned int) * current_scene.indices.size());

  check_cl_error(queue_.enqueueWriteBuffer(
      face_normals_buffer, CL_TRUE, 0,
      sizeof(float) * current_scene.face_normals.size(),
      current_scene.face_normals.data()));
  check_cl_error(queue_.enqueueWriteBuffer(
      vertices_buffer, CL_TRUE, 0,
      sizeof(float) * current_scene.vertices.size(),
      current_scene.vertices.data()));
  check_cl_error(queue_.enqueueWriteBuffer(
      indices_buffer, CL_TRUE, 0,
      sizeof(unsigned int) * current_scene.indices.size(),
      current_scene.indices.data()));

  // Compute the density-forces at every particle.
  set_kernel_args(kernel_forces_, back_buffer_, front_buffer_, parameters,
                  precomputed_terms, cell_table_buffer);

  check_cl_error(queue_.enqueueNDRangeKernel(
      kernel_forces_, cl::NullRange, cl::NDRange(parameters.particles_count),
      cl::NDRange(size_of_groups)));

  // Advect particles and resolve collisions with scene geometry.
  set_kernel_args(kernel_advection_collision_, front_buffer_, back_buffer_,
                  parameters, precomputed_terms, cell_table_buffer,
                  face_normals_buffer, vertices_buffer, indices_buffer,
                  current_scene.face_count);

  check_cl_error(queue_.enqueueNDRangeKernel(
      kernel_advection_collision_, cl::NullRange,
      cl::NDRange(parameters.particles_count), cl::NDRange(size_of_groups)));

  // Blocking read of the final particle state for this frame.
  check_cl_error(queue_.enqueueReadBuffer(
      back_buffer_, CL_TRUE, 0, sizeof(particle) * parameters.particles_count,
      out_particles));
}