Args3D<T>::Args3D(int _width, int _height, int _depth){ width = _width; height = _height; depth = _depth; gpuErrchk( cudaHostAlloc((void **) &hostArray, width*height*depth*sizeof(T), cudaHostAllocWriteCombined | cudaHostAllocMapped) ); gpuErrchk( cudaHostGetDevicePointer(&deviceArray, hostArray, 0) ); }
Args<T>::Args(int _width){ width = _width; gpuErrchk( cudaDeviceReset() ); gpuErrchk( cudaSetDeviceFlags(cudaDeviceMapHost) ); gpuErrchk( cudaHostAlloc((void **) &hostArray, width*sizeof(T), cudaHostAllocWriteCombined | cudaHostAllocMapped) ); gpuErrchk( cudaHostGetDevicePointer(&deviceArray, hostArray, 0) ); }
pbf_dam_sim::pbf_dam_sim(scalar_t space) { init_cond = make_shared<pbf_dam_init_cond>(space); init_cond->getParameter(simulatee.parameter); init_cond->getExternalForce(simulatee.external); auto domain = init_cond->getDomainRange(); const auto num_capacity = 40000; simulatee.allocate(num_capacity, domain.second); buffer.allocate(num_capacity, simulatee.ns->getMaxPairParticleNum()); vector<glm::vec3> x; vector<glm::vec3> v; init_cond->getDomainParticlePhaseHost(x, v); simulatee.phase.num = x.size(); cout << "initial particle number: " << x.size() << endl; #pragma region gl_interpo_init swVBOs::getInstance().enroll("pbf_particle"); glBindBuffer(GL_ARRAY_BUFFER, swVBOs::getInstance().findVBO("pbf_particle")); glBufferData(GL_ARRAY_BUFFER, x.size() * sizeof(glm::vec3), x.data(), GL_STATIC_DRAW); gpuErrchk(cudaDeviceSynchronize()); cudaGraphicsGLRegisterBuffer(&cu_res, swVBOs::getInstance().findVBO("pbf_particle"), cudaGraphicsMapFlagsNone); #ifndef NDEBUG gpuErrchk(cudaPeekAtLastError()); gpuErrchk(cudaDeviceSynchronize()); #endif // modify particles data cudaFree(simulatee.phase.x); cudaMemcpy(simulatee.phase.v, v.data(), v.size() * sizeof(dom_dim), cudaMemcpyHostToDevice); #pragma endregion }
void CUDARenderer::draw(const glm::mat4 &View, const glm::mat4 &Projection) { setCameraInfo(camera.cam); gpuErrchk(cudaGraphicsMapResources(1, &cudaSurfRes)); { cudaArray *viewCudaArray; gpuErrchk(cudaGraphicsSubResourceGetMappedArray(&viewCudaArray, cudaSurfRes, 0, 0)); renderToTexture(dimBlock, dimGrid, d_scene, viewCudaArray); } gpuErrchk(cudaGraphicsUnmapResources(1, &cudaSurfRes)); glDrawArrays(GL_TRIANGLES, 0, 6); }
void CUDARenderer::initCUDA() { gpuErrchk(cudaSetDevice(0)); gpuErrchk(cudaGLSetGLDevice(0)); gpuErrchk(cudaMalloc((void **)&d_scene, sizeof(SlowScene))); gpuErrchk(cudaMemcpy(d_scene, &h_scene, sizeof(SlowScene), cudaMemcpyHostToDevice)); setImageInfo(h_info); gpuErrchk(cuCtxSetLimit(CU_LIMIT_STACK_SIZE, 1024 * 10)); dimBlock = dim3(THREAD_DIM, THREAD_DIM, 1); dimGrid = dim3(h_info.width / dimBlock.x + (h_info.width % dimBlock.x > 0), h_info.height / dimBlock.y + (h_info.height % dimBlock.y > 0), 1); snapshot.init(); }
CUDARenderer::~CUDARenderer(void) { gpuErrchk(cudaFree(d_scene)); deleteTexture(); vao.destroy(); vertices.destroy(); }
void pbf_dam_sim::simulateOneStep() { //simulatee.external.body_force cudaGraphicsMapResources(1, &cu_res); #ifndef NDEBUG gpuErrchk(cudaPeekAtLastError()); gpuErrchk(cudaDeviceSynchronize()); #endif size_t pos_size; cudaGraphicsResourceGetMappedPointer((void**)&simulatee.phase.x, &pos_size, cu_res); #ifndef NDEBUG gpuErrchk(cudaPeekAtLastError()); gpuErrchk(cudaDeviceSynchronize()); #endif one_step(simulatee, buffer, domain, 3); cudaGraphicsUnmapResources(1, &cu_res); simulatee.phase.x = NULL; }
void PhysicsProcessor::init(const SlowScene &h_scene) { gpuErrchk(cudaMalloc((void**)&d_V, sizeof(SphereVelocities))); gpuErrchk(cudaMemset(d_V, 0, sizeof(SphereVelocities))); SphereMasses h_M; int k = h_scene.numSpheres; for(int i = 0; i < k; ++i) { float r = h_scene.spheres[i].radius; h_M[i] = (4.0f / 3.0f) * r * r * r * efl::PI; } setSphereMasses(h_M); int n = k * (k - 1) / 2; dimBlock1 = dim3(THREAD_DIM, THREAD_DIM, 1); dimGrid1 = dim3(n / THREAD_DIM + (n % THREAD_DIM > 0), n / THREAD_DIM + (n % THREAD_DIM > 0), 1); dimBlock2 = dim3(THREAD_DIM, 1, 1); dimGrid2 = dim3(k / THREAD_DIM + (k % THREAD_DIM > 0), 1, 1); }
void CUDARenderer::initTexture() { OGLtexture.bind(); OGLtexture.texImage(0, GL_RGBA32F, glm::vec3(h_info.width, h_info.height, 0), GL_RGBA, GL_UNSIGNED_BYTE, 0); OGLtexture.texParami(GL_TEXTURE_MAG_FILTER, GL_NEAREST); OGLtexture.texParami(GL_TEXTURE_MIN_FILTER, GL_NEAREST); OGLtexture.texParami(GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); OGLtexture.texParami(GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); gpuErrchk(cudaGraphicsGLRegisterImage(&cudaSurfRes, OGLtexture.getID(), GL_TEXTURE_2D, cudaGraphicsRegisterFlagsSurfaceLoadStore)); p.use(); glActiveTexture(GL_TEXTURE0); p.setUniform("renderedTexture", 0); }
void CUDARenderer::deleteTexture() { gpuErrchk(cudaGraphicsUnregisterResource(cudaSurfRes)); OGLtexture.destroy(); }
void MaskBase<T>::copyToDevice() { gpuErrchk ( cudaMemcpy(deviceMask, hostMask, dimension * size * sizeof(int),cudaMemcpyHostToDevice) ); gpuErrchk ( cudaMemcpy(deviceWeight, hostWeight, size * sizeof(T),cudaMemcpyHostToDevice) ); }
void MaskBase<T>::deviceAlloc() { gpuErrchk( cudaMalloc((void **) &deviceMask, dimension * size * sizeof (int)) ); gpuErrchk( cudaMalloc((void **) &deviceWeight, size * sizeof (T)) ); }
void VolumeRender::generate() { double start_time, end_time, dt; start_time = glfwGetTime(); // Load tree int32_t tree_size = 0; tree_data = 0; // Load from file if (_scene == 0) { tree_size = rendering::read_full_file_binary(_treeFilename, &tree_data); if (tree_size == 0) { fprintf(stderr, "Invalid tree file: %s\n", _treeFilename); exit(1); } } else { char filename_buffer[1024]; sprintf(filename_buffer, "scene%d.tree.%d", _scene, _depth); FILE* file = fopen(filename_buffer, "rb"); if (file != nullptr) { // Load tree printf("Loading file %s\n", filename_buffer); fclose(file); tree_size = rendering::read_full_file_binary(filename_buffer, &tree_data); if (tree_size == 0) { fprintf(stderr, "Invalid tree file: %s\n", filename_buffer); exit(1); } } else { // Generate printf("Generating scene for the first time, please wait..\n"); // Generate from sphere if (_scene == 1) { tree_size = rendering::genOctreeSphere((int32_t**)&tree_data, _depth, glm::vec3(0.5f, 0.5f, 0.5f), 0.4f); } // Generate from miku mesh if (_scene == 2) { if (_mesh.load("miku.md2")) { // Rotate mesh to right rotation (md2s are messed up like that..) glm::mat4 rotation = glm::rotate(180.0f, glm::vec3(0, 0, 1.0f)); rotation = glm::rotate(rotation, 90.0f, glm::vec3(0, 1.0f, 0)); _mesh.transform(rotation); tree_size = genOctreeMesh((int32_t**)&tree_data, _depth, &_mesh); } } // r = 1.5 sphere with embedded r = 1 sphere if (_scene == 3) { if (_mesh.load("miku.md2")) { // Rotate mesh to right rotation (md2s are messed up like that..) glm::mat4 rotation = glm::rotate(180.0f, glm::vec3(0, 0, 1.0f)); rotation = glm::rotate(rotation, 90.0f, glm::vec3(0, 1.0f, 0)); _mesh.transform(rotation); // Get min, max and func for octree generation glm::vec3 overall_min = *_mesh.getMin(); glm::vec3 overall_max = *_mesh.getMax(); overall_max += (overall_max - overall_min); // Make cube around bounds glm::vec3 extents = (overall_max - overall_min) * 0.5f; glm::vec3 centre = overall_min + extents; float greatest_extent = std::max(std::max(extents.x, extents.y), extents.z); extents.x = greatest_extent; extents.y = greatest_extent; extents.z = greatest_extent; overall_min = centre - extents; overall_max = centre + extents; // Sphere position glm::vec3 sphere_pos = centre; sphere_pos.x += extents.x * 0.5f; float sphere_radius = 10.0f; // Test function auto test_func = [&] (glm::vec3 min, glm::vec3 max, raw_attachment_uncompressed& shading_attributes) { glm::vec3 half_size = 0.5f * (max - min); glm::vec3 centre = min + half_size; float half_size_one_axis = half_size.x; //bool sphere_intersect = cubeSphereSurfaceIntersection(centre, half_size_one_axis, sphere_pos, sphere_radius); bool sphere_intersect = boxSphereIntersection(min, max, sphere_pos, sphere_radius); if (sphere_intersect) { // This is not normalised to save generation time // it is normalised later in the GPU anyway after being unpacked shading_attributes.normal = sphere_pos - centre; shading_attributes.colour = glm::vec4(1.0f, 1.0f, 1.0f, 0.5f); shading_attributes.reflectivity = 1.0f; shading_attributes.refractive_index = 1.5f; float distFromCentre = glm::length(shading_attributes.normal); if (distFromCentre < 0.5f * sphere_radius) shading_attributes.refractive_index = 1.0f; return true; } else { bool mesh_intersect = meshAABBIntersect(&_mesh, min, max, shading_attributes); if (mesh_intersect) { shading_attributes.colour.a = 1.0f; shading_attributes.reflectivity = 0.0f; return true; } } return false; }; point_test_func func(test_func); // Generate octree tree_size = genOctree((int32_t**)&tree_data, _depth, func, overall_min, overall_max); } } // r = 1.5 sphere with hollow sphere (should be the same as above) if (_scene == 4) { if (_mesh.load("miku.md2")) { // Rotate mesh to right rotation (md2s are messed up like that..) glm::mat4 rotation = glm::rotate(180.0f, glm::vec3(0, 0, 1.0f)); rotation = glm::rotate(rotation, 90.0f, glm::vec3(0, 1.0f, 0)); _mesh.transform(rotation); // Get min, max and func for octree generation glm::vec3 overall_min = *_mesh.getMin(); glm::vec3 overall_max = *_mesh.getMax(); overall_max += (overall_max - overall_min); // Make cube around bounds glm::vec3 extents = (overall_max - overall_min) * 0.5f; glm::vec3 centre = overall_min + extents; float greatest_extent = std::max(std::max(extents.x, extents.y), extents.z); extents.x = greatest_extent; extents.y = greatest_extent; extents.z = greatest_extent; overall_min = centre - extents; overall_max = centre + extents; // Sphere position glm::vec3 sphere_pos = centre; sphere_pos.x += extents.x * 0.5f; float sphere_radius = 10.0f; // Test function auto test_func = [&] (glm::vec3 min, glm::vec3 max, raw_attachment_uncompressed& shading_attributes) { glm::vec3 half_size = 0.5f * (max - min); glm::vec3 centre = min + half_size; float half_size_one_axis = half_size.x; //bool sphere_intersect = cubeSphereSurfaceIntersection(centre, half_size_one_axis, sphere_pos, sphere_radius); bool sphere_intersect = boxSphereIntersection(min, max, sphere_pos, sphere_radius); if (sphere_intersect) { // This is not normalised to save generation time // it is normalised later in the GPU anyway after being unpacked shading_attributes.normal = sphere_pos - centre; shading_attributes.colour = glm::vec4(1.0f, 1.0f, 1.0f, 0.5f); shading_attributes.reflectivity = 1.0f; shading_attributes.refractive_index = 1.5f; float distFromCentre = glm::length(shading_attributes.normal); if (distFromCentre < 0.5f * sphere_radius) return false; return true; } else { bool mesh_intersect = meshAABBIntersect(&_mesh, min, max, shading_attributes); if (mesh_intersect) { shading_attributes.colour.a = 1.0f; shading_attributes.reflectivity = 0.0f; return true; } } return false; }; point_test_func func(test_func); // Generate octree tree_size = genOctree((int32_t**)&tree_data, _depth, func, overall_min, overall_max); } } // Generate from mesh if (_scene == 5) { Mesh teapot("teapot.obj", true); Mesh checkerboard("checkerboard.obj", true); // Lift teapot off checkerboard a bit so they don't intersect glm::mat4 translation = glm::translate(glm::vec3(0.0f, 0.1f, 0.0f)); glm::mat4 rotation = glm::rotate(180.0f, glm::vec3(0, 0, 1.0f)); checkerboard.transform(translation * rotation); teapot.transform(rotation); // Get min, max and func for octree generation glm::vec3 overall_min = *teapot.getMin(); glm::vec3 overall_max = *teapot.getMax(); overall_max += (overall_max - overall_min); // Make cube around bounds glm::vec3 extents = (overall_max - overall_min) * 0.5f; glm::vec3 centre = overall_min + extents; float greatest_extent = std::max(std::max(extents.x, extents.y), extents.z); extents.x = greatest_extent; extents.y = greatest_extent; extents.z = greatest_extent; overall_min = centre - extents; overall_max = centre + extents; // Test function auto test_func = [&] (glm::vec3 min, glm::vec3 max, raw_attachment_uncompressed& shading_attributes) { if (meshAABBIntersect(&checkerboard, min, max, shading_attributes)) { shading_attributes.colour.a = 1.0f; shading_attributes.reflectivity = 0.0f; return true; } if (meshAABBIntersect(&teapot, min, max, shading_attributes)) { shading_attributes.colour.a = 0.3f; shading_attributes.reflectivity = 0.5f; return true; } return false; }; point_test_func func(test_func); // Generate octree tree_size = genOctree((int32_t**)&tree_data, _depth, func, overall_min, overall_max); } // transparent and reflective boxes on checkerboard if (_scene == 6) { Mesh checkerboard("checkerboard.obj", true); // The cube should have flat normals instead of smooth Mesh box1("cube.obj", true, aiProcess_GenNormals); Mesh box2("cube.obj", true, aiProcess_GenNormals); // Rotate checkerboard glm::mat4 rotation = glm::rotate(180.0f, glm::vec3(0, 0, 1.0f)); checkerboard.transform(rotation); // Scale and position box glm::mat4 translation = glm::translate(glm::vec3(0, 1.0f, 0)); glm::mat4 scale = glm::scale(glm::vec3(0.25f)); glm::mat4 box_transform = scale * translation; box1.transform(scale); box2.transform(scale); // Get min, max and func for octree generation glm::vec3 overall_min = *checkerboard.getMin(); glm::vec3 overall_max = *checkerboard.getMax(); overall_max += (overall_max - overall_min); // Make cube around bounds glm::vec3 extents = (overall_max - overall_min) * 0.5f; glm::vec3 centre = overall_min + extents; float greatest_extent = std::max(std::max(extents.x, extents.y), extents.z); extents.x = greatest_extent; extents.y = greatest_extent; extents.z = greatest_extent; overall_min = centre - extents; overall_max = centre + extents; // Test function auto test_func = [&] (glm::vec3 min, glm::vec3 max, raw_attachment_uncompressed& shading_attributes) { if (meshAABBIntersect(&checkerboard, min, max, shading_attributes)) { shading_attributes.colour.a = 1.0f; shading_attributes.reflectivity = 0.0f; return true; } if (meshAABBIntersect(&box1, min, max, shading_attributes)) { shading_attributes.colour.a = 1.0f; shading_attributes.reflectivity = 1.0f; return true; } return false; }; point_test_func func(test_func); // Generate octree tree_size = genOctree((int32_t**)&tree_data, _depth, func, overall_min, overall_max); } // Reflective inverted box with teapot inside if (_scene == 7) { // The cube should have flat normals instead of smooth Mesh teapot("teapot.obj", true); Mesh box1("inverted_cube.obj", true, aiProcess_GenNormals); teapot.transform(glm::scale(glm::vec3(0.2f)) * glm::rotate(180.0f, glm::vec3(0.0f, 0.0f, 1.0f))); // Get min, max and func for octree generation glm::vec3 overall_min = *box1.getMin(); glm::vec3 overall_max = *box1.getMax(); overall_max += (overall_max - overall_min); // Make cube around bounds glm::vec3 extents = (overall_max - overall_min) * 0.5f; glm::vec3 centre = overall_min + extents; float greatest_extent = std::max(std::max(extents.x, extents.y), extents.z); extents.x = greatest_extent; extents.y = greatest_extent; extents.z = greatest_extent; overall_min = centre - extents; overall_max = centre + extents; // Test function auto test_func = [&] (glm::vec3 min, glm::vec3 max, raw_attachment_uncompressed& shading_attributes) { glm::vec3 aabb_extents = (max - min) * 0.5f; glm::vec3 aabb_centre = aabb_centre + min; if (meshAABBIntersect(&box1, min, max, shading_attributes)) { shading_attributes.colour.a = 1.0f; shading_attributes.reflectivity = 1.0f; return true; } if (meshAABBIntersect(&teapot, min, max, shading_attributes)) { shading_attributes.colour = glm::vec4(1.0f, 0.0f, 0.0f, 1.0f); shading_attributes.reflectivity = 0.0f; return true; } return false; }; point_test_func func(test_func); // Generate octree tree_size = genOctree((int32_t**)&tree_data, _depth, func, overall_min, overall_max); } // Write custom generation to file if (_saveTrees) { printf("Writing %s...\n", filename_buffer); file = fopen(filename_buffer, "wb"); if (file == nullptr) { fprintf(stderr, "Failed to open file for writing: %s\n", filename_buffer); } else { fwrite(tree_data, tree_size, sizeof(char), file); fclose(file); } } } } // Calculate time to load/generate end_time = glfwGetTime(); dt = end_time - start_time; printf("Time to load/generate tree: %f\n", dt); printf("%.2fMB\n", tree_size / (1024.0f * 1024.0f)); // Reset timer start_time = glfwGetTime(); // Upload sphere to GPU gpuErrchk(cudaMalloc((void**)&_gpuTree, tree_size)); gpuErrchk(cudaMemcpy(_gpuTree, tree_data, tree_size, cudaMemcpyHostToDevice)); // Calculate time to upload end_time = glfwGetTime(); dt = end_time - start_time; printf("Time to upload tree: %f\n", dt); printf("%.2fMB\n", tree_size / (1024.0f * 1024.0f)); // Free CPU memory //free(tree_data); }
PhysicsProcessor::~PhysicsProcessor(void) { gpuErrchk(cudaFree(d_V)); }
// run a function on a specific gpu void set_device_and_run(int id, std::function<void()> fun) { gpuErrchk(cudaSetDevice(id)); fun(); }