/**
 * Render `num` colored points with the point shader program.
 *
 * Both vertex buffers (buffers_[0] = positions, buffers_[1] = colors) are
 * re-allocated to hold 3 floats per point, registered and mapped for CUDA,
 * filled by copyPointsToGL(), and finally drawn as GL_POINTS.
 *
 * @param positions  point positions, forwarded unchanged to copyPointsToGL()
 * @param colors     point colors, forwarded unchanged to copyPointsToGL()
 * @param num        number of points
 * @param camera     supplies camera.mvp, uploaded as the "u_mvpMatrix" uniform
 *
 * NOTE(review): the CUDA interop calls below return cudaError_t values that
 * are not inspected; no error-reporting helper is visible in this block.
 */
void OpenGLRenderer::renderPoints(const glm::vec3* positions, const Color256* colors, const int num, const Camera &camera) {
    // Always use the point shaders to render points
    glUseProgram(points_program_);
    // glGetUniformLocation returns a GLint (-1 when the uniform is not found)
    GLint mvp_location = glGetUniformLocation(points_program_, "u_mvpMatrix");

    // CUDA device pointers that will alias the mapped GL buffers
    float3* dptr_pos;
    float3* dptr_col;

    // Setup position buffer: attribute 0, three floats per point
    glBindBuffer(GL_ARRAY_BUFFER, buffers_[0]);
    glBufferData(GL_ARRAY_BUFFER, 3 * num*sizeof(float), NULL, GL_DYNAMIC_DRAW);
    glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 0, NULL);
    glEnableVertexAttribArray(0);

    // Setup color buffer: attribute 1, three floats per point
    glBindBuffer(GL_ARRAY_BUFFER, buffers_[1]);
    glBufferData(GL_ARRAY_BUFFER, 3 * num*sizeof(float), NULL, GL_DYNAMIC_DRAW);
    glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 0, NULL);
    glEnableVertexAttribArray(1);

    // Register position and color buffers with CUDA
    cudaGLRegisterBufferObject(buffers_[0]);
    cudaGLRegisterBufferObject(buffers_[1]);

    // Map buffers to CUDA
    cudaGLMapBufferObject((void**)&dptr_pos, buffers_[0]);
    cudaGLMapBufferObject((void**)&dptr_col, buffers_[1]);

    // Copy data to buffer with CUDA
    copyPointsToGL(positions, colors, dptr_pos, dptr_col, num);

    // Unmap buffers from CUDA
    cudaGLUnmapBufferObject(buffers_[0]);
    cudaGLUnmapBufferObject(buffers_[1]);

    // Unregister position and color buffers with CUDA
    cudaGLUnregisterBufferObject(buffers_[0]);
    cudaGLUnregisterBufferObject(buffers_[1]);

    // Send the MVP Matrix
    glUniformMatrix4fv(mvp_location, 1, GL_FALSE, glm::value_ptr(camera.mvp));

    // Draw
    glPointSize(1.0f);
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    // The count argument is a number of vertices, not floats: each buffer
    // holds exactly `num` three-component vertices, so drawing 3 * num would
    // fetch attributes beyond the end of both buffers.
    glDrawArrays(GL_POINTS, 0, num);
}
void raytrace() { uint* imagedata; cutilSafeCall(cudaGLMapBufferObject((void**)&imagedata, pbo)); float3 A, B, C; camera.getImagePlane(A, B, C); dev_camera d_cam(camera.getPosition(), A, B, C, aperture, focal); dev_light d_light(light.getPosition(), light.getColor(), 4096); //need to change here. float3 minAABB, maxAABB; world.getAABB(minAABB, maxAABB); sceneInfo scene(world.getNumTriangles(), world.getNumSpheres(), world.getNumBoxes(), minAABB, maxAABB); //TODO: add control for clear buffer here. //change here for the many object case raytraceImage(imagedata, dev_lastframe_ptr, dev_num_layers, r_width, r_height, moved, d_cam, d_light, scene); //for showing the real frame rate cudaMemcpy(&frame_num, dev_num_layers, sizeof(float), cudaMemcpyDeviceToHost); frame_num++; cudaMemcpy(dev_num_layers, &frame_num, sizeof(int), cudaMemcpyHostToDevice); cutilSafeCall(cudaGLUnmapBufferObject(pbo)); //download texture from pbo glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo); glBindTexture(GL_TEXTURE_2D, framebuffer); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, r_width, r_height, GL_RGBA, GL_UNSIGNED_BYTE, NULL); glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0); CUT_CHECK_ERROR_GL(); }
void runCuda(){ // Map OpenGL buffer object for writing from CUDA on a single GPU // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer dptr=NULL; vbo = mesh->getVBO(); vbosize = mesh->getVBOsize(); float newcbo[] = {0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0}; cbo = newcbo; cbosize = 9; ibo = mesh->getIBO(); ibosize = mesh->getIBOsize(); cudaGLMapBufferObject((void**)&dptr, pbo); cudaRasterizeCore(dptr, glm::vec2(width, height), frame, vbo, vbosize, cbo, cbosize, ibo, ibosize); cudaGLUnmapBufferObject(pbo); vbo = NULL; cbo = NULL; ibo = NULL; frame++; fpstracker++; }
void runCuda(){ // Map OpenGL buffer object for writing from CUDA on a single GPU // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer dptr=NULL; vbo = mesh->getVBO(); vbosize = mesh->getVBOsize(); float newcbo[] = {0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0}; cbo = newcbo; cbosize = 9; ibo = mesh->getIBO(); ibosize = mesh->getIBOsize(); nbo = mesh->getNBO(); nbosize = mesh->getNBOsize(); cudaGLMapBufferObject((void**)&dptr, pbo); // Invert camera to convert to view matrix cudaRasterizeCore(glm::inverse(cam), projection, light, draw_mode, dptr, glm::vec2(width, height), frame, vbo, vbosize, nbo, nbosize, cbo, cbosize, ibo, ibosize); cudaGLUnmapBufferObject(pbo); vbo = NULL; cbo = NULL; ibo = NULL; frame++; fpstracker++; }
void runCuda() { ////////////////////// // Timing cuda call // ////////////////////// float time; cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); cudaEventRecord(start, 0); // Map OpenGL buffer object for writing from CUDA on a single GPU // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer dptr=NULL; vbo = mesh->getVBO(); vbosize = mesh->getVBOsize(); nbo = mesh->getNBO(); nbosize = mesh->getNBOsize(); #if RGBONLY == 1 float newcbo[] = {0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0}; cbo = newcbo; cbosize = 9; #elif RGBONLY == 0 vec3 defaultColor(0.5f, 0.5f, 0.5f); mesh->changeColor(defaultColor); cbo = mesh->getCBO(); cbosize = mesh->getCBOsize(); #endif ibo = mesh->getIBO(); ibosize = mesh->getIBOsize(); cudaGLMapBufferObject((void**)&dptr, pbo); updateCamera(); cudaRasterizeCore(cam, dptr, glm::vec2(width, height), frame, vbo, vbosize, cbo, cbosize, ibo, ibosize, nbo, nbosize, lights, lightsize, alpha, beta, displayMode); cudaGLUnmapBufferObject(pbo); vbo = NULL; cbo = NULL; ibo = NULL; frame++; fpstracker++; ////////////////////// // Timing cuda call // ////////////////////// cudaEventRecord(stop, 0); cudaEventSynchronize(stop); cudaEventElapsedTime(&time, start, stop); printf("runCuda runtime: %3.1f ms \n", time); }
void runCuda() { if (camchanged) { iteration = 0; Camera &cam = renderState->camera; glm::vec3 v = cam.view; glm::vec3 u = cam.up; glm::vec3 r = glm::cross(v, u); glm::mat4 rotmat = glm::rotate(theta, r) * glm::rotate(phi, u); cam.view = glm::vec3(rotmat * glm::vec4(v, 0.f)); cam.up = glm::vec3(rotmat * glm::vec4(u, 0.f)); cam.position += cammove.x * r + cammove.y * u + cammove.z * v; // Camera to grid center float distance = cam.resolution.x / 2 / tan(cam.fov.x / 2); cam.toGrid = glm::vec3(cam.view.x*distance, cam.view.y*distance, cam.view.z*distance); // Find camera right vector float rAngle = -PI / 2; float qx = cam.view.x * sin(rAngle / 2); float qy = cam.view.y * sin(rAngle / 2); float qz = cam.view.z * sin(rAngle / 2); float qw = cos(rAngle / 2); glm::quat q = glm::quat(qw, qx, qy, qz); cam.right = q * cam.up; theta = phi = 0; cammove = glm::vec3(); camchanged = false; } // Map OpenGL buffer object for writing from CUDA on a single GPU // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer if (iteration == 0) { pathtraceFree(); pathtraceInit(scene); } if (iteration < renderState->iterations) { uchar4 *pbo_dptr = NULL; iteration++; cudaGLMapBufferObject((void**)&pbo_dptr, pbo); // execute the kernel int frame = 0; pathtrace(pbo_dptr, frame, iteration); // unmap buffer object cudaGLUnmapBufferObject(pbo); } else { saveImage(); pathtraceFree(); cudaDeviceReset(); exit(EXIT_SUCCESS); } }
void runCuda() { // Map OpenGL buffer object for writing from CUDA on a single GPU // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer float *dptrvert=NULL; float *velptr=NULL; cudaGLMapBufferObject((void**)&dptrvert, planetVBO); cudaGLMapBufferObject((void**)&velptr, velocityVBO); // execute the kernel cudaFlockingUpdateWrapper(DT, seekTarget); #if VISUALIZE == 1 cudaUpdateVBO(dptrvert, velptr); #endif // unmap buffer object cudaGLUnmapBufferObject(planetVBO); cudaGLUnmapBufferObject(velocityVBO); }
/**
 * Copy the computed image (devCalcArray, iSize bytes) into the OpenGL buffer
 * object `buffer` and mark the object as flushed.
 *
 * The buffer is mapped into CUDA's address space only for the duration of the
 * device-to-device copy.
 */
void Mandelbrot::WriteBuffer()
{
    checkCudaErrors( cudaGLMapBufferObject( ( void** ) &this->devArray, this->buffer ), __LINE__, false );

    // The copy used to be the only unchecked CUDA call in this function; it
    // now goes through the same checker as the map/unmap calls around it.
    checkCudaErrors( cudaMemcpy( this->devArray, this->devCalcArray, this->iSize, cudaMemcpyDeviceToDevice ), __LINE__, false );

    checkCudaErrors( cudaGLUnmapBufferObject( this->buffer ), __LINE__, false );

    this->bIsFlushed = true;
}
void Renderer::render(const Camera& camera, float time) { // calc cam vars glm::vec3 A,B,C; { // camera ray C = glm::normalize(camera.getLookAt()-camera.getPosition()); // calc A (screen x) // calc B (screen y) then scale down relative to aspect // fov is for screen x axis A = glm::normalize(glm::cross(C,camera.getUp())); B = 1.0f/camera.getAspect()*glm::normalize(glm::cross(A,C)); // scale by FOV float tanFOV = tan(glm::radians(camera.getFOV())); A *= tanFOV; B *= tanFOV; } // cuda call unsigned int* out_data; checkCudaErrors(cudaGLMapBufferObject((void**)&out_data, pbo)); if (mode == RAYTRACE) { raytrace1(out_data, image_width, image_height, time, camera.getPosition(), A, B, C, scene_d, sceneSize); } else if (mode == PATHTRACE) { ++filmIters; pathtrace(out_data, image_width, image_height, time, camera.getPosition(), A, B, C, camera.m_lensRadius, camera.m_focalDist, scene_d, sceneSize, rand_d, rays_d, col_d, idx_d, film_d, filmIters); } checkCudaErrors(cudaGLUnmapBufferObject(pbo)); // download texture from destination PBO glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo); glActiveTexture(GL_TEXTURE0 + RENDER_TEXTURE); glBindTexture(GL_TEXTURE_2D, result_texture); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, image_width, image_height, GL_BGRA, GL_UNSIGNED_BYTE, NULL); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); glActiveTexture(GL_TEXTURE0 + UNUSED_TEXTURE); SDK_CHECK_ERROR_GL(); fullScreenQuad.display(); }
/**
 * Unmap the VBO that is currently mapped through this object and discard the
 * data held by `datap`.  Does nothing when the VBO is not mapped.
 *
 * With USE_CUDA the VBO is released via cudaGLUnmapBufferObject(); otherwise
 * it is released via glUnmapBuffer().  In both cases mapped_gl_mem is reset,
 * datap->DiscardAllData() is called and _is_mapped is cleared.
 *
 * @param datap  storage whose contents back the mapped VBO
 *
 * NOTE(review): TIME_MAPPEDVBOVOID is a macro defined elsewhere that prefixes
 * a statement; presumably it enables or disables the timing statements.
 */
void MappedVboVoid::
        unmap(DataStorageVoid* datap)
{
    if (_is_mapped)
    {
        TIME_MAPPEDVBOVOID TaskInfo("Unmapping vbo %u of size %s", (unsigned)*_vbo, DataStorageVoid::getMemorySizeText(datap->numberOfBytes()).c_str());

#ifdef USE_CUDA
        // make sure data is located in cuda
        datap->AccessStorage<CudaGlobalStorage>( true, false );

    #ifdef CUDA_MEMCHECK_TEST
        // copy data back over the mapped memory
        *mapped_gl_mem = *datap;
    #endif

        // sync from cuda to vbo
        cudaGLUnmapBufferObject(*_vbo);

        // release resources
        mapped_gl_mem.reset();
        datap->DiscardAllData();
        _is_mapped = false;

        // The memory bound with Cuda-OpenGL-interop can be relied on. So
        // call cudaGetLastError to clear the cuda error state just in case.
        // (I'm not sure why but it might be related to cuda out-of-memory
        // errors elsewhere)
        // The returned error code is intentionally not inspected here.
        cudaGetLastError();
#else
        // make sure data is located in cpu
        datap->AccessStorage<CpuMemoryStorage>( true, false );

        // sync from mem to vbo
        glBindBuffer(_vbo->vbo_type(), *_vbo);
        glUnmapBuffer(_vbo->vbo_type());
        glBindBuffer(_vbo->vbo_type(), 0);

        // release resources
        mapped_gl_mem.reset();
        datap->DiscardAllData();
        _is_mapped = false;
#endif

        TIME_MAPPEDVBOVOID ComputationSynchronize();

        // _tt is deleted and nulled after the closing message.
        // NOTE(review): presumably _tt is a timer object created when the
        // VBO was mapped — confirm against the map() implementation.
        if (_tt)
        {
            TaskInfo("Unmapped vbo %u of size %s", (unsigned)*_vbo, DataStorageVoid::getMemorySizeText(datap->numberOfBytes()).c_str());
            delete _tt;
            _tt = 0;
        }
    }
}
void runCuda() { // Map OpenGL buffer object for writing from CUDA on a single GPU // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer float4 *dptr=NULL; float *dptrvert=NULL; cudaGLMapBufferObject((void**)&dptr, pbo); cudaGLMapBufferObject((void**)&dptrvert, planetVBO); // execute the kernel cudaNBodyUpdateWrapper(DT); #if VISUALIZE == 1 cudaUpdatePBO(dptr, field_width, field_height); cudaUpdateVBO(dptrvert, field_width, field_height); #endif // unmap buffer object cudaGLUnmapBufferObject(planetVBO); cudaGLUnmapBufferObject(pbo); }
/*
 * Copy the contents of dp's pixel buffer object into dp's GL texture.
 *
 * If the buffer is currently mapped by CUDA it is unmapped first and the
 * DT_BUF_MAPPED flag is cleared on dp and propagated with propagate_flag().
 * The color buffer is cleared, the texture is bound, and glTexSubImage2D()
 * uploads OBJ_COLS(dp) x OBJ_ROWS(dp) pixels in the format returned by
 * gl_pixel_type(dp).
 *
 * NOTE(review): the `#ifdef HAVE_OPENGL` opened at the top of the body is not
 * closed anywhere in the visible function text; confirm where its #endif is.
 * NOTE(review): the error report names "update_cuda_viewer" rather than this
 * function; this looks like a leftover from where the code was moved from.
 */
static void prepare_image_for_mapping(Data_Obj *dp)
{
#ifdef HAVE_OPENGL
	int t;
	cudaError_t e;

	// unmap buffer before using w/ GL
	if( BUF_IS_MAPPED(dp) ){
		e = cudaGLUnmapBufferObject( OBJ_BUF_ID(dp) );
		if( e != cudaSuccess ){
			describe_cuda_driver_error2("update_cuda_viewer",
				"cudaGLUnmapBufferObject",e);
			NERROR1("failed to unmap buffer object");
		}
		CLEAR_OBJ_FLAG_BITS(dp, DT_BUF_MAPPED);
		// propagate change to children and parents
		propagate_flag(dp,DT_BUF_MAPPED);
	}

	//
	//bind_texture(OBJ_DATA_PTR(dp));

	glClear(GL_COLOR_BUFFER_BIT);

	/*
	sprintf(ERROR_STRING,"update_cuda_viewer: tex_id = %d, buf_id = %d",
		OBJ_TEX_ID(dp),OBJ_BUF_ID(dp));
	advise(ERROR_STRING);
	*/

	glBindTexture(GL_TEXTURE_2D, OBJ_TEX_ID(dp));

	// The pixel-unpack buffer is bound only when HAVE_LIBGLEW is defined.
#ifdef HAVE_LIBGLEW
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER, OBJ_BUF_ID(dp));
#endif // HAVE_LIBGLEW

#ifdef FOOBAR
	switch(OBJ_COMPS(dp)){
		/* what used to be here??? */
	}
#endif /* FOOBAR */

	// GL pixel format for this object, as chosen by gl_pixel_type()
	t=gl_pixel_type(dp);

	glTexSubImage2D(GL_TEXTURE_2D, 0,	// target, level
		0, 0,				// x0, y0
		OBJ_COLS(dp), OBJ_ROWS(dp),	// dx, dy
		t,
		GL_UNSIGNED_BYTE,		// type
		OFFSET(0));			// offset into PIXEL_UNPACK_BUFFER

#ifdef HAVE_LIBGLEW
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
#endif // HAVE_LIBGLEW
}
//==================================== // Main loop //==================================== void RunCuda(){ // Map OpenGL buffer object for writing from CUDA on a single GPU // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer uchar4 *dptr = NULL; cudaGLMapBufferObject((void**)&dptr, m_pbo); // Execute the kernel CudaKernel(dptr, m_width, m_height, m_major, m_minor); // Unmap buffer object cudaGLUnmapBufferObject(m_pbo); }
//------------------------------- //---------RUNTIME STUFF--------- //------------------------------- void runCuda() { // Map OpenGL buffer object for writing from CUDA on a single GPU // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer dptr = NULL; cudaGLMapBufferObject((void **)&dptr, pbo); rasterize(dptr); cudaGLUnmapBufferObject(pbo); frame++; fpstracker++; }
// This is the normal display path void display(void) { cutilCheckError(cutStartTimer(timer)); // Sobel operation Pixel *data = NULL; cutilSafeCall(cudaGLMapBufferObject((void**)&data, pbo_buffer)); sobelFilter(data, imWidth, imHeight, g_SobelDisplayMode, imageScale ); cutilSafeCall(cudaGLUnmapBufferObject(pbo_buffer)); glClear(GL_COLOR_BUFFER_BIT); glBindTexture(GL_TEXTURE_2D, texid); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_buffer); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imWidth, imHeight, GL_LUMINANCE, GL_UNSIGNED_BYTE, OFFSET(0)); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); glDisable(GL_DEPTH_TEST); glEnable(GL_TEXTURE_2D); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); glBegin(GL_QUADS); glVertex2f(0, 0); glTexCoord2f(0, 0); glVertex2f(0, 1); glTexCoord2f(1, 0); glVertex2f(1, 1); glTexCoord2f(1, 1); glVertex2f(1, 0); glTexCoord2f(0, 1); glEnd(); glBindTexture(GL_TEXTURE_2D, 0); if (g_CheckRender && g_CheckRender->IsQAReadback() && g_Verify) { printf("> (Frame %d) readback BackBuffer\n", frameCount); g_CheckRender->readback( imWidth, imHeight ); g_CheckRender->savePPM ( sOriginal_ppm[g_Index], true, NULL ); if (!g_CheckRender->PPMvsPPM(sOriginal_ppm[g_Index], sReference_ppm[g_Index], MAX_EPSILON_ERROR, 0.15f)) { g_TotalErrors++; } g_Verify = false; } glutSwapBuffers(); cutilCheckError(cutStopTimer(timer)); computeFPS(); glutPostRedisplay(); }
// display results using OpenGL void display() { cutilCheckError(cutStartTimer(timer)); // execute filter, writing results to pbo unsigned int *d_result; cutilSafeCall(cudaGLMapBufferObject((void**)&d_result, pbo)); gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads); cutilSafeCall(cudaGLUnmapBufferObject(pbo)); // load texture from pbo glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo); glBindTexture(GL_TEXTURE_2D, texid); glPixelStorei(GL_UNPACK_ALIGNMENT, 1); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, 0); glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0); // display results glClear(GL_COLOR_BUFFER_BIT); glEnable(GL_TEXTURE_2D); glDisable(GL_DEPTH_TEST); glBegin(GL_QUADS); glTexCoord2f(0, 1); glVertex2f(0, 0); glTexCoord2f(1, 1); glVertex2f(1, 0); glTexCoord2f(1, 0); glVertex2f(1, 1); glTexCoord2f(0, 0); glVertex2f(0, 1); glEnd(); glDisable(GL_TEXTURE_2D); if (g_CheckRender && g_CheckRender->IsQAReadback() && g_Verify) { // readback for QA testing printf("> (Frame %d) Readback BackBuffer\n", frameCount); g_CheckRender->readback( width, height ); g_CheckRender->savePPM(sOriginal[g_Index], true, NULL); if (!g_CheckRender->PPMvsPPM(sOriginal[g_Index], sReference[g_Index], MAX_EPSILON_ERROR, 0.50f )) { g_TotalErrors++; } g_Verify = false; } glutSwapBuffers(); cutilCheckError(cutStopTimer(timer)); computeFPS(); }
void initCuda(){ // Use device with highest Gflops/s cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ); initPBO(&pbo); dptr=NULL; cudaGLMapBufferObject((void**)&dptr, pbo); clearPBOpos(dptr,width,height); cudaGLUnmapBufferObject(pbo); // Clean up on program exit atexit(cleanupCuda); SetScissorWindow(glm::vec4(300,300,500,500)); texture.mapptr = stbi_load("cow.jpeg",&texture.width, &texture.height,&texture.depth,0); runCuda(); }
// Run the Cuda part of the computation void runCuda() { uchar4 *dptr=NULL; // map OpenGL buffer object for writing from CUDA on a single GPU // no data is moved (Win & Linux). When mapped to CUDA, OpenGL // should not use this buffer cudaGLMapBufferObject((void**)&dptr, pbo); // execute the kernel launch_kernel(dptr, image_width, image_height, animTime); // unmap buffer object cudaGLUnmapBufferObject(pbo); }
void runCuda() { if (camchanged) { iteration = 0; Camera &cam = renderState->camera; glm::vec3 v = cam.view; glm::vec3 u = cam.up; glm::vec3 r = glm::cross(v, u); glm::mat4 rotmat = glm::rotate(theta, r) * glm::rotate(phi, u); cam.view = glm::vec3(rotmat * glm::vec4(v, 0.f)); cam.up = glm::vec3(rotmat * glm::vec4(u, 0.f)); cam.position += cammove.x * r + cammove.y * u + cammove.z * v; theta = phi = 0; cammove = glm::vec3(); camchanged = false; } // Map OpenGL buffer object for writing from CUDA on a single GPU // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer if (iteration == 0) { pathtraceFree(); pathtraceInit(scene); } if (iteration < renderState->iterations) { uchar4 *pbo_dptr = NULL; iteration++; cudaGLMapBufferObject((void**)&pbo_dptr, pbo); // execute the kernel if( scene->blur ) { for( int frame = 0; frame<(scene->frames); frame++ ){ pathtrace(pbo_dptr, frame, scene->frames, iteration); //printf("frame is : %d", frame); } } else { pathtrace(pbo_dptr, 0, 0, iteration); } // unmap buffer object cudaGLUnmapBufferObject(pbo); } else { saveImage(); pathtraceFree(); cudaDeviceReset(); exit(EXIT_SUCCESS); } }
//////////////////////////////////////////////////////////////////////////////// //! Run the Cuda part of the computation //////////////////////////////////////////////////////////////////////////////// void runCuda(GLuint vbo) { // map OpenGL buffer object for writing from CUDA float4 *dptr; cutilSafeCall(cudaGLMapBufferObject((void**)&dptr, vbo)); // execute the kernel // dim3 block(8, 8, 1); // dim3 grid(mesh_width / block.x, mesh_height / block.y, 1); // kernel<<< grid, block>>>(dptr, mesh_width, mesh_height, anim); launch_kernel(dptr, mesh_width, mesh_height, anim); // unmap buffer object cutilSafeCall(cudaGLUnmapBufferObject(vbo)); }
//==================================== // Main loop //==================================== void runCUDA() { // Map OpenGL buffer object for writing from CUDA on a single GPU // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not // use this buffer float4 *dptr = NULL; float *dptrvert = NULL; cudaGLMapBufferObject((void**)&dptrvert, planetVBO); // execute the kernel Nbody::stepSimulation(DT); #if VISUALIZE Nbody::copyPlanetsToVBO(dptrvert); #endif // unmap buffer object cudaGLUnmapBufferObject(planetVBO); }
void runCuda(){ // Map OpenGL buffer object for writing from CUDA on a single GPU // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer dptr=NULL; vbo = mesh->getVBO(); vbosize = mesh->getVBOsize(); float newcbo[] = {0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0}; cbo = newcbo; cbosize = 9; ibo = mesh->getIBO(); ibosize = mesh->getIBOsize(); nbo = mesh->getNBO(); nbosize = mesh->getNBOsize(); // Update view and model to projection transform matrices in each step when interacting with keyboard or mouse *view = glm::lookAt(cam.position, glm::vec3(0.0f), cam.up); *transformModel2Projection = utilityCore::glmMat4ToCudaMat4(*projection * *view * *model); viewPort = glm::normalize(utilityCore::multiplyMat(utilityCore::glmMat4ToCudaMat4(*projection * *view), glm::vec4(cam.view, 1.0f))); // Transformation Feedback std::cout << "\n The model-view-projection transformation is:" << std::endl; utilityCore::printMat4(*projection * *view * *model); std::cout << "\n The view port in the clip space is:" << std::endl; utilityCore::printVec3(viewPort); cudaGLMapBufferObject((void**)&dptr, pbo); cudaRasterizeCore(dptr, glm::vec2(width, height), frame, vbo, vbosize, cbo, cbosize, ibo, ibosize, nbo, nbosize, transformModel2Projection, viewPort, antialiasing, depthFlag, flatcolorFlag, color, multicolorFlag); cudaGLUnmapBufferObject(pbo); vbo = NULL; cbo = NULL; ibo = NULL; nbo = NULL; frame++; fpstracker++; }
/*
 * Unmap the GL buffer object belonging to dp from CUDA.
 *
 * Returns 0 on success.  Returns -1 when cudaGLUnmapBufferObject() fails
 * (after reporting the error) or when the build has no OpenGL support.
 */
static int cu2_unmap_buf(QSP_ARG_DECL  Data_Obj *dp)
{
#ifdef HAVE_OPENGL
	cudaError_t status;

	status = cudaGLUnmapBufferObject( OBJ_BUF_ID(dp) );
	if( status == cudaSuccess )
		return 0;

	/* Failure path: describe the CUDA error, then raise the error. */
	describe_cuda_driver_error2("cu2_unmap_buf",
				"cudaGLUnmapBufferObject",status);
	ERROR1("failed to unmap buffer object");
	return -1;
#else // ! HAVE_OPENGL
	WARN("cu2_unmap_buf:  Sorry, no OpenGL support in this build!?");
	return -1;
#endif // ! HAVE_OPENGL
}
void runCuda(){ // Map OpenGL buffer object for writing from CUDA on a single GPU // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer vbo = mesh->getVBO(); vbosize = mesh->getVBOsize(); nbo = mesh->getNBO(); nbosize = mesh->getNBOsize(); float newcbo[] = {0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0}; cbo = newcbo; cbosize = 9; ibo = mesh->getIBO(); ibosize = mesh->getIBOsize(); calcuatetransformationMatrix( eye,glm::vec2(width, height), front, back); dptr=NULL; cudaGLMapBufferObject((void**)&dptr, pbo); if(ReadBlendType() == ADD) { drawTexture(dptr,width, height,texture); } //clearPBOpos(dptr,width,height); cudaRasterizeCore(dptr, glm::vec2(width, height), frame, vbo, vbosize, nbo, nbosize, cbo, cbosize, ibo, ibosize); cudaGLUnmapBufferObject(pbo); vbo = NULL; cbo = NULL; ibo = NULL; frame++; fpstracker++; }
// display results using OpenGL void display() { sdkStartTimer(&timer); // execute filter, writing results to pbo unsigned int *d_result; checkCudaErrors(cudaGLMapBufferObject((void **)&d_result, pbo)); gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads); checkCudaErrors(cudaGLUnmapBufferObject(pbo)); // load texture from pbo glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo); glBindTexture(GL_TEXTURE_2D, texid); glPixelStorei(GL_UNPACK_ALIGNMENT, 1); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, 0); glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0); // display results glClear(GL_COLOR_BUFFER_BIT); glEnable(GL_TEXTURE_2D); glDisable(GL_DEPTH_TEST); glBegin(GL_QUADS); glTexCoord2f(0, 1); glVertex2f(0, 0); glTexCoord2f(1, 1); glVertex2f(1, 0); glTexCoord2f(1, 0); glVertex2f(1, 1); glTexCoord2f(0, 0); glVertex2f(0, 1); glEnd(); glDisable(GL_TEXTURE_2D); glutSwapBuffers(); sdkStopTimer(&timer); computeFPS(); }
static void select_reference_color(int x, int y) { static float offset = SCREEN_WIDTH/1280.0f; printf("(After scaling) Handling click on x = %d, y = %d\n", x, y); GLuint left_buffer, right_buffer, buffer; kb_images_current_buffers(&left_buffer, &right_buffer); printf("left texture id = %d, right = %d\n", left_buffer, right_buffer); uchar4 *gpu_buffer = NULL; if (x < 640*offset) { /* The clicked pixel is in the left image */ buffer = left_buffer; } else { /* The clicked pixel is in the right image */ buffer = right_buffer; x -= 640*offset; } cutilSafeCall(cudaGLMapBufferObject((void**)&gpu_buffer, buffer)); uchar4 pixel; cudaMemcpy(&pixel, gpu_buffer + (y * 640) + x, sizeof(uchar4), cudaMemcpyDeviceToHost); printf("pixel-value: %d, %d, %d (%d)\n", pixel.x, pixel.y, pixel.z, pixel.w); static char rgbbuffer[4096]; snprintf(rgbbuffer, sizeof(rgbbuffer), "%d,%d,%d", pixel.x, pixel.y, pixel.z); kb_ui_call_javascript("SetRGB", rgbbuffer); double r = pixel.z; double g = pixel.y; double b = pixel.x; double nominator = sqrt((r * r) + (g * g) + (b * b)); printf("nominator = %f\n", nominator); reference_color.x = r / nominator; reference_color.y = g / nominator; reference_color.z = b / nominator; printf("reference_color final: %f %f %f\n", reference_color.x, reference_color.y, reference_color.z); cutilSafeCall(cudaGLUnmapBufferObject(buffer)); }
void runCuda(){ // Map OpenGL buffer object for writing from CUDA on a single GPU // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer if(iterations<renderCam->iterations){ uchar4 *dptr=NULL; iterations++; cudaGLMapBufferObject((void**)&dptr, pbo); //pack geom and material arrays geom* geoms = new geom[renderScene->objects.size()]; material* materials = new material[renderScene->materials.size()]; map* maps = new map[renderScene->maps.size()]; for(int i=0; i<renderScene->objects.size(); i++){ geoms[i] = renderScene->objects[i]; } for(int i=0; i<renderScene->materials.size(); i++){ materials[i] = renderScene->materials[i]; } for(int i=0; i<renderScene->maps.size(); i++){ maps[i] = renderScene->maps[i]; } // execute the kernel if(!textureMode) cudaRaytraceCore(dptr, renderCam, targetFrame, iterations, materials, renderScene->materials.size(),maps,renderScene->maps.size(), geoms, renderScene->objects.size(), mblur,dof); else cudaRaytraceCoreT(dptr, renderCam, targetFrame, iterations, materials, renderScene->materials.size(),maps,renderScene->maps.size(), geoms, renderScene->objects.size(), mblur,dof); // unmap buffer object cudaGLUnmapBufferObject(pbo); }else{ if(!finishedRender){ //output image file image outputImage(renderCam->resolution.x, renderCam->resolution.y); image depthImage(renderCam->resolution.x, renderCam->resolution.y); for(int x=0; x<renderCam->resolution.x; x++){ for(int y=0; y<renderCam->resolution.y; y++){ int index = x + (y * renderCam->resolution.x); glm::vec3 justRGB(renderCam->image[index].x,renderCam->image[index].y,renderCam->image[index].z); outputImage.writePixelRGB(renderCam->resolution.x-1-x,y,justRGB); float d = abs(renderCam->image[index].w-renderCam->positions[targetFrame].z)/40.0f; depthImage.writePixelRGB(renderCam->resolution.x-1-x,y, glm::vec3(d,d,d)); } } gammaSettings gamma; gamma.applyGamma = true; gamma.gamma = 1.0/2.2; gamma.divisor = renderCam->iterations; 
outputImage.setGammaSettings(gamma); string filename = renderCam->imageName; string s; stringstream out; out << targetFrame; s = out.str(); utilityCore::replaceString(filename, ".bmp", "."+s+".bmp"); utilityCore::replaceString(filename, ".png", "."+s+".png"); outputImage.saveImageRGB(filename); depthImage.saveImageRGB("depth."+s+".bmp"); cout << "Saved frame " << s << " to " << filename << endl; finishedRender = true; if(singleFrameMode==true){ //cudaDeviceReset(); exit(0); } } if(targetFrame<renderCam->frames-1){ //clear image buffer and move onto next frame targetFrame++; iterations = 0; for(int i=0; i<renderCam->resolution.x*renderCam->resolution.y; i++){ renderCam->image[i] = glm::vec4(0,0,0,-1); } //cudaDeviceReset(); finishedRender = false; } } }
void runCuda(){ // Map OpenGL buffer object for writing from CUDA on a single GPU // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer if((unsigned int)iterations < renderCam->iterations){ uchar4 *dptr=NULL; ++iterations; cudaGLMapBufferObject((void**)&dptr, pbo); //pack geom and material arrays unsigned int objectsSize = renderScene->objects.size(), materialsSize = renderScene->materials.size(), lightsSize = renderScene->lights.size(); geom* geoms = new geom[objectsSize]; material* materials = new material[materialsSize]; light* lights = new light[lightsSize]; for(unsigned int i=0; i< objectsSize; ++i){ geoms[i] = renderScene->objects[i]; } for(unsigned int i=0; i< materialsSize; ++i){ materials[i] = renderScene->materials[i]; } for(unsigned int i=0; i< lightsSize; ++i){ lights[i] = renderScene->lights[i]; } // execute the kernel cudaRaytraceCore(dptr, renderCam, targetFrame, iterations, materials, materialsSize, geoms, objectsSize, lights, lightsSize); // unmap buffer object cudaGLUnmapBufferObject(pbo); }else{ if(!finishedRender){ //output image file image outputImage(renderCam->resolution.x, renderCam->resolution.y); for(int x=0; x<renderCam->resolution.x; ++x){ for(int y=0; y<renderCam->resolution.y; ++y){ int index = x + (y * renderCam->resolution.x); outputImage.writePixelRGB(x,y,renderCam->image[index]); } } gammaSettings gamma; gamma.applyGamma = true; gamma.gamma = 1.0/2.2; gamma.divisor = renderCam->iterations; outputImage.setGammaSettings(gamma); string filename = renderCam->imageName; string s; stringstream out; out << targetFrame; s = out.str(); utilityCore::replaceString(filename, ".bmp", "."+s+".bmp"); utilityCore::replaceString(filename, ".png", "."+s+".png"); outputImage.saveImageRGB(filename); cout << "Saved frame " << s << " to " << filename << endl; finishedRender = true; if(singleFrameMode==true){ cudaDeviceReset(); exit(0); } } if(targetFrame < renderCam->frames - 1){ //clear image buffer and move onto 
next frame ++targetFrame; iterations = 0; for(int i=0; i<renderCam->resolution.x*renderCam->resolution.y; ++i){ renderCam->image[i] = glm::vec3(0,0,0); } cudaDeviceReset(); finishedRender = false; } } }
// This is the normal display path
/*
 * Display the image held in dp's pixel buffer object in the viewer.
 *
 * Sequence: if the buffer is mapped for compute, unmap it through the
 * platform's unmap function and clear DT_BUF_MAPPED; clear the color buffer;
 * upload the buffer into dp's texture with glTexSubImage2D(); draw a textured
 * quad covering (-1,-1)..(1,1); finally map the buffer again through the
 * platform's map function and set DT_BUF_MAPPED.
 *
 * pvp is not referenced in the visible body.
 * Without HAVE_OPENGL the body reduces to NO_OGL_MSG.
 *
 * NOTE(review): DT_BUF_MAPPED is cleared even when the unmap call reports an
 * error, and set even when the map call reports an error; confirm that this
 * is intended.
 */
static void update_pf_viewer(QSP_ARG_DECL  Platform_Viewer *pvp, Data_Obj *dp)
{
#ifdef HAVE_OPENGL
	int t;
	//cudaError_t e;

	// unmap buffer before using w/ GL
	if( BUF_IS_MAPPED(dp) ){
		// Dispatch to the unmap function of the platform that owns dp.
		if( (*PF_UNMAPBUF_FN(PFDEV_PLATFORM(OBJ_PFDEV(dp))))
				(QSP_ARG  dp) < 0 ) {
			WARN("update_pf_viewer:  buffer unmap error!?");
		}
#ifdef FOOBAR
		e = cudaGLUnmapBufferObject( OBJ_BUF_ID(dp) );
		if( e != cudaSuccess ){
			describe_cuda_driver_error2("update_pf_viewer",
				"cudaGLUnmapBufferObject",e);
			NERROR1("failed to unmap buffer object");
		}
#endif // FOOBAR
		CLEAR_OBJ_FLAG_BITS(dp, DT_BUF_MAPPED);
		// propagate change to children and parents
		propagate_flag(dp,DT_BUF_MAPPED);
	}

	//
	//bind_texture(OBJ_DATA_PTR(dp));

	glClear(GL_COLOR_BUFFER_BIT);

	/*
	sprintf(ERROR_STRING,"update_pf_viewer:  tex_id = %d, buf_id = %d",
		OBJ_TEX_ID(dp),OBJ_BUF_ID(dp));
	advise(ERROR_STRING);
	*/

	glBindTexture(GL_TEXTURE_2D, OBJ_TEX_ID(dp));
	// is glBindBuffer REALLY part of libGLEW???
//#ifdef HAVE_LIBGLEW
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER, OBJ_BUF_ID(dp));
//#endif // HAVE_LIBGLEW

#ifdef FOOBAR
	switch(OBJ_COMPS(dp)){
		/* what used to be here??? */
	}
#endif /* FOOBAR */

	// GL pixel format for this object, as chosen by gl_pixel_type()
	t=gl_pixel_type(dp);
	glTexSubImage2D(GL_TEXTURE_2D, 0,	// target, level
		0, 0,				// x0, y0
		OBJ_COLS(dp), OBJ_ROWS(dp),	// dx, dy
		t,
		GL_UNSIGNED_BYTE,		// type
		OFFSET(0));			// offset into PIXEL_UNPACK_BUFFER

//#ifdef HAVE_LIBGLEW
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
//#endif // HAVE_LIBGLEW

	// Textured quad; each texture coordinate precedes its vertex.
	glBegin(GL_QUADS);
	glTexCoord2f(0, 1); glVertex2f(-1.0, -1.0);
	glTexCoord2f(0, 0); glVertex2f(-1.0, 1.0);
	glTexCoord2f(1, 0); glVertex2f(1.0, 1.0);
	glTexCoord2f(1, 1); glVertex2f(1.0, -1.0);
	glEnd();
	glBindTexture(GL_TEXTURE_2D, 0);

#ifdef FOOBAR
	e = cudaGLMapBufferObject( &OBJ_DATA_PTR(dp),  OBJ_BUF_ID(dp) );
	if( e != cudaSuccess ){
		WARN("Error mapping buffer object!?");
		// should we return now, with possibly other cleanup???
	}
#endif // FOOBAR
	// Map the buffer again via the map function of the platform that owns dp.
	if( (*PF_MAPBUF_FN(PFDEV_PLATFORM(OBJ_PFDEV(dp))))(QSP_ARG  dp) < 0 ){
		WARN("update_pf_viewer:  Error mapping buffer!?");
	}

	SET_OBJ_FLAG_BITS(dp, DT_BUF_MAPPED);
	// propagate change to children and parents
	propagate_flag(dp,DT_BUF_MAPPED);
#else // ! HAVE_OPENGL
	NO_OGL_MSG
#endif // ! HAVE_OPENGL
}
void CMarchingCubes::ComputeIsosurface(ElemType* _pFval, ElemType _isoValue, RenderData* _pRender) { int threads = 128; dim3 grid(m_NumVoxels / threads, 1, 1); // get around maximum grid size of 65535 in each dimension if (grid.x > 65535) { grid.y = grid.x / 32768; grid.x = 32768; } uint totalVerts = 0; int size = m_GridSize.x * m_GridSize.y * m_GridSize.z * sizeof(float); ////////////////////////////////////////////////////////////////////////// int len = m_GridSize.x * m_GridSize.y * m_GridSize.z; float *pFvalTemp = new float[len]; for (int i = 0; i < len; i++) { pFvalTemp[i] = _pFval[i]; } ////////////////////////////////////////////////////////////////////////// float* pdVolumeFval; // ¶¥µãº¯ÊýÖµÎÆÀí(n¡¡Surface) cutilSafeCall(cudaMalloc((void**) &pdVolumeFval, size)); cutilSafeCall(cudaMemcpy(pdVolumeFval, pFvalTemp, size, cudaMemcpyHostToDevice) ); bindVolumeValTexture(pdVolumeFval); delete []pFvalTemp; // calculate number of vertices need per voxel launch_classifyVoxel(grid, threads, m_pdVoxelVerts, m_pdVoxelOccupied, pdVolumeFval, m_GridSize, m_NumVoxels, _isoValue); #if DEBUG_BUFFERS printf("voxelVerts:\n"); dumpBuffer(m_pdVoxelVerts, m_NumVoxels); #endif #if SKIP_EMPTY_VOXELS // scan voxel occupied array cudppScan(m_Scanplan, m_pdVoxelOccupiedScan, m_pdVoxelOccupied, m_NumVoxels); #if DEBUG_BUFFERS printf("voxelOccupiedScan:\n"); dumpBuffer(m_pdVoxelOccupiedScan, m_NumVoxels); #endif // read back values to calculate total number of non-empty voxels // since we are using an exclusive scan, the total is the last value of // the scan result plus the last value in the input array { uint lastElement, lastScanElement; cutilSafeCall(cudaMemcpy((void *) &lastElement, (void *) (m_pdVoxelOccupied + m_NumVoxels - 1), sizeof(uint), cudaMemcpyDeviceToHost)); cutilSafeCall(cudaMemcpy((void *) &lastScanElement, (void *) (m_pdVoxelOccupiedScan + m_NumVoxels - 1), sizeof(uint), cudaMemcpyDeviceToHost)); m_ActiveVoxels = lastElement + lastScanElement; } if (0 == 
m_ActiveVoxels) { // return if there are no full voxels totalVerts = 0; return; } // compact voxel index array launch_compactVoxels(grid, threads, m_pdCompactedVoxelArray, m_pdVoxelOccupied, m_pdVoxelOccupiedScan, m_NumVoxels); cutilCheckMsg("compactVoxels failed"); #endif // SKIP_EMPTY_VOXELS // scan voxel vertex count array cudppScan(m_Scanplan, m_pdVoxelVertsScan, m_pdVoxelVerts, m_NumVoxels); #if DEBUG_BUFFERS printf("voxelVertsScan:\n"); dumpBuffer(m_pdVoxelVertsScan, m_NumVoxels); #endif // readback total number of vertices { uint lastElement, lastScanElement; cutilSafeCall(cudaMemcpy((void *) &lastElement, (void *) (m_pdVoxelVerts + m_NumVoxels - 1), sizeof(uint), cudaMemcpyDeviceToHost)); cutilSafeCall(cudaMemcpy((void *) &lastScanElement, (void *) (m_pdVoxelVertsScan + m_NumVoxels - 1), sizeof(uint), cudaMemcpyDeviceToHost)); totalVerts = lastElement + lastScanElement; } // create VBOs GLuint posVbo, normalVbo; createVBO(&posVbo, totalVerts * sizeof(float) * 4); cutilSafeCall(cudaGLRegisterBufferObject(posVbo)); createVBO(&normalVbo, totalVerts * sizeof(float) * 4); cutilSafeCall(cudaGLRegisterBufferObject(normalVbo)); // generate triangles, writing to vertex buffers float4 *d_pos = 0, *d_normal = 0; cutilSafeCall(cudaGLMapBufferObject((void**)&d_pos, posVbo)); cutilSafeCall(cudaGLMapBufferObject((void**)&d_normal, normalVbo)); #if SKIP_EMPTY_VOXELS dim3 grid2((int) ceil(m_ActiveVoxels / (float) NTHREADS), 1, 1); #else dim3 grid2((int) ceil(m_NumVoxels / (float) NTHREADS), 1, 1); #endif while(grid2.x > 65535) { grid2.x/=2; grid2.y*=2; } launch_generateTriangles(grid2, NTHREADS, d_pos, d_normal, m_pdCompactedVoxelArray, m_pdVoxelVertsScan, m_pdVolume, pdVolumeFval, m_GridSize, _isoValue, m_ActiveVoxels, m_MaxVerts); cutilSafeCall(cudaGLUnmapBufferObject(normalVbo)); cutilSafeCall(cudaGLUnmapBufferObject(posVbo)); _pRender->posVbo = posVbo; _pRender->normalVbo = normalVbo; _pRender->totalVerts = totalVerts; cutilSafeCall(cudaFree(pdVolumeFval)); }