////////////////////////////////////////////////////////////////////////////////
//! Run the Cuda kernels
//!
//! Generates the wave spectrum, runs the inverse FFT into the mapped height
//! vertex buffer, then computes the slope buffer from the heights. Both
//! graphics resources stay mapped while the slope kernel runs and are
//! released afterwards (slope first, then height).
////////////////////////////////////////////////////////////////////////////////
void runCuda()
{
    size_t mappedBytes;  // filled in by the mapping calls; not used further here

    // Frequency domain: generate the wave spectrum.
    cudaGenerateSpectrumKernel(d_h0, d_ht, fftInputW, fftInputH, animTime, patchSize);

    // Spatial domain: map the height buffer and run the inverse FFT into it.
    // (Graphics-resource mapping replaces the deprecated cudaGLMapBufferObject.)
    cutilSafeCall(cudaGraphicsMapResources(1, &cuda_heightVB_resource, 0));
    cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void**)&g_hptr, &mappedBytes,
                                                       cuda_heightVB_resource));
    cufftSafeCall(cufftExecC2R(fftPlan, (cufftComplex *) d_ht, g_hptr));

    // Shading: map the slope buffer and compute slopes from the heights.
    cutilSafeCall(cudaGraphicsMapResources(1, &cuda_slopeVB_resource, 0));
    cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void**)&g_sptr, &mappedBytes,
                                                       cuda_slopeVB_resource));
    cudaCalculateSlopeKernel(g_hptr, g_sptr, meshW, meshH);

    // Release both buffers (replaces the deprecated cudaGLUnmapBufferObject).
    cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_slopeVB_resource, 0));
    cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_heightVB_resource, 0));
}
// Advances the water simulation by one step:
//   1. applies all pending disturbances to the "old" vertex buffer on the CPU
//      (through a GL buffer mapping),
//   2. runs the wave-map update kernel (new <- old),
//   3. runs the normals kernel on the old vertices,
//   4. swaps the old/new graphics resources.
void WaterPlaneCUDA::update()
{
    glBindBuffer(GL_ARRAY_BUFFER, oldVertexBuffer);
    float3* verticesTest = (float3*)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);

    // glMapBuffer may fail and return a null pointer; in that case skip the
    // CPU-side writes instead of dereferencing it.
    if (verticesTest)
    {
        for (size_t i = 0; i < disturbances.size(); i++)
        {
            Disturbances *dist = disturbances.at(i);

            for (int x = dist->xminW; x <= dist->xmaxW; x++)
            {
                // Columns outside the grid would wrap into a neighbouring row
                // (or index before the buffer when negative).
                if (x < 0 || x >= pointsX)
                {
                    continue;
                }

                for (int y = dist->zminW; y <= dist->zmaxW; y++)
                {
                    if (y < 0 || y >= pointsY)
                    {
                        continue;
                    }

                    // <= 0 means (x, y) lies inside or on the disturbance circle.
                    float insideCircle = ((x-dist->centerX)*(x-dist->centerX))+((y-dist->centerZ)*(y-dist->centerZ))-dist->radiusSQ;

                    if (insideCircle <= 0)
                    {
                        // Row-major index; the range checks above keep it
                        // inside [0, pointsX*pointsY).
                        int vIndex = (y * pointsX) + x;
                        verticesTest[vIndex].y = (insideCircle/dist->radiusSQ)*dist->height;
                    }
                }
            }
        }

        glUnmapBufferARB(GL_ARRAY_BUFFER);
    }

    disturbances.clear();

    size_t num_bytes;

    // --- wave map update: new <- f(old) ---
    cutilSafeCall(cudaGraphicsMapResources(1, &cuda_newVertex_resource, 0));
    cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void**)&gpu_newVertices, &num_bytes, cuda_newVertex_resource));
    cutilSafeCall(cudaGraphicsMapResources(1, &cuda_oldVertex_resource, 0));
    cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void**)&gpu_oldVertices, &num_bytes, cuda_oldVertex_resource));

    // NOTE(review): addressing a symbol by string name is only accepted by
    // older CUDA toolkits; newer ones need the symbol itself. Confirm the
    // toolkit version this is built with.
    cutilSafeCall(cudaMemcpyToSymbol("DIM",&pointsX,sizeof(int)));
    updateWaveMapGPU1(gpu_newVertices,gpu_oldVertices);

    cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_newVertex_resource, 0));
    cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_oldVertex_resource, 0));

    // --- normals from the old vertices ---
    cutilSafeCall(cudaGraphicsMapResources(1, &cuda_oldVertex_resource, 0));
    cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void**)&gpu_oldVertices, &num_bytes, cuda_oldVertex_resource));
    cutilSafeCall(cudaGraphicsMapResources(1, &cuda_normalsVB_resource, 0));
    cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void**)&gpu_normals, &num_bytes, cuda_normalsVB_resource));

    cutilSafeCall(cudaMemcpyToSymbol("DIM",&pointsX,sizeof(int)));
    updateNormalsGPU1(gpu_oldVertices,gpu_normals);

    cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_normalsVB_resource, 0));
    cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_oldVertex_resource, 0));

    // swap between old and new wave map
    struct cudaGraphicsResource *temp = cuda_oldVertex_resource;
    cuda_oldVertex_resource = cuda_newVertex_resource;
    cuda_newVertex_resource = temp;
}
// Renders `iterations` frames into the mapped PBO without displaying them and
// prints the average time per frame and the resulting throughput.
//
// iterations: number of render() calls to time. Values <= 0 run no frames and
//             report a time and throughput of 0 instead of dividing by zero.
void runBenchmark(int iterations)
{
    printf("[%s] (Benchmark Mode)\n", sSDKsample);

    sdkCreateTimer(&timer);

    uchar4 *d_output;
    checkCudaErrors(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0));
    size_t num_bytes;
    checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void **)&d_output, &num_bytes, cuda_pbo_resource));

    sdkStartTimer(&timer);

    for (int i = 0; i < iterations; ++i)
    {
        render(imageWidth, imageHeight, tx, ty, scale, cx, cy,
               blockSize, gridSize, g_FilterMode, d_output);
    }

    // Wait for all queued work before stopping the timer. The result is
    // checked so that asynchronous kernel failures are not silently dropped.
    checkCudaErrors(cudaDeviceSynchronize());
    sdkStopTimer(&timer);

    float time = 0.0f;
    if (iterations > 0)
    {
        time = sdkGetTimerValue(&timer) / (float) iterations;
    }

    checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));

    // Guard the throughput computation against a zero average time.
    double mpixelsPerSec = 0.0;
    if (time > 0.0f)
    {
        mpixelsPerSec = (width*height / (time * 0.001f)) / 1e6;
    }

    printf("time: %0.3f ms, %f Mpixels/sec\n", time, mpixelsPerSec);
}
/*
 * Advance the simulation by <n> generations.
 *
 * Maps both registered graphics resources, fetches their device pointers,
 * launches the kernel <n> times (reading from the buffer selected by
 * grid.which_buf, writing to the other one and flipping the selector after
 * every launch), then unmaps the resources and synchronizes stream 0.
 */
void advance_generations(unsigned long n)
{
    size_t mapped_size;
    uint8_t* bufs[2];

    DEBUG2("Mapping CUDA resources and retrieving device buffer pointers\n");
    cudaGraphicsMapResources(2, cuda_graphics_resources, (cudaStream_t)0);
    for (int b = 0; b < 2; ++b) {
        cudaGraphicsResourceGetMappedPointer((void**)&bufs[b], &mapped_size,
                                             cuda_graphics_resources[b]);
    }
    check_cuda_error();

    for (unsigned long gen = 0; gen < n; ++gen) {
        DEBUG2("Launching kernel (grid.width = %u, grid.height = %u)\n", grid.width, grid.height);

        uint8_t* current = bufs[grid.which_buf];
        uint8_t* next = bufs[!grid.which_buf];
        launch_kernel(current, next, grid.width, grid.height);

        /* the buffer just written becomes the input of the next generation */
        grid.which_buf ^= 1;
    }

    DEBUG2("Unmapping CUDA resources\n");
    cudaGraphicsUnmapResources(2, cuda_graphics_resources, (cudaStream_t)0);
    cudaStreamSynchronize(0);
}
//-----------------------------------------------------------------------------
// Fills the CUDA-registered pixel buffer object with the current image and
// all active overlays. When `output` is not NULL the finished PBO content is
// additionally copied into it.
//
// If the PBO cannot be mapped (or no device pointer is obtained) the function
// returns without touching the PBO or `output`.
void QGLImageGpuWidget::fillPbo(iu::ImageGpu_8u_C4* output)
{
  // map GL <-> CUDA resource
  uchar4 *d_dst = NULL;
  size_t start;
  if (cudaGraphicsMapResources(1, &cuda_pbo_resource_, 0) != cudaSuccess)
    return;

  if (cudaGraphicsResourceGetMappedPointer((void**)&d_dst, &start, cuda_pbo_resource_) != cudaSuccess
      || d_dst == NULL)
  {
    // never hand an invalid pointer to the copy kernels; release the mapping
    cudaGraphicsUnmapResources(1, &cuda_pbo_resource_, 0);
    return;
  }

  // get image data
  iuprivate::cuCopyImageToPbo(image_, num_channels_, bit_depth_, d_dst, min_, max_);
  // cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() replaces it
  cudaDeviceSynchronize();

  // get overlays
  iuprivate::OverlayList::iterator it;
  for (it = overlay_list_.begin(); it != overlay_list_.end(); ++it)
  {
    if ((*it)->isActive())
      cuCopyOverlayToPbo((*it), d_dst, image_->size());
  }
  cudaDeviceSynchronize();

  if (output != NULL)
  {
    // copy final pbo to output; `temp` wraps the mapped PBO memory
    iu::ImageGpu_8u_C4 temp(d_dst, image_->width(), image_->height(),
                            image_->width()*sizeof(uchar4), true);
    iu::copy(&temp, output);
  }

  // unmap GL <-> CUDA resource
  cudaGraphicsUnmapResources(1, &cuda_pbo_resource_, 0);
}
/*Private Methods*/

// Maps the pixel buffer resource, runs ITERATIONS_PER_RENDER calculation
// launches into it, prints the running iteration count and unmaps the buffer.
//
// If the buffer cannot be mapped, or its device pointer cannot be obtained,
// the failure is reported on stderr and the frame is skipped: no kernels are
// launched and the iteration counter is not advanced.
void Screen::render()
{
    uchar4 * out_data = 0;
    cudaError_t cudaStatus;

    cudaStatus = cudaGraphicsMapResources(1, &cuda_pbo_resource, 0);
    if (cudaStatus != cudaSuccess)
    {
        fprintf(stderr, "Call to cudaGraphicsMapResources failed.\n");
        return;
    }

    cudaStatus = cudaGraphicsResourceGetMappedPointer((void **)&out_data, NULL, cuda_pbo_resource);
    if (cudaStatus != cudaSuccess)
    {
        fprintf(stderr, "Call to cudaGraphicsResourceGetMappedPointer failed.\n");
        // the resource was mapped above, so release it before bailing out
        cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0);
        return;
    }

    // count the frame only once the output buffer is known to be usable
    iteration_count++;

    for (int i = 0; i < ITERATIONS_PER_RENDER; i++)
    {
        kernel_ptr->launchCalculations(out_data, MAX_WIN_WIDTH, MAX_WIN_HEIGHT);
    }

    std::cout << "Iteration number: " << iteration_count*ITERATIONS_PER_RENDER << std::endl;

    cudaStatus = cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0);
    if (cudaStatus != cudaSuccess)
    {
        fprintf(stderr, "Call to cudaGraphicsUnmapResources failed.\n");
    }
}
////////////////////////////////////////////////////////////////////////////////
//! Run the Cuda part of the computation
//!
//! Maps the histogram buffer and the color texture in one call, fetches the
//! histogram device pointer and the color texture's CUDA array, runs
//! createHistogramTex on them and unmaps both resources again.
////////////////////////////////////////////////////////////////////////////////
void runCuda()
{
    cudaStream_t stream = 0;
    const int nbResources = 2;
    cudaGraphicsResource *ppResources[nbResources] =
    {
        g_histogram.cudaResource,
        g_color.cudaResource,
    };

    // Map resources for Cuda
    checkCudaErrors(cudaGraphicsMapResources(nbResources, ppResources, stream));
    getLastCudaError("cudaGraphicsMapResources(2) failed");

    // Get pointers
    checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void **)&g_histogram.cudaBuffer, &g_histogram.size, g_histogram.cudaResource));
    // this message used to name g_color.pBuffer although the call above
    // concerns the histogram buffer
    getLastCudaError("cudaGraphicsResourceGetMappedPointer (g_histogram.cudaBuffer) failed");

    // checked like the other interop calls in this function
    checkCudaErrors(cudaGraphicsSubResourceGetMappedArray(&g_color.pCudaArray, g_color.cudaResource, 0, 0));
    getLastCudaError("cudaGraphicsSubResourceGetMappedArray (g_color.pBuffer) failed");

    // Execute kernel
    createHistogramTex(g_histogram.cudaBuffer, g_WindowWidth, g_WindowHeight, g_color.pCudaArray);
    checkCudaError();

    //
    // unmap the resources
    //
    checkCudaErrors(cudaGraphicsUnmapResources(nbResources, ppResources, stream));
    getLastCudaError("cudaGraphicsUnmapResources(2) failed");
}
// Renders one frame with CUDA into the HDR texture and presents it.
//
// The graphics resource is mapped only while the kernel runs and its output
// is copied into the registered resource. Afterwards a 3-vertex draw with the
// fullscreen-triangle vertex shader and the copy pixel shader puts the result
// on screen. The animation time advances by 0.1 per call.
void DirectXInterop::DrawFrame()
{
    static float s_animTime = 0.0f;  // persists across calls

    // --- CUDA section: resource is mapped for its whole duration ---
    cudaGraphicsResource *mappedResource = m_hdrTextureCuda->GetCurrentGraphicsResource();
    CE(cudaGraphicsMapResources(1, &mappedResource));

    RenderFrame(m_hdrTextureCuda->GetTextureData(),
                m_clientWidth,
                m_clientHeight,
                m_hdrTextureCuda->GetTexturePitch(),
                s_animTime);

    m_hdrTextureCuda->CopyTextureDataToRegisteredResource();

    CE(cudaGraphicsUnmapResources(1, &mappedResource));

    // --- D3D section: draw the texture to the back buffer and present ---
    m_immediateContext->VSSetShader(m_fullscreenTriangleVS, nullptr, 0u);
    m_immediateContext->PSSetShader(m_copyCudaOutputToBackbufferPS, nullptr, 0u);

    ID3D11ShaderResourceView *srv = m_hdrTextureD3D->GetShaderResource();
    m_immediateContext->PSSetShaderResources(0, 1, &srv);

    m_immediateContext->Draw(3u, 0u);
    m_swapChain->Present(1u, 0u);

    s_animTime += 0.1f;
}
// Fills the depth and intensity pyramids.
//
// rgb        : texture whose CUDA resource is mapped and converted to
//              intensity for level 0 of destImages.
// destDepths : depth pyramid (NUM_PYRS levels); level 0 is produced from
//              vmaps_tmp, every further level by downsampling the previous.
// destImages : intensity pyramid (NUM_PYRS levels), built the same way.
void RGBDOdometry::populateRGBDData(GPUTexture * rgb,
                                    DeviceArray2D<float> * destDepths,
                                    DeviceArray2D<unsigned char> * destImages)
{
    // Depth pyramid: base level from the vertex map, then downsample.
    verticesToDepth(vmaps_tmp, destDepths[0], maxDepthRGB);

    for(int level = 1; level < NUM_PYRS; ++level)
    {
        pyrDownGaussF(destDepths[level - 1], destDepths[level]);
    }

    // Intensity pyramid: base level from the mapped RGB texture.
    cudaArray * rgbArray;

    cudaGraphicsMapResources(1, &rgb->cudaRes);
    cudaGraphicsSubResourceGetMappedArray(&rgbArray, rgb->cudaRes, 0, 0);
    imageBGRToIntensity(rgbArray, destImages[0]);
    cudaGraphicsUnmapResources(1, &rgb->cudaRes);

    for(int level = 1; level < NUM_PYRS; ++level)
    {
        pyrDownUcharGauss(destImages[level - 1], destImages[level]);
    }

    cudaDeviceSynchronize();
}
// Copies the selected output into the CUDA array behind a registered graphics
// resource.
//
// destination : graphics resource to write into; mapped for the duration of
//               the copy and unmapped before returning.
// level       : replaced by the stored history level when
//               _history_position > 0.
// Remaining parameters are forwarded unchanged to copyOutputArray.
//
// Returns false when the buffers are not initialized or when any CUDA interop
// call fails, true otherwise. Debug builds still assert on interop failures;
// builds with NDEBUG now report the failure through the return value instead
// of continuing with an unmapped resource.
bool WorkingBuffers::copyToGL(struct cudaGraphicsResource* destination, TsOutputType outputType, int level, int styleIndex, float vizGain, bool vizNormalize, int vizMode)
{
    if (!_is_initialized)
        return false;

    cudaArray* d_result = 0;

    cudaError_t error = cudaGraphicsMapResources(1, &destination, 0);
    if (error != cudaSuccess)
    {
        std::cerr << "ERROR! GraphicsMapResources" << std::endl;
        assert(error == cudaSuccess);
        return false;
    }

    error = cudaGraphicsSubResourceGetMappedArray(&d_result, destination, 0, 0);
    if (error != cudaSuccess)
    {
        std::cerr << "ERROR! GraphicsSubResourceGetMappedArray" << std::endl;
        assert(error == cudaSuccess);
        // the resource is mapped at this point; release it before giving up
        cudaGraphicsUnmapResources(1, &destination, 0);
        return false;
    }

    if (_history_position > 0)
        level = _history_params[_history_position].level;

    copyOutputArray(outputType, level, d_result, styleIndex, vizGain, vizNormalize, vizMode);

    error = cudaGraphicsUnmapResources(1, &destination, 0);
    if (error != cudaSuccess)
    {
        std::cerr << "ERROR! GraphicsUnmapResources" << std::endl;
        assert(error == cudaSuccess);
        return false;
    }

    return true;
}
// Prepares the current-frame pyramids for ICP.
//
// filteredDepth : texture whose CUDA array is copied into level 0 of
//                 depth_tmp.
// depthCutoff   : forwarded to createVMap for every level.
//
// After the copy, the remaining depth levels are produced with pyrDown and a
// vertex map and a normal map are created for each of the NUM_PYRS levels.
void RGBDOdometry::initICP(GPUTexture * filteredDepth, const float depthCutoff)
{
    cudaArray * depthArray;

    // Level 0: device-to-device copy out of the mapped texture.
    cudaGraphicsMapResources(1, &filteredDepth->cudaRes);
    cudaGraphicsSubResourceGetMappedArray(&depthArray, filteredDepth->cudaRes, 0, 0);
    cudaMemcpy2DFromArray(depth_tmp[0].ptr(0),
                          depth_tmp[0].step(),
                          depthArray,
                          0,
                          0,
                          depth_tmp[0].colsBytes(),
                          depth_tmp[0].rows(),
                          cudaMemcpyDeviceToDevice);
    cudaGraphicsUnmapResources(1, &filteredDepth->cudaRes);

    // Coarser depth levels.
    for(int level = 1; level < NUM_PYRS; level++)
    {
        pyrDown(depth_tmp[level - 1], depth_tmp[level]);
    }

    // Vertex and normal maps for every level.
    for(int level = 0; level < NUM_PYRS; level++)
    {
        createVMap(intr(level), depth_tmp[level], vmaps_curr_[level], depthCutoff);
        createNMap(vmaps_curr_[level], nmaps_curr_[level]);
    }

    cudaDeviceSynchronize();
}
//-----------------------------------------------------------------------------
// Name: RunCUDA()
// Desc: Launches the CUDA kernels to fill in the texture data.
//       Does nothing while the device is flagged as lost.
//-----------------------------------------------------------------------------
void RunCUDA()
{
    if (g_bDeviceLost)
    {
        return;
    }

    // All three registered textures are mapped and unmapped together in a
    // single call each; the map/unmap pair brackets the CUDA work between the
    // Direct3D uses of these resources.
    cudaStream_t stream = 0;
    const int nbResources = 3;
    cudaGraphicsResource *ppResources[nbResources] =
    {
        g_texture_2d.cudaResource,
        g_texture_vol.cudaResource,
        g_texture_cube.cudaResource,
    };

    cudaGraphicsMapResources(nbResources, ppResources, stream);
    getLastCudaError("cudaGraphicsMapResources(3) failed");

    // Populate the contents of the mapped textures.
    RunKernels();

    cudaGraphicsUnmapResources(nbResources, ppResources, stream);
    getLastCudaError("cudaGraphicsUnmapResources(3) failed");
}
// Display callback: runs the image filters into the mapped PBO, draws the
// result as a textured triangle, and keeps the frame timing up to date.
void displayFunc(void)
{
    size_t mappedBytes;
    TColor *d_output = NULL;

    sdkStartTimer(&timer);

    // Restart the measurement at the first frame of each batch.
    if (frameCounter++ == 0)
    {
        sdkResetTimer(&timer);
    }

    // Map the PBO (replaces the deprecated cudaGLMapBufferObject).
    checkCudaErrors(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0));
    getLastCudaError("cudaGraphicsMapResources failed");
    checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void **)&d_output, &mappedBytes, cuda_pbo_resource));
    getLastCudaError("cudaGraphicsResourceGetMappedPointer failed");

    // Filters run with the texture array bound.
    checkCudaErrors(CUDA_Bind2TextureArray());
    runImageFilters(d_output);
    checkCudaErrors(CUDA_UnbindTexture());

    // Unmap the PBO (replaces the deprecated cudaGLUnmapBufferObject).
    checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));

    // Common display code path
    {
        glClear(GL_COLOR_BUFFER_BIT);

        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageW, imageH,
                        GL_RGBA, GL_UNSIGNED_BYTE, BUFFER_DATA(0));

        // One triangle spanning (-1,-1), (+3,-1), (-1,+3) with texture
        // coordinates running from 0 to 2.
        glBegin(GL_TRIANGLES);
        glTexCoord2f(0, 0);
        glVertex2f(-1, -1);
        glTexCoord2f(2, 0);
        glVertex2f(+3, -1);
        glTexCoord2f(0, 2);
        glVertex2f(-1, +3);
        glEnd();

        glFinish();
    }

    // End of a batch of frameN frames: restart counting, report FPS once.
    const bool batchComplete = (frameCounter == frameN);
    if (batchComplete)
    {
        frameCounter = 0;

        if (g_FPS)
        {
            printf("FPS: %3.1f\n", frameN / (sdkGetTimerValue(&timer) * 0.001));
            g_FPS = false;
        }
    }

    glutSwapBuffers();
    glutReportErrors();

    sdkStopTimer(&timer);
    computeFPS();
}
// Maps the registered VBO resource and stores its device pointer in dev_vbo.
// The mapped size reported by the runtime is not kept.
void VBO::map()
{
    size_t mappedBytes;
    void **devPtrSlot = (void **)&dev_vbo;

    cutilSafeCall(cudaGraphicsMapResources(1, &cuda_vbo_resource));
    cutilSafeCall(cudaGraphicsResourceGetMappedPointer(devPtrSlot, &mappedBytes, cuda_vbo_resource));
}
// This is the normal display path: run the Sobel filter into the mapped PBO,
// upload the PBO into the texture, draw a textured quad, optionally perform
// the QA readback comparison, then swap buffers and request a redisplay.
void display(void)
{
    cutilCheckError(cutStartTimer(timer));

    // --- Sobel operation on the mapped PBO ---
    size_t mappedBytes;
    Pixel *d_pixels = NULL;

    cutilSafeCall(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0));
    cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void **)&d_pixels, &mappedBytes, cuda_pbo_resource));

    sobelFilter(d_pixels, imWidth, imHeight, g_SobelDisplayMode, imageScale, blockOp, pointOp);

    cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));

    // --- upload PBO contents into the texture ---
    glClear(GL_COLOR_BUFFER_BIT);

    glBindTexture(GL_TEXTURE_2D, texid);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_buffer);
    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imWidth, imHeight,
                    GL_LUMINANCE, GL_UNSIGNED_BYTE, OFFSET(0));
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

    // --- draw the quad ---
    glDisable(GL_DEPTH_TEST);
    glEnable(GL_TEXTURE_2D);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);

    // NOTE(review): each glTexCoord2f is issued after a glVertex2f here, so it
    // applies to the following vertex; call order is kept exactly as it was.
    glBegin(GL_QUADS);
    glVertex2f(0, 0);
    glTexCoord2f(0, 0);
    glVertex2f(0, 1);
    glTexCoord2f(1, 0);
    glVertex2f(1, 1);
    glTexCoord2f(1, 1);
    glVertex2f(1, 0);
    glTexCoord2f(0, 1);
    glEnd();

    glBindTexture(GL_TEXTURE_2D, 0);

    // --- optional QA readback and comparison against the reference image ---
    const bool doReadback = g_CheckRender && g_CheckRender->IsQAReadback() && g_Verify;
    if (doReadback)
    {
        printf("> (Frame %d) readback BackBuffer\n", frameCount);
        g_CheckRender->readback( imWidth, imHeight );
        g_CheckRender->savePPM ( sOriginal_ppm[g_Index], true, NULL );

        const bool imagesMatch = g_CheckRender->PPMvsPPM(sOriginal_ppm[g_Index], sReference_ppm[g_Index], MAX_EPSILON_ERROR, 0.15f);
        if (!imagesMatch)
        {
            g_TotalErrors++;
        }

        g_Verify = false;
    }

    glutSwapBuffers();

    cutilCheckError(cutStopTimer(timer));

    computeFPS();

    glutPostRedisplay();
}
// Maps the CUDA graphics resource and returns its device pointer.
//
// Returns a null pointer if the resource cannot be mapped or the pointer
// cannot be obtained; in the latter case the mapping is released again.
// Previously an uninitialized pointer was returned on failure.
void * OsdCudaVertexBuffer::Map()
{
    size_t num_bytes = 0;
    void *ptr = 0;

    if (cudaGraphicsMapResources(1, &_cudaResource, 0) != cudaSuccess)
    {
        return 0;
    }

    if (cudaGraphicsResourceGetMappedPointer(&ptr, &num_bytes, _cudaResource) != cudaSuccess)
    {
        // do not leave the resource mapped when no usable pointer exists
        cudaGraphicsUnmapResources(1, &_cudaResource, 0);
        return 0;
    }

    return ptr;
}
// Maps the graphics resource and returns {device pointer, mapped size}.
// The size is the byte count reported by the runtime, converted to
// unsigned int for the return type.
std::pair<float *, unsigned int> VBOCudaMapper::mapVBOBuffer()
{
    size_t mapped_bytes;
    float * mapped_ptr = nullptr;

    cudaCheck(cudaGraphicsMapResources(1, &cuda_graphics_resource_, nullptr));
    cudaCheck(cudaGraphicsResourceGetMappedPointer((void **)&mapped_ptr,
                                                   &mapped_bytes,
                                                   cuda_graphics_resource_));

    return std::pair<float *, unsigned int>(mapped_ptr,
                                            static_cast<unsigned int>(mapped_bytes));
}
// Paint callback: renders one frame with CUDA into the mapped pixel buffer
// and draws it with glDrawPixels.
//
// Nothing beyond clearing the color buffer happens until `ready` is set.
// In path-tracer mode the very first frame (frameNo == 0) is additionally
// copied to the host and written to "0.tga".
void Canvas::paintGL()
{
    glClear(GL_COLOR_BUFFER_BIT);

    if(!ready)
        return;

    size_t size;
    checkCudaErrors(cudaGraphicsMapResources(1, &resource, 0));
    checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void**)&img, &size, resource));

    if(renderMode == RENDER_MODE_RAYCASTING)
    {
        /*glEnable(GL_BLEND);
        glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
        glBegin(GL_QUADS);
        glColor4f(0.4745f, 0.9294f, 0.8901f, 1.f);
        glVertex2f(1.f, 1.f);
        glColor4f(0.4745f, 0.9294f, 0.8901f, 1.f);
        glVertex2f(-1.f, 1.f);
        glColor4f(0.9490f, 0.9647f, 0.9803f, 1.f);
        glVertex2f(-1.f, -1.f);
        glColor4f(0.9490f, 0.9647f, 0.9803f, 1.f);
        glVertex2f(1.f, -1.f);
        glEnd();*/
        render_raycasting(img, deviceVolume, transferFunction, camera, volumeReader.GetElementBoundingSphereRadius());
    }
    else
    {
        render_pathtracer(img, renderParams);

        if(renderParams.frameNo == 0)
        {
            // One size expression for both the allocation and the copy, so the
            // host buffer can never be smaller than what is copied into it.
            const size_t frameBytes = sizeof(glm::u8vec4) * WIDTH * HEIGHT;
            char* data = new char[frameBytes];
            // the copy result used to be ignored
            checkCudaErrors(cudaMemcpy(data, img, frameBytes, cudaMemcpyDeviceToHost));
            stbi_write_tga("0.tga", WIDTH, HEIGHT, 4, data);
            delete []data;
        }
    }

    checkCudaErrors(cudaDeviceSynchronize());
    checkCudaErrors(cudaGraphicsUnmapResources(1, &resource, 0));

    // Draw straight from the pixel buffer object.
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);
    glDrawPixels(WIDTH, HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

    glDisable(GL_BLEND);

    renderParams.frameNo++;
}
// Registers the GL texture gl_id_ with CUDA as a write-discard image and
// performs one map/unmap cycle on the resulting resource.
//
// Fixes over the previous version:
//  - the register call receives the address of the resource handle (it is an
//    output parameter), matching how the map/unmap calls below use it;
//  - the register flag comes from the cudaGraphicsRegisterFlags family, not
//    from the map-flags family;
//  - the function returns a value (it previously fell off the end of a
//    non-void function).
//
// NOTE(review): the intended return value is not visible from this function;
// the GL texture id is returned here -- confirm against the callers.
unsigned ogl_abstract_texture2d::load()
{
    cudaGraphicsGLRegisterImage(&cuda_resource_, gl_id_, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsWriteDiscard);
    cudaGraphicsMapResources(1, &cuda_resource_);
    cudaGraphicsUnmapResources(1, &cuda_resource_);

    return gl_id_;
}
// Copies `size` bytes from device memory `src` into the CUDA array behind the
// registered texture resource (device-to-device copy).
//
// src  : source pointer; passed to the copy as device memory.
// size : number of bytes to copy.
void CudaTexture::copyFrom(void * src, unsigned size)
{
    std::cout<<" cu tex cpy "<<size<<"\n";

    cudaArray * arr;
    cutilSafeCall(cudaGraphicsMapResources(1, &_cuda_tex_resource, 0));
    cutilSafeCall(cudaGraphicsSubResourceGetMappedArray(&arr, _cuda_tex_resource, 0, 0));
    cutilSafeCall(cudaMemcpyToArray(arr, 0, 0, src, size, cudaMemcpyDeviceToDevice));
    // checked like every other CUDA call in this function; the unmap result
    // used to be dropped
    cutilSafeCall(cudaGraphicsUnmapResources(1, &_cuda_tex_resource, 0));
}
// Maps the CUDA graphics resource and caches its device pointer in
// _cudaBuffer. Does nothing when a pointer is already cached.
//
// On failure _cudaBuffer is left untouched (null), so a later call can retry.
// Previously an uninitialized pointer could be stored, which also made every
// later call return early.
void OsdCudaD3D11VertexBuffer::map()
{
    if (_cudaBuffer)
        return;

    size_t num_bytes = 0;
    void *ptr = 0;

    if (cudaGraphicsMapResources(1, &_cudaResource, 0) != cudaSuccess)
        return;

    if (cudaGraphicsResourceGetMappedPointer(&ptr, &num_bytes, _cudaResource) != cudaSuccess)
    {
        // no usable pointer: release the mapping taken above
        cudaGraphicsUnmapResources(1, &_cudaResource, 0);
        return;
    }

    _cudaBuffer = ptr;
}
// display results using OpenGL: run the bilateral filter into the mapped PBO,
// load the PBO into the texture and draw it as a quad through the fragment
// program.
void display()
{
    sdkStartTimer(&timer);

    // --- execute filter, writing results to the pbo ---
    size_t mappedBytes;
    unsigned int *d_filtered;

    // graphics-resource mapping replaces the deprecated cudaGLMapBufferObject
    checkCudaErrors(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0));
    checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void **)&d_filtered, &mappedBytes, cuda_pbo_resource));

    bilateralFilterRGBA(d_filtered, width, height, euclidean_delta, filter_radius, iterations, kernel_timer);

    // replaces the deprecated cudaGLUnmapBufferObject
    checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));

    // Common display code path
    {
        glClear(GL_COLOR_BUFFER_BIT);

        // load texture from pbo
        glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
        glBindTexture(GL_TEXTURE_2D, texid);
        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height,
                        GL_RGBA, GL_UNSIGNED_BYTE, 0);
        glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);

        // fragment program is required to display floating point texture
        glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader);
        glEnable(GL_FRAGMENT_PROGRAM_ARB);
        glDisable(GL_DEPTH_TEST);

        // unit quad, texture coordinates equal to the vertex positions
        glBegin(GL_QUADS);
        {
            glTexCoord2f(0, 0);
            glVertex2f(0, 0);

            glTexCoord2f(1, 0);
            glVertex2f(1, 0);

            glTexCoord2f(1, 1);
            glVertex2f(1, 1);

            glTexCoord2f(0, 1);
            glVertex2f(0, 1);
        }
        glEnd();

        glBindTexture(GL_TEXTURE_TYPE, 0);
        glDisable(GL_FRAGMENT_PROGRAM_ARB);
    }

    glutSwapBuffers();
    glutReportErrors();

    sdkStopTimer(&timer);

    computeFPS();
}
// Copies a 3-channel or 1-channel 8-bit GpuMat into the pixel buffer object
// of its pyramid level (through CUDA graphics interop) and uploads that PBO
// into the matching GL texture.
//
// cvgmRGB_ : source image on the device; 3 channels select the RGB
//            texture/PBO set, 1 channel selects the gray set.
//
// Returns the GL texture that received the image, or 0 for any other channel
// count (the previous version returned an uninitialized value in that case).
GLuint CGLUtil::gpuMapRgb2PixelBufferObj(const cv::cuda::GpuMat& cvgmRGB_ ){
	//http://rickarkin.blogspot.co.uk/2012/03/use-pbo-to-share-buffer-between-cuda.html
	int nPyrLevel_ = getLevel( cvgmRGB_.cols );

	const int nChannels = cvgmRGB_.channels();
	if (nChannels != 3 && nChannels != 1) {
		return 0; // unsupported layout: nothing is copied or uploaded
	}

	// Pick the texture, PBO, interop resource and formats for this layout;
	// the rest of the procedure is identical for both.
	const bool bColor = (nChannels == 3);
	GLuint uTexture = bColor ? _auTexture[nPyrLevel_] : _auGrayTexture[nPyrLevel_];
	GLuint uPixelBO = bColor ? _auRGBPixelBO[nPyrLevel_] : _auGrayPixelBO[nPyrLevel_];
	struct cudaGraphicsResource** ppResource = bColor ? &_apResourceRGBPxielBO[nPyrLevel_]
	                                                  : &_apResourceGrayPxielBO[nPyrLevel_];
	const int nCvType = bColor ? CV_8UC3 : CV_8UC1;
	const GLenum eFormat = bColor ? GL_RGB : GL_RED;

	// map OpenGL buffer object for writing from CUDA
	void *pDev;
	cudaSafeCall( cudaGraphicsMapResources(1, ppResource, 0));
	size_t nSize;
	cudaSafeCall( cudaGraphicsResourceGetMappedPointer((void **)&pDev, &nSize , *ppResource));
	// wrap the mapped PBO memory in a GpuMat header and copy the image into it
	cv::cuda::GpuMat cvgmRGBA( cvgmRGB_.size(), nCvType, pDev);
	cvgmRGB_.copyTo(cvgmRGBA);
	cudaSafeCall( cudaGraphicsUnmapResources(1, ppResource, 0) );

	//texture mapping
	glBindTexture( GL_TEXTURE_2D, uTexture);
	glBindBuffer ( GL_PIXEL_UNPACK_BUFFER_ARB, uPixelBO);
	glTexImage2D( GL_TEXTURE_2D, 0, eFormat, cvgmRGB_.cols, cvgmRGB_.rows, 0, eFormat, GL_UNSIGNED_BYTE, NULL);
	errorDetectorGL();
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
	glBindTexture(GL_TEXTURE_2D, 0);

	return uTexture;
}//gpuMapRgb2PixelBufferObj
// Maps the given graphics resource, runs launch_kernel on its device pointer
// with the current mesh dimensions and animation value, and unmaps it again.
//
// resource: address of the graphics resource handle to map.
//
// Every CUDA call goes through the two-argument checkCudaErrors macro with
// exit(0) as second argument (presumably the action taken on failure --
// confirm against the macro definition).
void RunCuda(struct cudaGraphicsResource **resource)
{
    size_t mappedBytes;
    float4 *vertices;

    // map OpenGL buffer object for writing from CUDA
    checkCudaErrors(cudaGraphicsMapResources(1, resource, 0), exit(0));
    checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void **)&vertices, &mappedBytes, *resource), exit(0));

    launch_kernel(vertices, MeshWidth, MeshHeight, _anim);

    // unmap buffer object
    checkCudaErrors(cudaGraphicsUnmapResources(1, resource, 0), exit(0));
}
// Runs the kernel launcher on the mapped pixel buffer and refreshes the
// window title with the current `param` and `sys` values.
void render()
{
    uchar4 *d_out = 0;
    cudaGraphicsMapResources(1, &cuda_pbo_resource, 0);
    cudaGraphicsResourceGetMappedPointer((void **)&d_out, NULL, cuda_pbo_resource);
    kernelLauncher(d_out, W, H, param, sys);
    cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0);

    // update contents of the title bar
    // snprintf bounds the write: "%.1f" of a large value can exceed the
    // 64-byte buffer, which sprintf would overflow.
    char title[64];
    snprintf(title, sizeof(title), "Stability: param = %.1f, sys = %d", param, sys);
    glutSetWindowTitle(title);
}
// Renders the scene with CUDA into the mapped surface resource and then
// issues the GL draw call (6 vertices as triangles).
//
// View, Projection: not read by this function.
void CUDARenderer::draw(const glm::mat4 &View, const glm::mat4 &Projection)
{
    setCameraInfo(camera.cam);

    cudaArray *surfaceArray;

    // The surface resource is mapped only while CUDA renders into it.
    gpuErrchk(cudaGraphicsMapResources(1, &cudaSurfRes));
    gpuErrchk(cudaGraphicsSubResourceGetMappedArray(&surfaceArray, cudaSurfRes, 0, 0));
    renderToTexture(dimBlock, dimGrid, d_scene, surfaceArray);
    gpuErrchk(cudaGraphicsUnmapResources(1, &cudaSurfRes));

    glDrawArrays(GL_TRIANGLES, 0, 6);
}
// Returns the host copy of the requested body array.
//
// array: BODYSYSTEM_VELOCITY selects the velocity array; any other value is
//        treated as BODYSYSTEM_POSITION.
//
// Unless system memory is in use (m_bUseSysMem), the data is first copied
// from the device into the host array (m_numBodies * 4 elements). When
// positions live in a PBO, its graphics resource is mapped read-only for the
// duration of that copy.
T* BodySystemGPU<T>::getArray(BodyArray array)
{
    assert(m_bInitialized);

    const int hostReadIndex = m_bUseSysMem ? m_currentRead : 0;

    T *hostData = 0;
    T *deviceData = 0;
    cudaGraphicsResource *pboResource = nullptr;

    if (array == BODYSYSTEM_VELOCITY)
    {
        hostData = m_hVel;
        deviceData = m_deviceData[0].dVel;
    }
    else
    {
        // BODYSYSTEM_POSITION and every unrecognised value
        hostData = m_hPos[hostReadIndex];
        deviceData = m_deviceData[0].dPos[m_currentRead];

        if (m_bUsePBO)
        {
            pboResource = m_pGRes[m_currentRead];
        }
    }

    if (m_bUseSysMem)
    {
        return hostData;
    }

    if (pboResource)
    {
        // the device pointer comes from the mapped PBO instead
        checkCudaErrors(cudaGraphicsResourceSetMapFlags(pboResource, cudaGraphicsMapFlagsReadOnly));
        checkCudaErrors(cudaGraphicsMapResources(1, &pboResource, 0));
        size_t mappedBytes;
        checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void **)&deviceData, &mappedBytes, pboResource));
    }

    checkCudaErrors(cudaMemcpy(hostData, deviceData,
                               m_numBodies*4*sizeof(T), cudaMemcpyDeviceToHost));

    if (pboResource)
    {
        checkCudaErrors(cudaGraphicsUnmapResources(1, &pboResource, 0));
    }

    return hostData;
}
// Binds the mesh buffers for use from CUDA and fills `data` with the device
// pointers and capacities.
//
// data: receives zeroed vertex/index counts, the maximum counts, and the
//       mapped device pointers of the vertex-attribute and index buffers.
//
// On first use both GL buffers are registered with CUDA; while the mesh is
// not yet bound they are mapped. The mapped pointers are queried on every
// call.
void TriangleMesh::bind(TriangleMeshData& data)
{
    data.numVertices = 0;
    data.numIndices = 0;
    data.maxNumVertices = static_cast<unsigned int>(maxNumVertices);
    data.maxNumIndices = static_cast<unsigned int>(maxNumIndices);

    if (!bound)
    {
        if (!registered)
        {
            // cudaGraphicsGLRegisterBuffer takes cudaGraphicsRegisterFlags*
            // values; the map-flag constant used before belongs to a different
            // enum (cudaGraphicsResourceSetMapFlags) and only matched
            // numerically.
            // NOTE(review): the buffers are registered read-only although
            // `data` is reset as if it were about to be filled -- confirm that
            // CUDA never writes through these pointers.
            PGA_CUDA_checkedCall(cudaGraphicsGLRegisterBuffer(&cudaVertexAttributesBuffer, vertexAttributesBuffer, cudaGraphicsRegisterFlagsReadOnly /* cudaGraphicsRegisterFlagsWriteDiscard */));
            PGA_CUDA_checkedCall(cudaGraphicsGLRegisterBuffer(&cudaIndexBuffer, indexBuffer, cudaGraphicsRegisterFlagsReadOnly /* cudaGraphicsRegisterFlagsWriteDiscard */));
            registered = true;
        }

        PGA_CUDA_checkedCall(cudaGraphicsMapResources(1, &cudaVertexAttributesBuffer, 0));
        PGA_CUDA_checkedCall(cudaGraphicsMapResources(1, &cudaIndexBuffer, 0));
        bound = true;
    }

    size_t size = 0;
    PGA_CUDA_checkedCall(cudaGraphicsResourceGetMappedPointer((void**)&data.verticesAttributes, &size, cudaVertexAttributesBuffer));
    PGA_CUDA_checkedCall(cudaGraphicsResourceGetMappedPointer((void**)&data.indices, &size, cudaIndexBuffer));
}
// Binds the mesh buffers for use from CUDA (Direct3D 11 variant) and fills
// `data` with the device pointers and capacities.
//
// data: receives zeroed vertex/index counts, the maximum counts, and the
//       mapped device pointers of the vertex-attribute and index buffers.
//
// The D3D11 buffers are registered once; while the mesh is not yet bound
// they are mapped. The mapped pointers are queried on every call.
void TriangleMesh::bind(TriangleMeshData& data)
{
    // Capacities first, then reset the element counts.
    data.maxNumVertices = static_cast<unsigned int>(maxNumVertices);
    data.maxNumIndices = static_cast<unsigned int>(maxNumIndices);
    data.numVertices = 0;
    data.numIndices = 0;

    if (!bound)
    {
        if (!registered)
        {
            // one-time registration of both buffers
            PGA_CUDA_checkedCall(cudaGraphicsD3D11RegisterResource(
                &cudaVertexAttributesBuffer,
                vertexAttributesBuffer,
                cudaGraphicsRegisterFlagsNone /* the only flag working for now */));
            PGA_CUDA_checkedCall(cudaGraphicsD3D11RegisterResource(
                &cudaIndexBuffer,
                indexBuffer,
                cudaGraphicsRegisterFlagsNone /* the only flag working for now */));
            registered = true;
        }

        PGA_CUDA_checkedCall(cudaGraphicsMapResources(1, &cudaVertexAttributesBuffer, 0));
        PGA_CUDA_checkedCall(cudaGraphicsMapResources(1, &cudaIndexBuffer, 0));
        bound = true;
    }

    // The reported sizes are not used; only the pointers are kept.
    size_t mappedBytes;
    PGA_CUDA_checkedCall(cudaGraphicsResourceGetMappedPointer(
        (void**)&data.verticesAttributes, &mappedBytes, cudaVertexAttributesBuffer));
    PGA_CUDA_checkedCall(cudaGraphicsResourceGetMappedPointer(
        (void**)&data.indices, &mappedBytes, cudaIndexBuffer));
}
// Prepares the model-side (previous) vertex/normal pyramids for ICP.
//
// predictedVertices, predictedNormals : textures whose CUDA arrays are copied
//                                       into vmaps_tmp / nmaps_tmp.
// depthCutoff                         : not read by this function.
// modelPose                           : 4x4 pose; its top-left 3x3 block and
//                                       top-right 3x1 column are passed to
//                                       tranformMaps for every level.
void RGBDOdometry::initICPModel(GPUTexture * predictedVertices,
                                GPUTexture * predictedNormals,
                                const float depthCutoff,
                                const Eigen::Matrix4f & modelPose)
{
    cudaArray * mappedArray;

    // Predicted vertices -> vmaps_tmp
    cudaGraphicsMapResources(1, &predictedVertices->cudaRes);
    cudaGraphicsSubResourceGetMappedArray(&mappedArray, predictedVertices->cudaRes, 0, 0);
    cudaMemcpyFromArray(vmaps_tmp.ptr(), mappedArray, 0, 0, vmaps_tmp.sizeBytes(), cudaMemcpyDeviceToDevice);
    cudaGraphicsUnmapResources(1, &predictedVertices->cudaRes);

    // Predicted normals -> nmaps_tmp
    cudaGraphicsMapResources(1, &predictedNormals->cudaRes);
    cudaGraphicsSubResourceGetMappedArray(&mappedArray, predictedNormals->cudaRes, 0, 0);
    cudaMemcpyFromArray(nmaps_tmp.ptr(), mappedArray, 0, 0, nmaps_tmp.sizeBytes(), cudaMemcpyDeviceToDevice);
    cudaGraphicsUnmapResources(1, &predictedNormals->cudaRes);

    // Level 0 of the pyramids, then the coarser levels.
    copyMaps(vmaps_tmp, nmaps_tmp, vmaps_g_prev_[0], nmaps_g_prev_[0]);

    for(int level = 1; level < NUM_PYRS; level++)
    {
        resizeVMap(vmaps_g_prev_[level - 1], vmaps_g_prev_[level]);
        resizeNMap(nmaps_g_prev_[level - 1], nmaps_g_prev_[level]);
    }

    // Rotation (row-major 3x3) and translation taken from the pose.
    Eigen::Matrix<float, 3, 3, Eigen::RowMajor> Rcam = modelPose.topLeftCorner(3, 3);
    Eigen::Vector3f tcam = modelPose.topRightCorner(3, 1);

    mat33 device_Rcam = Rcam;
    float3 device_tcam = *reinterpret_cast<float3*>(tcam.data());

    // Transform every level in place (inputs and outputs are the same maps).
    for(int level = 0; level < NUM_PYRS; level++)
    {
        tranformMaps(vmaps_g_prev_[level], nmaps_g_prev_[level],
                     device_Rcam, device_tcam,
                     vmaps_g_prev_[level], nmaps_g_prev_[level]);
    }

    cudaDeviceSynchronize();
}