bool GLInteropResource::ensureResource(int w, int h, int H, GLuint tex, int plane) { Q_ASSERT(plane < 2 && "plane number must be 0 or 1 for NV12"); TexRes &r = res[plane]; if (r.texture == tex && r.w == w && r.h == h && r.H == H && r.cuRes) return true; if (!ctx) { // TODO: how to use pop/push decoder's context without the context in opengl context CUDA_ENSURE(cuCtxCreate(&ctx, CU_CTX_SCHED_BLOCKING_SYNC, dev), false); if (USE_STREAM) { CUDA_WARN(cuStreamCreate(&res[0].stream, CU_STREAM_DEFAULT)); CUDA_WARN(cuStreamCreate(&res[1].stream, CU_STREAM_DEFAULT)); } qDebug("cuda contex on gl thread: %p", ctx); CUDA_ENSURE(cuCtxPopCurrent(&ctx), false); // TODO: why cuMemcpy2D need this } if (r.cuRes) { CUDA_ENSURE(cuGraphicsUnregisterResource(r.cuRes), false); r.cuRes = NULL; } // CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD works too for opengl, but not d3d CUDA_ENSURE(cuGraphicsGLRegisterImage(&r.cuRes, tex, GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_NONE), false); r.texture = tex; r.w = w; r.h = h; r.H = H; return true; }
//---------------------------------------------------------------- /// setup for interop : prt refers to a texture in which we write /// results. This must be seen as a linear buffer. So we need to /// allocate a temporary linear buffer that will be then copied /// back to the texture // bool ResourceCUDA::setupAsCUDATarget() { int fmtSz = ResourceFormatByteSize(m_creationData.fmt); CUresult res; m_xByteSz = m_creationData.sz[0] * fmtSz; m_size = m_xByteSz * m_creationData.sz[1]; if(m_dptr) { res = cuMemFree(m_dptr); if(res) return false; } res = cuMemAllocPitch( &m_dptr, &m_pitch, m_xByteSz, m_creationData.sz[1], 4); if(res) return false; float pitchToSendToKernel = (float)m_pitch / (float)fmtSz; #pragma MESSAGE("TODO TODO TODO TODO TODO TODO : send pitch to the kernel !") LOGI("Event>>cuMemAllocPitch : Pitch of Target buffer (%d, %d) allocation = %d, %f\n", m_creationData.sz[0], m_xByteSz, m_pitch, pitchToSendToKernel); // // Register the texture to CUDA to be able to copy data back in it // GLenum target; if(m_cudaResource) { res = cuGraphicsUnregisterResource(m_cudaResource); if(res) return false; } switch(m_type) { case RESTEX_1D: target = GL_TEXTURE_1D; break; case RESTEX_2D: target = GL_TEXTURE_2D; break; case RESTEX_2DRECT: case RESRBUF_2D: case RESOURCE_UNKNOWN: //case RESTEX_3D: //case RESTEX_CUBE_MAP: default: LOGE("Failed to register the resource %s for CUDA : may be a render buffer\n", m_name.c_str()); return false; }; res = cuGraphicsGLRegisterImage( &m_cudaResource, m_OGLId, target, CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD ); if(res) { LOGE("Failed to register the texture %s for CUDA (as write discard)\n", m_name.c_str()); return 0; } return true; }
//---------------------------------------------------------------- /// Prepare what is needed to perform interop so the OGL texture /// can be exposed to CUDA bool ResourceCUDA::setupAsCUDATexture() { CUresult res; GLenum target; if(m_cudaResource) return true; // let's not unregister if the resource is here. We assume m_cudaResource is valid if non NULL //res = cuGraphicsUnregisterResource(m_cudaResource); if(m_OGLId == 0) validate(); switch(m_type) { case RESTEX_1D: target = GL_TEXTURE_1D; break; case RESTEX_2D: target = GL_TEXTURE_2D; break; case RESTEX_2DRECT: LOGI("WARNING>> RESTEX_2DRECT unhandled by CUDA"); return false; case RESRBUF_2D: LOGI("WARNING>> RESRBUF_2D unhandled by CUDA"); return false; case RESTEX_3D: LOGI("WARNING>> RESTEX_3D unhandled by CUDA"); return false; case RESTEX_CUBE_MAP: LOGI("WARNING>> RESTEX_CUBE_MAP unhandled by CUDA"); return false; case RESOURCE_UNKNOWN: default: assert(!"Resource unknown and unhandled by CUDA"); return false; }; res = cuGraphicsGLRegisterImage( &m_cudaResource, m_OGLId, target, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY ); if(res) { LOGE("Failed to register the texture %s for CUDA (as Read only)\n", m_name.c_str()); return 0; } return true; }
/// Construct a CUDA-accessible render surface: validates the requested
/// configuration, allocates a backing GL texture, and registers it with
/// CUDA for surface load/store access.
CudaSurface::CudaSurface(const Vec2i& size, Format format, int numSamples)
:   m_isMapped  (false),
    m_cudaArray (0)
{
    // Argument validation; fail() aborts with the given message.
    if (min(size) <= 0)
        fail("CudaSurface: Size must be positive!");
    if (max(size) > CR_MAXVIEWPORT_SIZE)
        fail("CudaSurface: CR_MAXVIEWPORT_SIZE exceeded!");
    if (format < 0 || format >= NUM_FORMAT)
        fail("CudaSurface: Invalid format!");
    if (numSamples > 8)
        fail("CudaSurface: numSamples cannot exceed 8!");
    if (numSamples < 1 || popc8(numSamples) != 1)
        fail("CudaSurface: numSamples must be a power of two!");

    // Map our Format enum to the matching GL texture parameters.
    int internalFmt, pixelFmt, pixelType;
    switch (format)
    {
    case FORMAT_RGBA8:
        internalFmt = GL_RGBA;
        pixelFmt    = GL_RGBA;
        pixelType   = GL_UNSIGNED_BYTE;
        break;
    case FORMAT_DEPTH32:
        internalFmt = GL_LUMINANCE32UI_EXT;
        pixelFmt    = GL_LUMINANCE_INTEGER_EXT;
        pixelType   = GL_UNSIGNED_INT;
        break;
    default:
        FW_ASSERT(false);
        return;
    }

    // Record the configuration: size rounded up to a multiple of
    // CR_TILE_SIZE, texture widened horizontally by the sample count.
    m_size        = size;
    m_roundedSize = (size + CR_TILE_SIZE - 1) & -CR_TILE_SIZE;
    m_textureSize = m_roundedSize * Vec2i(numSamples, 1);
    m_format      = format;
    m_numSamples  = numSamples;

    // Allocate the backing GL texture with nearest-neighbor filtering.
    glGenTextures(1, &m_glTexture);
    glBindTexture(GL_TEXTURE_2D, m_glTexture);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glTexImage2D(GL_TEXTURE_2D, 0, internalFmt, m_textureSize.x, m_textureSize.y, 0, pixelFmt, pixelType, NULL);
    GLContext::checkErrors();

    // Expose the texture to CUDA kernels via surface load/store.
    CudaModule::staticInit();
    CUresult res = cuGraphicsGLRegisterImage(&m_cudaResource, m_glTexture, GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST);
    CudaModule::checkError("cuGraphicsGLRegisterImage", res);
}