/// Releases the CUDA graphics resources and streams held in res[0] and res[1].
///
/// Handles that are null are skipped. The CUDA context (ctx) is deliberately
/// not pushed or destroyed here; see the notes kept below.
InteropResource::~InteropResource()
{
    // Pushing the context first was tried and rejected:
    //CUDA_WARN(cuCtxPushCurrent(ctx)); //error invalid value

    // Graphics resources go first, plane 0 then plane 1.
    if (res[0].cuRes) {
        CUDA_WARN(cuGraphicsUnregisterResource(res[0].cuRes));
    }
    if (res[1].cuRes) {
        CUDA_WARN(cuGraphicsUnregisterResource(res[1].cuRes));
    }

    // Streams are destroyed afterwards, in the same plane order.
    if (res[0].stream) {
        CUDA_WARN(cuStreamDestroy(res[0].stream));
    }
    if (res[1].stream) {
        CUDA_WARN(cuStreamDestroy(res[1].stream));
    }

    // FIXME: we own the context. But why crash to destroy ctx? CUDA_ERROR_INVALID_VALUE
    //CUDA_ENSURE(cuCtxDestroy(ctx));
}
bool GLInteropResource::ensureResource(int w, int h, int H, GLuint tex, int plane) { Q_ASSERT(plane < 2 && "plane number must be 0 or 1 for NV12"); TexRes &r = res[plane]; if (r.texture == tex && r.w == w && r.h == h && r.H == H && r.cuRes) return true; if (!ctx) { // TODO: how to use pop/push decoder's context without the context in opengl context CUDA_ENSURE(cuCtxCreate(&ctx, CU_CTX_SCHED_BLOCKING_SYNC, dev), false); if (USE_STREAM) { CUDA_WARN(cuStreamCreate(&res[0].stream, CU_STREAM_DEFAULT)); CUDA_WARN(cuStreamCreate(&res[1].stream, CU_STREAM_DEFAULT)); } qDebug("cuda contex on gl thread: %p", ctx); CUDA_ENSURE(cuCtxPopCurrent(&ctx), false); // TODO: why cuMemcpy2D need this } if (r.cuRes) { CUDA_ENSURE(cuGraphicsUnregisterResource(r.cuRes), false); r.cuRes = NULL; } // CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD works too for opengl, but not d3d CUDA_ENSURE(cuGraphicsGLRegisterImage(&r.cuRes, tex, GL_TEXTURE_2D, CU_GRAPHICS_REGISTER_FLAGS_NONE), false); r.texture = tex; r.w = w; r.h = h; r.H = H; return true; }
//---------------------------------------------------------------- /// setup for interop : prt refers to a texture in which we write /// results. This must be seen as a linear buffer. So we need to /// allocate a temporary linear buffer that will be then copied /// back to the texture // bool ResourceCUDA::setupAsCUDATarget() { int fmtSz = ResourceFormatByteSize(m_creationData.fmt); CUresult res; m_xByteSz = m_creationData.sz[0] * fmtSz; m_size = m_xByteSz * m_creationData.sz[1]; if(m_dptr) { res = cuMemFree(m_dptr); if(res) return false; } res = cuMemAllocPitch( &m_dptr, &m_pitch, m_xByteSz, m_creationData.sz[1], 4); if(res) return false; float pitchToSendToKernel = (float)m_pitch / (float)fmtSz; #pragma MESSAGE("TODO TODO TODO TODO TODO TODO : send pitch to the kernel !") LOGI("Event>>cuMemAllocPitch : Pitch of Target buffer (%d, %d) allocation = %d, %f\n", m_creationData.sz[0], m_xByteSz, m_pitch, pitchToSendToKernel); // // Register the texture to CUDA to be able to copy data back in it // GLenum target; if(m_cudaResource) { res = cuGraphicsUnregisterResource(m_cudaResource); if(res) return false; } switch(m_type) { case RESTEX_1D: target = GL_TEXTURE_1D; break; case RESTEX_2D: target = GL_TEXTURE_2D; break; case RESTEX_2DRECT: case RESRBUF_2D: case RESOURCE_UNKNOWN: //case RESTEX_3D: //case RESTEX_CUBE_MAP: default: LOGE("Failed to register the resource %s for CUDA : may be a render buffer\n", m_name.c_str()); return false; }; res = cuGraphicsGLRegisterImage( &m_cudaResource, m_OGLId, target, CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD ); if(res) { LOGE("Failed to register the texture %s for CUDA (as write discard)\n", m_name.c_str()); return 0; } return true; }
bool EGLInteropResource::ensureD3D9CUDA(int w, int h, int W, int H) { TexRes &r = res[0];// 1 NV12 texture if (r.w == w && r.h == h && r.W == W && r.H == H && r.cuRes) return true; if (!ctx) { // TODO: how to use pop/push decoder's context without the context in opengl context if (!ensureD3DDevice()) return false; // CUdevice is different from decoder's CUDA_ENSURE(cuD3D9CtxCreate(&ctx, &dev, CU_CTX_SCHED_BLOCKING_SYNC, device9), false); #if USE_STREAM CUDA_WARN(cuStreamCreate(&res[0].stream, CU_STREAM_DEFAULT)); CUDA_WARN(cuStreamCreate(&res[1].stream, CU_STREAM_DEFAULT)); #endif //USE_STREAM qDebug("cuda contex on gl thread: %p", ctx); CUDA_ENSURE(cuCtxPopCurrent(&ctx), false); // TODO: why cuMemcpy2D need this } if (r.cuRes) { CUDA_ENSURE(cuGraphicsUnregisterResource(r.cuRes), false); r.cuRes = NULL; } // create d3d resource for interop if (!surface9_nv12) { // TODO: need pitch from cuvid to ensure cuMemcpy2D can copy the whole pitch DX_ENSURE(device9->CreateTexture(W //, H , H*3/2 , 1 , D3DUSAGE_DYNAMIC //D3DUSAGE_DYNAMIC is lockable // 0 is from NV example. cudaD3D9.h says The primary rendertarget may not be registered with CUDA. So can not be D3DUSAGE_RENDERTARGET? //, D3DUSAGE_RENDERTARGET , D3DFMT_L8 //, (D3DFORMAT)MAKEFOURCC('N','V','1','2') // can not create nv12. use 2 textures L8+A8L8? , D3DPOOL_DEFAULT // must be D3DPOOL_DEFAULT for cuda? , &texture9_nv12 , NULL) // - Resources allocated as shared may not be registered with CUDA. , false); DX_ENSURE(device9->CreateOffscreenPlainSurface(W, H, (D3DFORMAT)MAKEFOURCC('N','V','1','2'), D3DPOOL_DEFAULT, &surface9_nv12, NULL), false); } // TODO: cudaD3D9.h says NV12 is not supported // CUDA_ERROR_INVALID_HANDLE if register D3D9 surface // TODO: why flag CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD is invalid while it's fine for opengl CUDA_ENSURE(cuGraphicsD3D9RegisterResource(&r.cuRes, texture9_nv12, CU_GRAPHICS_REGISTER_FLAGS_NONE), false); return true; }
/// Tears the surface down: unregisters the CUDA graphics resource and then
/// deletes the GL texture backing it.
CudaSurface::~CudaSurface()
{
    // Called only for its side effect; the original annotation says it
    // unmaps the resource (the implementation is not visible here).
    getGLTexture();

    // Return values are not checked, as in the original teardown.
    cuGraphicsUnregisterResource(m_cudaResource);

    glDeleteTextures(1, &m_glTexture);
}