/**
 * Maps the CUDA graphics resource on \p streamID and returns the CUDA array of
 * its sub-resource at array index 0, mip level 0.
 *
 * When m_cudaResource is still NULL, setupAsCUDATexture() is called first and
 * NULL is returned if that fails. A CUDA_ERROR_ALREADY_MAPPED result from
 * cuGraphicsMapResources() is only logged; the lookup then proceeds on the
 * existing mapping.
 *
 * \param streamID stream handed to cuGraphicsMapResources() / cuGraphicsUnmapResources()
 * \return the mapped CUarray, or NULL on any failure
 */
CUarray ResourceCUDA::mapResource(CUstream streamID)
{
    if(m_cudaResource == NULL)
    {
        if(!setupAsCUDATexture())
            return NULL;
    }

    CUresult err = cuGraphicsMapResources( 1, &m_cudaResource, streamID );
    // Remember whether this very call created the mapping: on a later failure we
    // must only undo our own mapping, never one established by an earlier call.
    const bool mappedHere = (err == CUDA_SUCCESS);
    if(err == CUDA_ERROR_ALREADY_MAPPED)
    {
        LOGI("Warning>> cuGraphicsMapResources was already done for %s. Often happens if setting more than 1 time a texture for some resource\n", m_name.c_str());
    }
    else if(err != CUDA_SUCCESS)
    {
        LOGE("Error>> CUDA failed map some source resources\n");
        return NULL;
    }

    CUarray cuArray = NULL;
    err = cuGraphicsSubResourceGetMappedArray( &cuArray, m_cudaResource, 0/*arrayIndex*/, 0/*mipLevel*/);
    if(err != CUDA_SUCCESS)
    {
        LOGE("Error>> CUDA failed to get mapped resources pointer\n");
        if(mappedHere)
        {
            // Roll back our own mapping and report (instead of dropping) a failing unmap.
            if(cuGraphicsUnmapResources( 1, &m_cudaResource, streamID ) != CUDA_SUCCESS)
            {
                LOGE("Error>> CUDA failed to unmap %s after a failed mapping\n", m_name.c_str());
            }
        }
        return NULL;
    }
    return cuArray;
}
/**
 * Unmaps the CUDA graphics resource on \p streamID.
 *
 * \param streamID stream handed to cuGraphicsUnmapResources()
 * \return true when the unmap call reported no error; false when there is no
 *         resource handle (m_cudaResource is NULL) or the call failed
 */
bool ResourceCUDA::unmapResource(CUstream streamID)
{
    // No CUDA graphics resource handle: nothing to unmap.
    if(NULL == m_cudaResource)
    {
        return false;
    }

    const CUresult status = cuGraphicsUnmapResources( 1, &m_cudaResource, streamID );
    // Any non-zero CUresult is reported as failure.
    return !status;
}
/**
 * Returns the GL texture name held by this surface.
 *
 * If the surface is currently flagged as mapped, the CUDA graphics resource is
 * unmapped first (on the NULL stream), the result is passed to
 * CudaModule::checkError(), and the mapped flag is cleared.
 */
GLuint CudaSurface::getGLTexture(void)
{
    // Fast path: nothing to release before handing the texture out.
    if (!m_isMapped)
    {
        return m_glTexture;
    }

    const CUresult unmapStatus = cuGraphicsUnmapResources(1u, &m_cudaResource, NULL);
    CudaModule::checkError("cuGraphicsUnmapResources", unmapStatus);
    m_isMapped = false;

    return m_glTexture;
}
/*
 * Unmaps the CUDA graphics resource registered for GL texture \a tex.
 *
 * When WORKAROUND_UNMAP_CONTEXT_SWITCH is enabled this is a no-op that returns
 * true (map() already unmapped the resource in that configuration).
 * Otherwise the plane whose texture equals \a tex is looked up in res[0..1],
 * the context is pushed, the plane's stream is synchronized, the resource is
 * unmapped and the context is popped again.
 *
 * Returns false if \a tex matches no plane, or if unmapping or popping the
 * context fails. The context pushed here is popped even when the unmap fails,
 * so a failing unmap no longer leaves an extra context on the stack.
 */
bool GLInteropResource::unmap(GLuint tex)
{
    Q_UNUSED(tex);
    if (WORKAROUND_UNMAP_CONTEXT_SWITCH)
        return true;

    int plane = -1;
    if (res[0].texture == tex)
        plane = 0;
    else if (res[1].texture == tex)
        plane = 1;
    else
        return false;

    // FIXME: why cuCtxPushCurrent gives CUDA_ERROR_INVALID_CONTEXT if opengl viewport changed?
    CUDA_WARN(cuCtxPushCurrent(ctx));
    CUDA_WARN(cuStreamSynchronize(res[plane].stream));
    // FIXME: need a correct context. But why we have to push context even though map/unmap are called in the same thread
    // Because the decoder switch the context in another thread so we have to switch the context back?
    // to workaround the context issue, we must pop the context that valid in map() and push it here

    // Run both calls unconditionally and check afterwards: returning straight
    // from a failed unmap would skip the pop and unbalance the context stack.
    const CUresult unmapErr = cuGraphicsUnmapResources(1, &res[plane].cuRes, 0);
    const CUresult popErr = cuCtxPopCurrent(&ctx);
    CUDA_ENSURE(unmapErr, false);
    CUDA_ENSURE(popErr, false);
    return true;
}
bool GLInteropResource::map(int picIndex, const CUVIDPROCPARAMS ¶m, GLuint tex, int w, int h, int H, int plane) { AutoCtxLock locker((cuda_api*)this, lock); Q_UNUSED(locker); if (!ensureResource(w, h, H, tex, plane)) // TODO surface size instead of frame size because we copy the device data return false; //CUDA_ENSURE(cuCtxPushCurrent(ctx), false); CUdeviceptr devptr; unsigned int pitch; CUDA_ENSURE(cuvidMapVideoFrame(dec, picIndex, &devptr, &pitch, const_cast<CUVIDPROCPARAMS*>(¶m)), false); CUVIDAutoUnmapper unmapper(this, dec, devptr); Q_UNUSED(unmapper); // TODO: why can not use res[plane].stream? CUDA_ERROR_INVALID_HANDLE CUDA_ENSURE(cuGraphicsMapResources(1, &res[plane].cuRes, 0), false); CUarray array; CUDA_ENSURE(cuGraphicsSubResourceGetMappedArray(&array, res[plane].cuRes, 0, 0), false); CUDA_MEMCPY2D cu2d; memset(&cu2d, 0, sizeof(cu2d)); cu2d.srcDevice = devptr; cu2d.srcMemoryType = CU_MEMORYTYPE_DEVICE; cu2d.srcPitch = pitch; cu2d.dstArray = array; cu2d.dstMemoryType = CU_MEMORYTYPE_ARRAY; cu2d.dstPitch = pitch; // the whole size or copy size? cu2d.WidthInBytes = pitch; cu2d.Height = h; if (plane == 1) { cu2d.srcXInBytes = 0;// +srcY*srcPitch + srcXInBytes cu2d.srcY = H; // skip the padding height cu2d.Height /= 2; } if (res[plane].stream) CUDA_ENSURE(cuMemcpy2DAsync(&cu2d, res[plane].stream), false); else CUDA_ENSURE(cuMemcpy2D(&cu2d), false); //TODO: delay cuCtxSynchronize && unmap. do it in unmap(tex)? // map to an already mapped resource will crash. sometimes I can not unmap the resource in unmap(tex) because if context switch error // so I simply unmap the resource here if (WORKAROUND_UNMAP_CONTEXT_SWITCH) { if (res[plane].stream) { //CUDA_WARN(cuCtxSynchronize(), false); //wait too long time? use cuStreamQuery? 
CUDA_WARN(cuStreamSynchronize(res[plane].stream)); //slower than CtxSynchronize } /* * This function provides the synchronization guarantee that any CUDA work issued * in \p stream before ::cuGraphicsUnmapResources() will complete before any * subsequently issued graphics work begins. * The graphics API from which \p resources were registered * should not access any resources while they are mapped by CUDA. If an * application does so, the results are undefined. */ CUDA_ENSURE(cuGraphicsUnmapResources(1, &res[plane].cuRes, 0), false); } else { // call it at last. current context will be used by other cuda calls (unmap() for example) CUDA_ENSURE(cuCtxPopCurrent(&ctx), false); // not required } return true; }
bool EGLInteropResource::map(int picIndex, const CUVIDPROCPARAMS ¶m, GLuint tex, int w, int h, int H, int plane) { // plane is always 0 because frame is rgb AutoCtxLock locker((cuda_api*)this, lock); Q_UNUSED(locker); if (!ensureResource(w, h, param.Reserved[0], H, tex)) // TODO surface size instead of frame size because we copy the device data return false; //CUDA_ENSURE(cuCtxPushCurrent(ctx), false); CUdeviceptr devptr; unsigned int pitch; CUDA_ENSURE(cuvidMapVideoFrame(dec, picIndex, &devptr, &pitch, const_cast<CUVIDPROCPARAMS*>(¶m)), false); CUVIDAutoUnmapper unmapper(this, dec, devptr); Q_UNUSED(unmapper); // TODO: why can not use res[plane].stream? CUDA_ERROR_INVALID_HANDLE CUDA_ENSURE(cuGraphicsMapResources(1, &res[plane].cuRes, 0), false); CUarray array; CUDA_ENSURE(cuGraphicsSubResourceGetMappedArray(&array, res[plane].cuRes, 0, 0), false); CUDA_ENSURE(cuGraphicsUnmapResources(1, &res[plane].cuRes, 0), false); // mapped array still accessible! CUDA_MEMCPY2D cu2d; memset(&cu2d, 0, sizeof(cu2d)); // Y plane cu2d.srcDevice = devptr; cu2d.srcMemoryType = CU_MEMORYTYPE_DEVICE; cu2d.srcPitch = pitch; cu2d.dstArray = array; cu2d.dstMemoryType = CU_MEMORYTYPE_ARRAY; cu2d.dstPitch = pitch; // the whole size or copy size? cu2d.WidthInBytes = res[plane].W; // the same value as texture9_nv12 cu2d.Height = H*3/2; if (res[plane].stream) CUDA_ENSURE(cuMemcpy2DAsync(&cu2d, res[plane].stream), false); else CUDA_ENSURE(cuMemcpy2D(&cu2d), false); //TODO: delay cuCtxSynchronize && unmap. do it in unmap(tex)? // map to an already mapped resource will crash. sometimes I can not unmap the resource in unmap(tex) because if context switch error // so I simply unmap the resource here if (WORKAROUND_UNMAP_CONTEXT_SWITCH) { if (res[plane].stream) { //CUDA_WARN(cuCtxSynchronize(), false); //wait too long time? use cuStreamQuery? 
CUDA_WARN(cuStreamSynchronize(res[plane].stream)); //slower than CtxSynchronize } /* * This function provides the synchronization guarantee that any CUDA work issued * in \p stream before ::cuGraphicsUnmapResources() will complete before any * subsequently issued graphics work begins. * The graphics API from which \p resources were registered * should not access any resources while they are mapped by CUDA. If an * application does so, the results are undefined. */ // CUDA_ENSURE(cuGraphicsUnmapResources(1, &res[plane].cuRes, 0), false); } D3DLOCKED_RECT rect_src, rect_dst; DX_ENSURE(texture9_nv12->LockRect(0, &rect_src, NULL, D3DLOCK_READONLY), false); DX_ENSURE(surface9_nv12->LockRect(&rect_dst, NULL, D3DLOCK_DISCARD), false); memcpy(rect_dst.pBits, rect_src.pBits, res[plane].W*H*3/2); // exactly w and h DX_ENSURE(surface9_nv12->UnlockRect(), false); DX_ENSURE(texture9_nv12->UnlockRect(0), false); #if 0 //IDirect3DSurface9 *raw_surface = NULL; //DX_ENSURE(texture9_nv12->GetSurfaceLevel(0, &raw_surface), false); const RECT src = { 0, 0, w, h*3/2}; DX_ENSURE(device9->StretchRect(raw_surface, &src, surface9_nv12, NULL, D3DTEXF_NONE), false); #endif if (!map(surface9_nv12, tex, w, h, H)) return false; return true; }
/**
 * Copies the linear device buffer (m_dptr, row pitch m_pitch) into the CUDA
 * array of the graphics resource (array index 0, mip level 0).
 *
 * The resource is mapped on \p streamID, m_xByteSz bytes by
 * m_creationData.sz[1] rows are copied with cuMemcpy2D(), and the resource is
 * unmapped again on every path that follows a successful map.
 *
 * With DBGDUMMYCOPY defined, the destination is a temporary array created by
 * cuArrayCreate() instead of the interop array (debug aid).
 *
 * \param streamID stream handed to cuGraphicsMapResources() / cuGraphicsUnmapResources()
 * \return true on success, false if mapping, array lookup, the copy or the final unmap fails
 */
bool ResourceCUDA::updateResourceFromCUDA(CUstream streamID)
{
    CUresult res = cuGraphicsMapResources( 1, &m_cudaResource, streamID );
    if(res != CUDA_SUCCESS)
    {
        LOGE("Error>> CUDA failed map some target resources\n");
        return false;
    }
    //
    // Walk through output resources and perform the copies
    //
    CUarray cuArray;
//# define DBGDUMMYCOPY
#   ifdef DBGDUMMYCOPY
    // interop has issues... let's compare with a copy to a basic cuda array
    int www = m_xByteSz/4;
    CUDA_ARRAY_DESCRIPTOR descr = {
        www,//unsigned int Width;
        m_creationData.sz[1],//unsigned int Height;
        CU_AD_FORMAT_UNSIGNED_INT8,//CUarray_format Format;
        4//unsigned int NumChannels;
    };
    res = cuArrayCreate(&cuArray, &descr);
#   else
    res = cuGraphicsSubResourceGetMappedArray( &cuArray, m_cudaResource, 0/*arrayIndex*/, 0/*mipLevel*/);
#   endif
    if(res != CUDA_SUCCESS)
    {
        LOGE("Error>> CUDA failed to get the destination array\n");
        if(cuGraphicsUnmapResources( 1, &m_cudaResource, streamID ) != CUDA_SUCCESS)
        {
            LOGE("Error>> CUDA failed unmap the resource after a failed update\n");
        }
        return false;
    }

    // Zero every field, then set the used ones by name so the copy does not
    // depend on the declaration order of CUDA_MEMCPY2D.
    CUDA_MEMCPY2D copyInfo = {};
    copyInfo.srcMemoryType = CU_MEMORYTYPE_DEVICE;  // source is linear device memory
    copyInfo.srcDevice     = m_dptr;
    copyInfo.srcPitch      = m_pitch;
    copyInfo.dstMemoryType = CU_MEMORYTYPE_ARRAY;   // destination is the array
    copyInfo.dstArray      = cuArray;
    copyInfo.WidthInBytes  = m_xByteSz;
    copyInfo.Height        = m_creationData.sz[1];
    //LOGI("cuMemcpy2D(): CU_MEMORYTYPE_DEVICE source=%x pitch=%d CU_MEMORYTYPE_ARRAY=%x widthBytes=%d height=%d\n",m_dptr, m_pitch, cuArray, m_xByteSz, m_creationData.sz[1]);
    res = cuMemcpy2D( &copyInfo );
    if(res != CUDA_SUCCESS)
    {
        LOGE("Error>> CUDA failed to copy linear memory to texture (array memory)\n");
#       ifdef DBGDUMMYCOPY
        cuArrayDestroy(cuArray); // do not leak the temporary debug array
#       endif
        if(cuGraphicsUnmapResources( 1, &m_cudaResource, streamID ) != CUDA_SUCCESS)
        {
            LOGE("Error>> CUDA failed unmap the resource after a failed update\n");
        }
        return false;
    }
#   ifdef DBGDUMMYCOPY
    res = cuArrayDestroy(cuArray);
#   endif
    res = cuGraphicsUnmapResources( 1, &m_cudaResource, streamID );
    if(res != CUDA_SUCCESS)
    {
        LOGE("Error>> CUDA failed unmap the resource for output result of the kernel\n");
        return false;
    }
    return true;
}