// Maps this object's CUDA graphics resource and returns the CUDA array of
// its sub-resource at array index 0, mip level 0.
//
// When no CUDA resource handle exists yet (m_cudaResource is NULL),
// setupAsCUDATexture() is called first to create it.
//
// streamID : stream handed to cuGraphicsMapResources / cuGraphicsUnmapResources.
// returns  : the mapped CUarray, or NULL if the setup, the mapping or the
//            array lookup failed.
CUarray ResourceCUDA::mapResource(CUstream streamID)
{
    // Create the interop handle on demand; give up if that is not possible.
    if (m_cudaResource == NULL && !setupAsCUDATexture())
        return NULL;

    const CUresult mapStatus = cuGraphicsMapResources(1, &m_cudaResource, streamID);
    switch (mapStatus)
    {
    case CUDA_SUCCESS:
        break;

    case CUDA_ERROR_ALREADY_MAPPED:
        // Tolerated: only reported, then the array lookup below proceeds.
        LOGI("Warning>> cuGraphicsMapResources was already done for %s. Often happens if setting more than 1 time a texture for some resource\n", m_name.c_str());
        break;

    default:
        LOGE("Error>> CUDA failed map some source resources\n");
        return NULL;
    }

    CUarray mappedArray;
    const CUresult lookupStatus =
        cuGraphicsSubResourceGetMappedArray(&mappedArray, m_cudaResource, 0 /*arrayIndex*/, 0 /*mipLevel*/);
    if (lookupStatus == CUDA_SUCCESS)
        return mappedArray;

    LOGE("Error>> CUDA failed to get mapped resources pointer\n");
    // Undo the mapping before bailing out; the outcome of the unmap is not examined.
    (void)cuGraphicsUnmapResources(1, &m_cudaResource, streamID);
    return NULL;
}
// Returns the CUDA array of the surface's graphics resource (array index 0,
// mip level 0), mapping the resource the first time it is requested.
//
// The array handle is cached in m_cudaArray and m_isMapped is set, so later
// calls return the cached handle without touching the driver again.
// Both driver results are passed to CudaModule::checkError(); what that does
// on failure is defined elsewhere (presumably it reports/aborts - confirm).
CUarray CudaSurface::getCudaArray(void)
{
    // Already mapped: hand back the cached array.
    if (m_isMapped)
        return m_cudaArray;

    // NULL is passed as the stream argument of the map call.
    const CUresult mapStatus = cuGraphicsMapResources(1u, &m_cudaResource, NULL);
    CudaModule::checkError("cuGraphicsMapResources", mapStatus);

    const CUresult arrayStatus =
        cuGraphicsSubResourceGetMappedArray(&m_cudaArray, m_cudaResource, 0, 0);
    CudaModule::checkError("cuGraphicsSubResourceGetMappedArray", arrayStatus);

    m_isMapped = true;
    return m_cudaArray;
}
bool GLInteropResource::map(int picIndex, const CUVIDPROCPARAMS ¶m, GLuint tex, int w, int h, int H, int plane) { AutoCtxLock locker((cuda_api*)this, lock); Q_UNUSED(locker); if (!ensureResource(w, h, H, tex, plane)) // TODO surface size instead of frame size because we copy the device data return false; //CUDA_ENSURE(cuCtxPushCurrent(ctx), false); CUdeviceptr devptr; unsigned int pitch; CUDA_ENSURE(cuvidMapVideoFrame(dec, picIndex, &devptr, &pitch, const_cast<CUVIDPROCPARAMS*>(¶m)), false); CUVIDAutoUnmapper unmapper(this, dec, devptr); Q_UNUSED(unmapper); // TODO: why can not use res[plane].stream? CUDA_ERROR_INVALID_HANDLE CUDA_ENSURE(cuGraphicsMapResources(1, &res[plane].cuRes, 0), false); CUarray array; CUDA_ENSURE(cuGraphicsSubResourceGetMappedArray(&array, res[plane].cuRes, 0, 0), false); CUDA_MEMCPY2D cu2d; memset(&cu2d, 0, sizeof(cu2d)); cu2d.srcDevice = devptr; cu2d.srcMemoryType = CU_MEMORYTYPE_DEVICE; cu2d.srcPitch = pitch; cu2d.dstArray = array; cu2d.dstMemoryType = CU_MEMORYTYPE_ARRAY; cu2d.dstPitch = pitch; // the whole size or copy size? cu2d.WidthInBytes = pitch; cu2d.Height = h; if (plane == 1) { cu2d.srcXInBytes = 0;// +srcY*srcPitch + srcXInBytes cu2d.srcY = H; // skip the padding height cu2d.Height /= 2; } if (res[plane].stream) CUDA_ENSURE(cuMemcpy2DAsync(&cu2d, res[plane].stream), false); else CUDA_ENSURE(cuMemcpy2D(&cu2d), false); //TODO: delay cuCtxSynchronize && unmap. do it in unmap(tex)? // map to an already mapped resource will crash. sometimes I can not unmap the resource in unmap(tex) because if context switch error // so I simply unmap the resource here if (WORKAROUND_UNMAP_CONTEXT_SWITCH) { if (res[plane].stream) { //CUDA_WARN(cuCtxSynchronize(), false); //wait too long time? use cuStreamQuery? 
CUDA_WARN(cuStreamSynchronize(res[plane].stream)); //slower than CtxSynchronize } /* * This function provides the synchronization guarantee that any CUDA work issued * in \p stream before ::cuGraphicsUnmapResources() will complete before any * subsequently issued graphics work begins. * The graphics API from which \p resources were registered * should not access any resources while they are mapped by CUDA. If an * application does so, the results are undefined. */ CUDA_ENSURE(cuGraphicsUnmapResources(1, &res[plane].cuRes, 0), false); } else { // call it at last. current context will be used by other cuda calls (unmap() for example) CUDA_ENSURE(cuCtxPopCurrent(&ctx), false); // not required } return true; }
bool EGLInteropResource::map(int picIndex, const CUVIDPROCPARAMS ¶m, GLuint tex, int w, int h, int H, int plane) { // plane is always 0 because frame is rgb AutoCtxLock locker((cuda_api*)this, lock); Q_UNUSED(locker); if (!ensureResource(w, h, param.Reserved[0], H, tex)) // TODO surface size instead of frame size because we copy the device data return false; //CUDA_ENSURE(cuCtxPushCurrent(ctx), false); CUdeviceptr devptr; unsigned int pitch; CUDA_ENSURE(cuvidMapVideoFrame(dec, picIndex, &devptr, &pitch, const_cast<CUVIDPROCPARAMS*>(¶m)), false); CUVIDAutoUnmapper unmapper(this, dec, devptr); Q_UNUSED(unmapper); // TODO: why can not use res[plane].stream? CUDA_ERROR_INVALID_HANDLE CUDA_ENSURE(cuGraphicsMapResources(1, &res[plane].cuRes, 0), false); CUarray array; CUDA_ENSURE(cuGraphicsSubResourceGetMappedArray(&array, res[plane].cuRes, 0, 0), false); CUDA_ENSURE(cuGraphicsUnmapResources(1, &res[plane].cuRes, 0), false); // mapped array still accessible! CUDA_MEMCPY2D cu2d; memset(&cu2d, 0, sizeof(cu2d)); // Y plane cu2d.srcDevice = devptr; cu2d.srcMemoryType = CU_MEMORYTYPE_DEVICE; cu2d.srcPitch = pitch; cu2d.dstArray = array; cu2d.dstMemoryType = CU_MEMORYTYPE_ARRAY; cu2d.dstPitch = pitch; // the whole size or copy size? cu2d.WidthInBytes = res[plane].W; // the same value as texture9_nv12 cu2d.Height = H*3/2; if (res[plane].stream) CUDA_ENSURE(cuMemcpy2DAsync(&cu2d, res[plane].stream), false); else CUDA_ENSURE(cuMemcpy2D(&cu2d), false); //TODO: delay cuCtxSynchronize && unmap. do it in unmap(tex)? // map to an already mapped resource will crash. sometimes I can not unmap the resource in unmap(tex) because if context switch error // so I simply unmap the resource here if (WORKAROUND_UNMAP_CONTEXT_SWITCH) { if (res[plane].stream) { //CUDA_WARN(cuCtxSynchronize(), false); //wait too long time? use cuStreamQuery? 
CUDA_WARN(cuStreamSynchronize(res[plane].stream)); //slower than CtxSynchronize } /* * This function provides the synchronization guarantee that any CUDA work issued * in \p stream before ::cuGraphicsUnmapResources() will complete before any * subsequently issued graphics work begins. * The graphics API from which \p resources were registered * should not access any resources while they are mapped by CUDA. If an * application does so, the results are undefined. */ // CUDA_ENSURE(cuGraphicsUnmapResources(1, &res[plane].cuRes, 0), false); } D3DLOCKED_RECT rect_src, rect_dst; DX_ENSURE(texture9_nv12->LockRect(0, &rect_src, NULL, D3DLOCK_READONLY), false); DX_ENSURE(surface9_nv12->LockRect(&rect_dst, NULL, D3DLOCK_DISCARD), false); memcpy(rect_dst.pBits, rect_src.pBits, res[plane].W*H*3/2); // exactly w and h DX_ENSURE(surface9_nv12->UnlockRect(), false); DX_ENSURE(texture9_nv12->UnlockRect(0), false); #if 0 //IDirect3DSurface9 *raw_surface = NULL; //DX_ENSURE(texture9_nv12->GetSurfaceLevel(0, &raw_surface), false); const RECT src = { 0, 0, w, h*3/2}; DX_ENSURE(device9->StretchRect(raw_surface, &src, surface9_nv12, NULL, D3DTEXF_NONE), false); #endif if (!map(surface9_nv12, tex, w, h, H)) return false; return true; }
// Copies the linear device buffer m_dptr (row pitch m_pitch) into the CUDA
// array of this object's graphics resource (array index 0, mip level 0).
//
// The resource is mapped on streamID, the copy of m_xByteSz bytes per row by
// m_creationData.sz[1] rows is done with cuMemcpy2D, and the resource is
// unmapped again on streamID - also on the failure paths after a successful
// map.
//
// streamID : stream handed to cuGraphicsMapResources / cuGraphicsUnmapResources.
// returns  : true when map, copy and unmap all succeeded; false otherwise
//            (an error is logged for each failing step).
bool ResourceCUDA::updateResourceFromCUDA(CUstream streamID)
{
    CUresult res = cuGraphicsMapResources( 1, &m_cudaResource, streamID );
    if(res != CUDA_SUCCESS)
    {
        LOGE("Error>> CUDA failed map some target resources\n");
        return false;
    }
    //
    // Walk through output resources and perform the copies
    //
    CUarray cuArray;
//# define DBGDUMMYCOPY
#   ifdef DBGDUMMYCOPY
    // interop has issues... let's compare with a copy to a basic cuda array
    int www = m_xByteSz/4;
    CUDA_ARRAY_DESCRIPTOR descr = {
        www,//unsigned int Width;
        m_creationData.sz[1],//unsigned int Height;
        CU_AD_FORMAT_UNSIGNED_INT8,//CUarray_format Format;
        4//unsigned int NumChannels;
    };
    res = cuArrayCreate(&cuArray, &descr);
#   else
    res = cuGraphicsSubResourceGetMappedArray( &cuArray, m_cudaResource, 0/*arrayIndex*/, 0/*mipLevel*/);
#   endif
    if(res != CUDA_SUCCESS)
    {
        // Report the failure instead of returning false without any trace.
        LOGE("Error>> CUDA failed to get the destination array of the target resource\n");
        // Best-effort cleanup; the outcome of the unmap is not examined.
        (void)cuGraphicsUnmapResources( 1, &m_cudaResource, streamID );
        return false;
    }

    // Zero everything, then set only the fields this copy needs. Offsets stay
    // 0 and the destination pitch is ignored because the destination is an array.
    CUDA_MEMCPY2D copyInfo = {};
    copyInfo.srcMemoryType = CU_MEMORYTYPE_DEVICE;
    copyInfo.srcDevice     = m_dptr;
    copyInfo.srcPitch      = m_pitch;
    copyInfo.dstMemoryType = CU_MEMORYTYPE_ARRAY;
    copyInfo.dstArray      = cuArray;
    copyInfo.WidthInBytes  = m_xByteSz;              // width of the copy in bytes
    copyInfo.Height        = m_creationData.sz[1];   // height of the copy in rows
    //LOGI("cuMemcpy2D(): CU_MEMORYTYPE_DEVICE source=%x pitch=%d CU_MEMORYTYPE_ARRAY=%x widthBytes=%d height=%d\n",m_dptr, m_pitch, cuArray, m_xByteSz, m_creationData.sz[1]);
    res = cuMemcpy2D( &copyInfo );
    if(res != CUDA_SUCCESS)
    {
        LOGE("Error>> CUDA failed to copy linear memory to texture (array memory)\n");
#       ifdef DBGDUMMYCOPY
        // Do not leak the temporary debug array on the failure path.
        (void)cuArrayDestroy(cuArray);
#       endif
        (void)cuGraphicsUnmapResources( 1, &m_cudaResource, streamID );
        return false;
    }
#   ifdef DBGDUMMYCOPY
    res = cuArrayDestroy(cuArray);
#   endif
    res = cuGraphicsUnmapResources( 1, &m_cudaResource, streamID );
    if(res != CUDA_SUCCESS)
    {
        LOGE("Error>> CUDA failed unmap the resource for output result of the kernel\n");
        return false;
    }
    return true;
}