/*!
 * Creates an IDirect3DDevice9 through the Direct3DCreate9 entry point of an already loaded module.
 *
 * \param dll   module handle in which the "Direct3DCreate9" symbol is looked up
 * \param d3d9  receives the IDirect3D9 object returned by Direct3DCreate9. It is written before the
 *              device is created, so it can be non-null even when this function returns NULL.
 * \param d3dai optional. When non-null, GetAdapterIdentifier() is called on the default adapter to fill it.
 * \return the created device; NULL if the symbol is missing or Direct3DCreate9 returns NULL
 *         (and, presumably via DX_ENSURE, when CreateDevice fails -- the macro is defined elsewhere).
 */
IDirect3DDevice9* CreateDevice9(HINSTANCE dll, IDirect3D9** d3d9, D3DADAPTER_IDENTIFIER9 *d3dai)
{
    qDebug("creating d3d9 device...");

    // Resolve the factory function at runtime instead of linking against d3d9.
    typedef IDirect3D9* (WINAPI *Direct3DCreate9Fn)(UINT SDKVersion);
    const Direct3DCreate9Fn createD3D9 = reinterpret_cast<Direct3DCreate9Fn>(GetProcAddress(dll, "Direct3DCreate9"));
    if (createD3D9 == NULL) {
        qWarning("Symbol not found: Direct3DCreate9");
        return NULL;
    }

    *d3d9 = createD3D9(D3D_SDK_VERSION);
    if (*d3d9 == NULL) {
        qWarning("Direct3DCreate9 failed");
        return NULL;
    }

    // The adapter description is optional and a failure here is only reported.
    if (d3dai) {
        DX_WARN((*d3d9)->GetAdapterIdentifier(D3DADAPTER_DEFAULT, 0, d3dai));
    }

    D3DPRESENT_PARAMETERS d3dpp;
    InitParameters(&d3dpp);

    DWORD flags = D3DCREATE_FPU_PRESERVE
                | D3DCREATE_MULTITHREADED
                | D3DCREATE_MIXED_VERTEXPROCESSING;
    IDirect3DDevice9 *d3d9dev = NULL;
    // The shell window is used as the focus window (GetDesktopWindow() was the alternative considered).
    DX_ENSURE(((*d3d9)->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, GetShellWindow(),
                                     flags, &d3dpp, &d3d9dev))
              , NULL);

    qDebug("IDirect3DDevice9 created");
    return d3d9dev;
}
bool EGLInteropResource::ensureD3DDevice() { if (device9) return true; if (!dll9) dll9 = LoadLibrary(TEXT("D3D9.DLL")); if (!dll9) { qWarning("cuda::EGLInteropResource cannot load d3d9.dll"); return false; } D3DADAPTER_IDENTIFIER9 ai9; ZeroMemory(&ai9, sizeof(ai9)); device9 = DXHelper::CreateDevice9Ex(dll9, (IDirect3D9Ex**)(&d3d9), &ai9); if (!device9) { qWarning("Failed to create d3d9 device ex, fallback to d3d9 device"); device9 = DXHelper::CreateDevice9(dll9, &d3d9, &ai9); } if (!device9) return false; qDebug() << QString().sprintf("CUDA.D3D9 (%.*s, vendor %lu, device %lu, revision %lu)", sizeof(ai9.Description), ai9.Description, ai9.VendorId, ai9.DeviceId, ai9.Revision); // move to ensureResouce DX_ENSURE(device9->CreateQuery(D3DQUERYTYPE_EVENT, &query9), false); query9->Issue(D3DISSUE_END); return !!device9; }
bool EGLInteropResource::map(IDirect3DSurface9* surface, GLuint tex, int w, int h, int) { if (!ensureSurface(w, h)) { releaseEGL(); releaseDX(); return false; } const RECT src = { 0, 0, (~0-1)&w, (~0-1)&h}; DX_ENSURE(d3ddev->StretchRect(surface, &src, dx_surface, NULL, D3DTEXF_NONE), false); if (dx_query) { // Flush the draw command now. Ideally, this should be done immediately before the draw call that uses the texture. Flush it once here though. dx_query->Issue(D3DISSUE_END); //StretchRect does not supports odd values // ensure data is copied to egl surface. Solution and comment is from chromium // The DXVA decoder has its own device which it uses for decoding. ANGLE has its own device which we don't have access to. // The above code attempts to copy the decoded picture into a surface which is owned by ANGLE. // As there are multiple devices involved in this, the StretchRect call above is not synchronous. // We attempt to flush the batched operations to ensure that the picture is copied to the surface owned by ANGLE. // We need to do this in a loop and call flush multiple times. // We have seen the GetData call for flushing the command buffer fail to return success occassionally on multi core machines, leading to an infinite loop. // Workaround is to have an upper limit of 10 on the number of iterations to wait for the Flush to finish. int k = 0; while ((dx_query->GetData(NULL, 0, D3DGETDATA_FLUSH) == FALSE) && ++k < 10) { Sleep(1); } } DYGL(glBindTexture(GL_TEXTURE_2D, tex)); eglBindTexImage(egl->dpy, egl->surface, EGL_BACK_BUFFER); DYGL(glBindTexture(GL_TEXTURE_2D, 0)); return true; }
bool EGLInteropResource::ensureD3D9CUDA(int w, int h, int W, int H) { TexRes &r = res[0];// 1 NV12 texture if (r.w == w && r.h == h && r.W == W && r.H == H && r.cuRes) return true; if (!ctx) { // TODO: how to use pop/push decoder's context without the context in opengl context if (!ensureD3DDevice()) return false; // CUdevice is different from decoder's CUDA_ENSURE(cuD3D9CtxCreate(&ctx, &dev, CU_CTX_SCHED_BLOCKING_SYNC, device9), false); #if USE_STREAM CUDA_WARN(cuStreamCreate(&res[0].stream, CU_STREAM_DEFAULT)); CUDA_WARN(cuStreamCreate(&res[1].stream, CU_STREAM_DEFAULT)); #endif //USE_STREAM qDebug("cuda contex on gl thread: %p", ctx); CUDA_ENSURE(cuCtxPopCurrent(&ctx), false); // TODO: why cuMemcpy2D need this } if (r.cuRes) { CUDA_ENSURE(cuGraphicsUnregisterResource(r.cuRes), false); r.cuRes = NULL; } // create d3d resource for interop if (!surface9_nv12) { // TODO: need pitch from cuvid to ensure cuMemcpy2D can copy the whole pitch DX_ENSURE(device9->CreateTexture(W //, H , H*3/2 , 1 , D3DUSAGE_DYNAMIC //D3DUSAGE_DYNAMIC is lockable // 0 is from NV example. cudaD3D9.h says The primary rendertarget may not be registered with CUDA. So can not be D3DUSAGE_RENDERTARGET? //, D3DUSAGE_RENDERTARGET , D3DFMT_L8 //, (D3DFORMAT)MAKEFOURCC('N','V','1','2') // can not create nv12. use 2 textures L8+A8L8? , D3DPOOL_DEFAULT // must be D3DPOOL_DEFAULT for cuda? , &texture9_nv12 , NULL) // - Resources allocated as shared may not be registered with CUDA. , false); DX_ENSURE(device9->CreateOffscreenPlainSurface(W, H, (D3DFORMAT)MAKEFOURCC('N','V','1','2'), D3DPOOL_DEFAULT, &surface9_nv12, NULL), false); } // TODO: cudaD3D9.h says NV12 is not supported // CUDA_ERROR_INVALID_HANDLE if register D3D9 surface // TODO: why flag CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD is invalid while it's fine for opengl CUDA_ENSURE(cuGraphicsD3D9RegisterResource(&r.cuRes, texture9_nv12, CU_GRAPHICS_REGISTER_FLAGS_NONE), false); return true; }
bool EGLInteropResource::map(int picIndex, const CUVIDPROCPARAMS ¶m, GLuint tex, int w, int h, int H, int plane) { // plane is always 0 because frame is rgb AutoCtxLock locker((cuda_api*)this, lock); Q_UNUSED(locker); if (!ensureResource(w, h, param.Reserved[0], H, tex)) // TODO surface size instead of frame size because we copy the device data return false; //CUDA_ENSURE(cuCtxPushCurrent(ctx), false); CUdeviceptr devptr; unsigned int pitch; CUDA_ENSURE(cuvidMapVideoFrame(dec, picIndex, &devptr, &pitch, const_cast<CUVIDPROCPARAMS*>(¶m)), false); CUVIDAutoUnmapper unmapper(this, dec, devptr); Q_UNUSED(unmapper); // TODO: why can not use res[plane].stream? CUDA_ERROR_INVALID_HANDLE CUDA_ENSURE(cuGraphicsMapResources(1, &res[plane].cuRes, 0), false); CUarray array; CUDA_ENSURE(cuGraphicsSubResourceGetMappedArray(&array, res[plane].cuRes, 0, 0), false); CUDA_ENSURE(cuGraphicsUnmapResources(1, &res[plane].cuRes, 0), false); // mapped array still accessible! CUDA_MEMCPY2D cu2d; memset(&cu2d, 0, sizeof(cu2d)); // Y plane cu2d.srcDevice = devptr; cu2d.srcMemoryType = CU_MEMORYTYPE_DEVICE; cu2d.srcPitch = pitch; cu2d.dstArray = array; cu2d.dstMemoryType = CU_MEMORYTYPE_ARRAY; cu2d.dstPitch = pitch; // the whole size or copy size? cu2d.WidthInBytes = res[plane].W; // the same value as texture9_nv12 cu2d.Height = H*3/2; if (res[plane].stream) CUDA_ENSURE(cuMemcpy2DAsync(&cu2d, res[plane].stream), false); else CUDA_ENSURE(cuMemcpy2D(&cu2d), false); //TODO: delay cuCtxSynchronize && unmap. do it in unmap(tex)? // map to an already mapped resource will crash. sometimes I can not unmap the resource in unmap(tex) because if context switch error // so I simply unmap the resource here if (WORKAROUND_UNMAP_CONTEXT_SWITCH) { if (res[plane].stream) { //CUDA_WARN(cuCtxSynchronize(), false); //wait too long time? use cuStreamQuery? 
CUDA_WARN(cuStreamSynchronize(res[plane].stream)); //slower than CtxSynchronize } /* * This function provides the synchronization guarantee that any CUDA work issued * in \p stream before ::cuGraphicsUnmapResources() will complete before any * subsequently issued graphics work begins. * The graphics API from which \p resources were registered * should not access any resources while they are mapped by CUDA. If an * application does so, the results are undefined. */ // CUDA_ENSURE(cuGraphicsUnmapResources(1, &res[plane].cuRes, 0), false); } D3DLOCKED_RECT rect_src, rect_dst; DX_ENSURE(texture9_nv12->LockRect(0, &rect_src, NULL, D3DLOCK_READONLY), false); DX_ENSURE(surface9_nv12->LockRect(&rect_dst, NULL, D3DLOCK_DISCARD), false); memcpy(rect_dst.pBits, rect_src.pBits, res[plane].W*H*3/2); // exactly w and h DX_ENSURE(surface9_nv12->UnlockRect(), false); DX_ENSURE(texture9_nv12->UnlockRect(0), false); #if 0 //IDirect3DSurface9 *raw_surface = NULL; //DX_ENSURE(texture9_nv12->GetSurfaceLevel(0, &raw_surface), false); const RECT src = { 0, 0, w, h*3/2}; DX_ENSURE(device9->StretchRect(raw_surface, &src, surface9_nv12, NULL, D3DTEXF_NONE), false); #endif if (!map(surface9_nv12, tex, w, h, H)) return false; return true; }
bool EGLInteropResource::ensureD3D9EGL(int w, int h) { if (surface9 && res[0].w == w && res[0].h == h) return true; #if QTAV_HAVE(GUI_PRIVATE) QPlatformNativeInterface *nativeInterface = QGuiApplication::platformNativeInterface(); egl->dpy = static_cast<EGLDisplay>(nativeInterface->nativeResourceForContext("eglDisplay", QOpenGLContext::currentContext())); EGLConfig egl_cfg = static_cast<EGLConfig>(nativeInterface->nativeResourceForContext("eglConfig", QOpenGLContext::currentContext())); #else #ifdef Q_OS_WIN #if QT_VERSION < QT_VERSION_CHECK(5, 5, 0) #ifdef _MSC_VER #pragma message("ANGLE version in Qt<5.5 does not support eglQueryContext. You must upgrade your runtime ANGLE libraries") #else #warning "ANGLE version in Qt<5.5 does not support eglQueryContext. You must upgrade your runtime ANGLE libraries" #endif //_MSC_VER #endif #endif //Q_OS_WIN // eglQueryContext() added (Feb 2015): https://github.com/google/angle/commit/8310797003c44005da4143774293ea69671b0e2a egl->dpy = eglGetCurrentDisplay(); qDebug("EGL version: %s, client api: %s", eglQueryString(egl->dpy, EGL_VERSION), eglQueryString(egl->dpy, EGL_CLIENT_APIS)); // TODO: check runtime egl>=1.4 for eglGetCurrentContext() EGLint cfg_id = 0; EGL_ENSURE(eglQueryContext(egl->dpy, eglGetCurrentContext(), EGL_CONFIG_ID , &cfg_id) == EGL_TRUE, false); qDebug("egl config id: %d", cfg_id); EGLint nb_cfg = 0; EGL_ENSURE(eglGetConfigs(egl->dpy, NULL, 0, &nb_cfg) == EGL_TRUE, false); qDebug("eglGetConfigs number: %d", nb_cfg); QVector<EGLConfig> cfgs(nb_cfg); //check > 0 EGL_ENSURE(eglGetConfigs(egl->dpy, cfgs.data(), cfgs.size(), &nb_cfg) == EGL_TRUE, false); EGLConfig egl_cfg = NULL; for (int i = 0; i < nb_cfg; ++i) { EGLint id = 0; eglGetConfigAttrib(egl->dpy, cfgs[i], EGL_CONFIG_ID, &id); if (id == cfg_id) { egl_cfg = cfgs[i]; break; } } #endif qDebug("egl display:%p config: %p", egl->dpy, egl_cfg); // check extensions QList<QByteArray> extensions = QByteArray(eglQueryString(egl->dpy, EGL_EXTENSIONS)).split(' '); 
// ANGLE_d3d_share_handle_client_buffer will be used if possible const bool kEGL_ANGLE_d3d_share_handle_client_buffer = extensions.contains("EGL_ANGLE_d3d_share_handle_client_buffer"); const bool kEGL_ANGLE_query_surface_pointer = extensions.contains("EGL_ANGLE_query_surface_pointer"); if (!kEGL_ANGLE_d3d_share_handle_client_buffer && !kEGL_ANGLE_query_surface_pointer) { qWarning("EGL extension 'kEGL_ANGLE_query_surface_pointer' or 'ANGLE_d3d_share_handle_client_buffer' is required!"); return false; } GLint has_alpha = 1; //QOpenGLContext::currentContext()->format().hasAlpha() eglGetConfigAttrib(egl->dpy, egl_cfg, EGL_BIND_TO_TEXTURE_RGBA, &has_alpha); //EGL_ALPHA_SIZE EGLint attribs[] = { EGL_WIDTH, w, EGL_HEIGHT, h, EGL_TEXTURE_FORMAT, has_alpha ? EGL_TEXTURE_RGBA : EGL_TEXTURE_RGB, EGL_TEXTURE_TARGET, EGL_TEXTURE_2D, EGL_NONE }; HANDLE share_handle = NULL; if (!kEGL_ANGLE_d3d_share_handle_client_buffer && kEGL_ANGLE_query_surface_pointer) { EGL_ENSURE((egl->surface = eglCreatePbufferSurface(egl->dpy, egl_cfg, attribs)) != EGL_NO_SURFACE, false); qDebug("pbuffer surface: %p", egl->surface); PFNEGLQUERYSURFACEPOINTERANGLEPROC eglQuerySurfacePointerANGLE = reinterpret_cast<PFNEGLQUERYSURFACEPOINTERANGLEPROC>(eglGetProcAddress("eglQuerySurfacePointerANGLE")); if (!eglQuerySurfacePointerANGLE) { qWarning("EGL_ANGLE_query_surface_pointer is not supported"); return false; } EGL_ENSURE(eglQuerySurfacePointerANGLE(egl->dpy, egl->surface, EGL_D3D_TEXTURE_2D_SHARE_HANDLE_ANGLE, &share_handle), false); } SafeRelease(&surface9); SafeRelease(&texture9); // _A8 for a yuv plane /* * d3d resource share requires windows >= vista: https://msdn.microsoft.com/en-us/library/windows/desktop/bb219800(v=vs.85).aspx * from extension files: * d3d9: level must be 1, dimensions must match EGL surface's * d3d9ex or d3d10: */ DX_ENSURE(device9->CreateTexture(w, h, 1, D3DUSAGE_RENDERTARGET, has_alpha ? 
D3DFMT_A8R8G8B8 : D3DFMT_X8R8G8B8, D3DPOOL_DEFAULT, &texture9, &share_handle) , false); DX_ENSURE(texture9->GetSurfaceLevel(0, &surface9), false); if (kEGL_ANGLE_d3d_share_handle_client_buffer) { // requires extension EGL_ANGLE_d3d_share_handle_client_buffer // egl surface size must match d3d texture's // d3d9ex or d3d10 is required EGL_ENSURE((egl->surface = eglCreatePbufferFromClientBuffer(egl->dpy, EGL_D3D_TEXTURE_2D_SHARE_HANDLE_ANGLE, share_handle, egl_cfg, attribs)), false); qDebug("pbuffer surface from client buffer: %p", egl->surface); } return true; }