/*!
 * Builds a VideoFrame from the picture currently held in d.frame.
 *
 * d.frame->data[3] is interpreted as the IDirect3DSurface9 that holds the picture.
 *  - Zero-copy mode (copyMode() == ZeroCopy and d.interop_res set): the surface is handed to a
 *    SurfaceInteropDXVA object which is attached to an RGB32 frame as "surface_interop" metadata.
 *  - Otherwise the surface is locked read-only and its content is copied with copyToFrame().
 *
 * Returns an invalid (default constructed) VideoFrame when there is no picture, the surface can
 * not be locked or described, or the surface format has no matching VideoFormat.
 */
VideoFrame VideoDecoderDXVA::frame()
{
    DPTR_D(VideoDecoderDXVA);
    //qDebug("frame size: %dx%d", d.frame->width, d.frame->height);
    if (!d.frame->opaque || !d.frame->data[0])
        return VideoFrame();
    if (d.frame->width <= 0 || d.frame->height <= 0 || !d.codec_ctx)
        return VideoFrame();

    IDirect3DSurface9 *d3d = (IDirect3DSurface9*)(uintptr_t)d.frame->data[3];
    if (copyMode() == ZeroCopy && d.interop_res) {
        // The interop object is owned by the VideoSurfaceInteropPtr stored in the frame metadata.
        dxva::SurfaceInteropDXVA *interop = new dxva::SurfaceInteropDXVA(d.interop_res);
        interop->setSurface(d3d, width(), height());
        VideoFrame f(width(), height(), VideoFormat::Format_RGB32); //p->width()
        f.setBytesPerLine(d.width * 4); //used by gl to compute texture size
        f.setMetaData(QStringLiteral("surface_interop"), QVariant::fromValue(VideoSurfaceInteropPtr(interop)));
        f.setTimestamp(d.frame->pkt_pts/1000.0);
        f.setDisplayAspectRatio(d.getDAR(d.frame));
        return f;
    }

    // Locks the surface for reading on construction and unlocks it on destruction.
    // On failure the rect is cleared so that callers can test Pitch == 0.
    class ScopedD3DLock {
        IDirect3DSurface9 *mpD3D;
        // not copyable: a copy would unlock the surface twice
        ScopedD3DLock(const ScopedD3DLock&);
        ScopedD3DLock& operator=(const ScopedD3DLock&);
    public:
        ScopedD3DLock(IDirect3DSurface9* d3d, D3DLOCKED_RECT *rect) : mpD3D(d3d) {
            if (FAILED(mpD3D->LockRect(rect, NULL, D3DLOCK_READONLY))) {
                qWarning("Failed to lock surface");
                rect->Pitch = 0;
                rect->pBits = NULL;
                mpD3D = 0;
            }
        }
        ~ScopedD3DLock() {
            if (mpD3D)
                mpD3D->UnlockRect();
        }
    };

    D3DLOCKED_RECT lock = { 0, NULL };
    // The lock MUST be a named object: an unnamed temporary would be destroyed (and the surface
    // unlocked) at the end of this statement, before copyToFrame() reads lock.pBits.
    ScopedD3DLock locker(d3d, &lock);
    if (lock.Pitch == 0) {
        return VideoFrame();
    }
    //pitch >= desc.Width
    D3DSURFACE_DESC desc;
    if (FAILED(d3d->GetDesc(&desc))) {
        qWarning("Failed to get surface description");
        return VideoFrame();
    }
    const VideoFormat fmt = VideoFormat(pixelFormatFromD3D(desc.Format));
    if (!fmt.isValid()) {
        qWarning("unsupported dxva pixel format: %#x", desc.Format);
        return VideoFrame();
    }
    //YV12 need swap, not imc3?
    // imc3 U V pitch == Y pitch, but half of the U/V plane is space. we convert to yuv420p here
    // nv12 bpp(1)==1
    // 3rd plane is not used for nv12
    int pitch[3] = { lock.Pitch, 0, 0}; //compute chroma later
    uint8_t *src[] = { (uint8_t*)lock.pBits, 0, 0}; //compute chroma later
    const bool swap_uv = desc.Format == MAKEFOURCC('I','M','C','3');
    // The surface stays locked until `locker` goes out of scope, i.e. after the copy is done.
    return copyToFrame(fmt, d.surface_height, src, pitch, swap_uv);
}
/*!
 * Builds a VideoFrame by copying the picture currently held in d.frame out of its D3D9 surface.
 *
 * d.frame->data[3] is interpreted as the IDirect3DSurface9 that holds the picture. The surface is
 * locked read-only for the duration of the copy done by copyToFrame().
 *
 * Returns an invalid (default constructed) VideoFrame when there is no picture, the surface can
 * not be locked, or the render format (d.render) has no matching VideoFormat.
 */
VideoFrame VideoDecoderDXVA::frame()
{
    DPTR_D(VideoDecoderDXVA);
    if (!d.frame->opaque || !d.frame->data[0])
        return VideoFrame();
    if (d.width <= 0 || d.height <= 0 || !d.codec_ctx)
        return VideoFrame();

    // Locks the surface for reading on construction and unlocks it on destruction.
    // On failure the rect is cleared so that callers can test Pitch == 0.
    class ScopedD3DLock {
    public:
        ScopedD3DLock(IDirect3DSurface9* d3d, D3DLOCKED_RECT *rect) : mpD3D(d3d) {
            if (FAILED(mpD3D->LockRect(rect, NULL, D3DLOCK_READONLY))) {
                qWarning("Failed to lock surface");
                rect->Pitch = 0;
                rect->pBits = NULL;
                mpD3D = 0;
            }
        }
        ~ScopedD3DLock() {
            if (mpD3D)
                mpD3D->UnlockRect();
        }
    private:
        // not copyable: a copy would unlock the surface twice
        ScopedD3DLock(const ScopedD3DLock&);
        ScopedD3DLock& operator=(const ScopedD3DLock&);
        IDirect3DSurface9 *mpD3D;
    };

    IDirect3DSurface9 *d3d = (IDirect3DSurface9*)(uintptr_t)d.frame->data[3];
    //pitch >= desc.Width
    //D3DSURFACE_DESC desc;
    //d3d->GetDesc(&desc);
    D3DLOCKED_RECT lock = { 0, NULL };
    // The lock MUST be a named object: an unnamed temporary would be destroyed (and the surface
    // unlocked) at the end of this statement, before copyToFrame() reads lock.pBits.
    ScopedD3DLock locker(d3d, &lock);
    if (lock.Pitch == 0) {
        return VideoFrame();
    }

    // NOTE(review): D3dFindFormat() is dereferenced without a null check — confirm it can not
    // return a null pointer for the value stored in d.render.
    const VideoFormat fmt = VideoFormat((int)D3dFindFormat(d.render)->avpixfmt);
    if (!fmt.isValid()) {
        qWarning("unsupported dxva pixel format: %#x", d.render);
        return VideoFrame();
    }
    //YV12 need swap, not imc3?
    // imc3 U V pitch == Y pitch, but half of the U/V plane is space. we convert to yuv420p here
    // nv12 bpp(1)==1
    // 3rd plane is not used for nv12
    int pitch[3] = { lock.Pitch, 0, 0}; //compute chroma later
    uint8_t *src[] = { (uint8_t*)lock.pBits, 0, 0}; //compute chroma later
    const bool swap_uv = d.render == MAKEFOURCC('I','M','C','3');
    // The surface stays locked until `locker` goes out of scope, i.e. after the copy is done.
    return copyToFrame(fmt, d.surface_height, src, pitch, swap_uv);
}
VideoFrame VideoDecoderVAAPI::frame() { DPTR_D(VideoDecoderVAAPI); if (!d.frame->opaque || !d.frame->data[0]) return VideoFrame(); VASurfaceID surface_id = (VASurfaceID)(uintptr_t)d.frame->data[3]; VAStatus status = VA_STATUS_SUCCESS; if (display() == GLX || (copyMode() == ZeroCopy && display() == X11)) { surface_ptr p; std::list<surface_ptr>::iterator it = d.surfaces_used.begin(); for (; it != d.surfaces_used.end() && !p; ++it) { if((*it)->get() == surface_id) { p = *it; break; } } if (!p) { for (it = d.surfaces_free.begin(); it != d.surfaces_free.end() && !p; ++it) { if((*it)->get() == surface_id) { p = *it; break; } } } if (!p) { qWarning("VAAPI - Unable to find surface"); return VideoFrame(); } ((SurfaceInteropVAAPI*)d.surface_interop.data())->setSurface(p); VideoFrame f(d.width, d.height, VideoFormat::Format_RGB32); //p->width() f.setBytesPerLine(d.width*4); //used by gl to compute texture size f.setMetaData("surface_interop", QVariant::fromValue(d.surface_interop)); f.setTimestamp(double(d.frame->pkt_pts)/1000.0); return f; } #if VA_CHECK_VERSION(0,31,0) if ((status = vaSyncSurface(d.display->get(), surface_id)) != VA_STATUS_SUCCESS) { qWarning("vaSyncSurface(VADisplay:%p, VASurfaceID:%#x) == %#x", d.display->get(), surface_id, status); #else if (vaSyncSurface(d.display->get(), d.context_id, surface_id)) { qWarning("vaSyncSurface(VADisplay:%#x, VAContextID:%#x, VASurfaceID:%#x) == %#x", d.display, d.context_id, surface_id, status); #endif return VideoFrame(); } if (!d.disable_derive && d.supports_derive) { /* * http://web.archiveorange.com/archive/v/OAywENyq88L319OcRnHI * vaDeriveImage is faster than vaGetImage. 
But VAImage is uncached memory and copying from it would be terribly slow * TODO: copy from USWC, see vlc and https://github.com/OpenELEC/OpenELEC.tv/pull/2937.diff * https://software.intel.com/en-us/articles/increasing-memory-throughput-with-intel-streaming-simd-extensions-4-intel-sse4-streaming-load */ VA_ENSURE_TRUE(vaDeriveImage(d.display->get(), surface_id, &d.image), VideoFrame()); } else { VA_ENSURE_TRUE(vaGetImage(d.display->get(), surface_id, 0, 0, d.width, d.height, d.image.image_id), VideoFrame()); } void *p_base; VA_ENSURE_TRUE(vaMapBuffer(d.display->get(), d.image.buf, &p_base), VideoFrame()); VideoFormat::PixelFormat pixfmt = VideoFormat::Format_Invalid; bool swap_uv = false; switch (d.image.format.fourcc) { case VA_FOURCC_YV12: swap_uv |= d.disable_derive || !d.supports_derive; pixfmt = VideoFormat::Format_YUV420P; break; case VA_FOURCC_IYUV: swap_uv = true; pixfmt = VideoFormat::Format_YUV420P; break; case VA_FOURCC_NV12: pixfmt = VideoFormat::Format_NV12; break; default: break; } if (pixfmt == VideoFormat::Format_Invalid) { qWarning("unsupported vaapi pixel format: %#x", d.image.format.fourcc); return VideoFrame(); } const VideoFormat fmt(pixfmt); uint8_t *src[3]; int pitch[3]; for (int i = 0; i < fmt.planeCount(); ++i) { src[i] = (uint8_t*)p_base + d.image.offsets[i]; pitch[i] = d.image.pitches[i]; } VideoFrame frame(copyToFrame(fmt, d.surface_height, src, pitch, swap_uv)); VAWARN(vaUnmapBuffer(d.display->get(), d.image.buf)); if (!d.disable_derive && d.supports_derive) { vaDestroyImage(d.display->get(), d.image.image_id); d.image.image_id = VA_INVALID_ID; } return frame; } void VideoDecoderVAAPI::setDisplayPriority(const QStringList &priority) { DPTR_D(VideoDecoderVAAPI); d.display_priority.clear(); int idx = staticMetaObject.indexOfEnumerator("DisplayType"); const QMetaEnum me = staticMetaObject.enumerator(idx); foreach (const QString& disp, priority) { d.display_priority.push_back((DisplayType)me.keyToValue(disp.toUtf8().constData())); }