VideoFrame VideoDecoderDXVA::frame() { DPTR_D(VideoDecoderDXVA); //qDebug("frame size: %dx%d", d.frame->width, d.frame->height); if (!d.frame->opaque || !d.frame->data[0]) return VideoFrame(); if (d.frame->width <= 0 || d.frame->height <= 0 || !d.codec_ctx) return VideoFrame(); IDirect3DSurface9 *d3d = (IDirect3DSurface9*)(uintptr_t)d.frame->data[3]; if (copyMode() == ZeroCopy && d.interop_res) { dxva::SurfaceInteropDXVA *interop = new dxva::SurfaceInteropDXVA(d.interop_res); interop->setSurface(d3d, width(), height()); VideoFrame f(width(), height(), VideoFormat::Format_RGB32); //p->width() f.setBytesPerLine(d.width * 4); //used by gl to compute texture size f.setMetaData(QStringLiteral("surface_interop"), QVariant::fromValue(VideoSurfaceInteropPtr(interop))); f.setTimestamp(d.frame->pkt_pts/1000.0); f.setDisplayAspectRatio(d.getDAR(d.frame)); return f; } class ScopedD3DLock { IDirect3DSurface9 *mpD3D; public: ScopedD3DLock(IDirect3DSurface9* d3d, D3DLOCKED_RECT *rect) : mpD3D(d3d) { if (FAILED(mpD3D->LockRect(rect, NULL, D3DLOCK_READONLY))) { qWarning("Failed to lock surface"); mpD3D = 0; } } ~ScopedD3DLock() { if (mpD3D) mpD3D->UnlockRect(); } }; D3DLOCKED_RECT lock; ScopedD3DLock(d3d, &lock); if (lock.Pitch == 0) { return VideoFrame(); } //picth >= desc.Width D3DSURFACE_DESC desc; d3d->GetDesc(&desc); const VideoFormat fmt = VideoFormat(pixelFormatFromD3D(desc.Format)); if (!fmt.isValid()) { qWarning("unsupported dxva pixel format: %#x", desc.Format); return VideoFrame(); } //YV12 need swap, not imc3? // imc3 U V pitch == Y pitch, but half of the U/V plane is space. we convert to yuv420p here // nv12 bpp(1)==1 // 3rd plane is not used for nv12 int pitch[3] = { lock.Pitch, 0, 0}; //compute chroma later uint8_t *src[] = { (uint8_t*)lock.pBits, 0, 0}; //compute chroma later const bool swap_uv = desc.Format == MAKEFOURCC('I','M','C','3'); return copyToFrame(fmt, d.surface_height, src, pitch, swap_uv); }
VideoFrame VideoDecoderFFmpegHW::copyToFrame(const VideoFormat& fmt, int surface_h, quint8 *src[], int pitch[], bool swapUV) { DPTR_D(VideoDecoderFFmpegHW); Q_ASSERT_X(src[0] && pitch[0] > 0, "VideoDecoderFFmpegHW::copyToFrame", "src[0] and pitch[0] must be set"); const int nb_planes = fmt.planeCount(); const int chroma_pitch = nb_planes > 1 ? fmt.bytesPerLine(pitch[0], 1) : 0; const int chroma_h = fmt.chromaHeight(surface_h); int h[] = { surface_h, 0, 0}; for (int i = 1; i < nb_planes; ++i) { h[i] = chroma_h; // set chroma address and pitch if not set if (pitch[i] <= 0) pitch[i] = chroma_pitch; if (!src[i]) src[i] = src[i-1] + pitch[i-1]*h[i-1]; } if (swapUV) { std::swap(src[1], src[2]); std::swap(pitch[1], pitch[2]); } VideoFrame frame; if (copyMode() == VideoDecoderFFmpegHW::OptimizedCopy && d.gpu_mem.isReady()) { int yuv_size = 0; for (int i = 0; i < nb_planes; ++i) { yuv_size += pitch[i]*h[i]; } // additional 15 bytes to ensure 16 bytes aligned QByteArray buf(15 + yuv_size, 0); const int offset_16 = (16 - ((uintptr_t)buf.data() & 0x0f)) & 0x0f; // plane 1, 2... is aligned? uchar* plane_ptr = (uchar*)buf.data() + offset_16; QVector<uchar*> dst(nb_planes, 0); for (int i = 0; i < nb_planes; ++i) { dst[i] = plane_ptr; // TODO: add VideoFormat::planeWidth/Height() ? // pitch instead of surface_width plane_ptr += pitch[i] * h[i]; d.gpu_mem.copyFrame(src[i], dst[i], pitch[i], h[i], pitch[i]); } frame = VideoFrame(buf, width(), height(), fmt); frame.setBits(dst); frame.setBytesPerLine(pitch); } else { frame = VideoFrame(width(), height(), fmt); frame.setBits(src); frame.setBytesPerLine(pitch); // TODO: why clone is faster()? // TODO: buffer pool and create VideoFrame when needed to avoid copy? also for other va frame = frame.clone(); } frame.setTimestamp(double(d.frame->pkt_pts)/1000.0); frame.setDisplayAspectRatio(d.getDAR(d.frame)); d.updateColorDetails(&frame); return frame; }
VideoFrame VideoDecoderVAAPI::frame() { DPTR_D(VideoDecoderVAAPI); if (!d.frame->opaque || !d.frame->data[0]) return VideoFrame(); VASurfaceID surface_id = (VASurfaceID)(uintptr_t)d.frame->data[3]; VAStatus status = VA_STATUS_SUCCESS; if (display() == GLX || (copyMode() == ZeroCopy && display() == X11)) { surface_ptr p; std::list<surface_ptr>::iterator it = d.surfaces_used.begin(); for (; it != d.surfaces_used.end() && !p; ++it) { if((*it)->get() == surface_id) { p = *it; break; } } if (!p) { for (it = d.surfaces_free.begin(); it != d.surfaces_free.end() && !p; ++it) { if((*it)->get() == surface_id) { p = *it; break; } } } if (!p) { qWarning("VAAPI - Unable to find surface"); return VideoFrame(); } ((SurfaceInteropVAAPI*)d.surface_interop.data())->setSurface(p); VideoFrame f(d.width, d.height, VideoFormat::Format_RGB32); //p->width() f.setBytesPerLine(d.width*4); //used by gl to compute texture size f.setMetaData("surface_interop", QVariant::fromValue(d.surface_interop)); f.setTimestamp(double(d.frame->pkt_pts)/1000.0); return f; } #if VA_CHECK_VERSION(0,31,0) if ((status = vaSyncSurface(d.display->get(), surface_id)) != VA_STATUS_SUCCESS) { qWarning("vaSyncSurface(VADisplay:%p, VASurfaceID:%#x) == %#x", d.display->get(), surface_id, status); #else if (vaSyncSurface(d.display->get(), d.context_id, surface_id)) { qWarning("vaSyncSurface(VADisplay:%#x, VAContextID:%#x, VASurfaceID:%#x) == %#x", d.display, d.context_id, surface_id, status); #endif return VideoFrame(); } if (!d.disable_derive && d.supports_derive) { /* * http://web.archiveorange.com/archive/v/OAywENyq88L319OcRnHI * vaDeriveImage is faster than vaGetImage. But VAImage is uncached memory and copying from it would be terribly slow * TODO: copy from USWC, see vlc and https://github.com/OpenELEC/OpenELEC.tv/pull/2937.diff * https://software.intel.com/en-us/articles/increasing-memory-throughput-with-intel-streaming-simd-extensions-4-intel-sse4-streaming-load */ VA_ENSURE_TRUE(vaDeriveImage(d.display->get(), surface_id, &d.image), VideoFrame()); } else { VA_ENSURE_TRUE(vaGetImage(d.display->get(), surface_id, 0, 0, d.width, d.height, d.image.image_id), VideoFrame()); } void *p_base; VA_ENSURE_TRUE(vaMapBuffer(d.display->get(), d.image.buf, &p_base), VideoFrame()); VideoFormat::PixelFormat pixfmt = VideoFormat::Format_Invalid; bool swap_uv = false; switch (d.image.format.fourcc) { case VA_FOURCC_YV12: swap_uv |= d.disable_derive || !d.supports_derive; pixfmt = VideoFormat::Format_YUV420P; break; case VA_FOURCC_IYUV: swap_uv = true; pixfmt = VideoFormat::Format_YUV420P; break; case VA_FOURCC_NV12: pixfmt = VideoFormat::Format_NV12; break; default: break; } if (pixfmt == VideoFormat::Format_Invalid) { qWarning("unsupported vaapi pixel format: %#x", d.image.format.fourcc); return VideoFrame(); } const VideoFormat fmt(pixfmt); uint8_t *src[3]; int pitch[3]; for (int i = 0; i < fmt.planeCount(); ++i) { src[i] = (uint8_t*)p_base + d.image.offsets[i]; pitch[i] = d.image.pitches[i]; } VideoFrame frame(copyToFrame(fmt, d.surface_height, src, pitch, swap_uv)); VAWARN(vaUnmapBuffer(d.display->get(), d.image.buf)); if (!d.disable_derive && d.supports_derive) { vaDestroyImage(d.display->get(), d.image.image_id); d.image.image_id = VA_INVALID_ID; } return frame; } void VideoDecoderVAAPI::setDisplayPriority(const QStringList &priority) { DPTR_D(VideoDecoderVAAPI); d.display_priority.clear(); int idx = staticMetaObject.indexOfEnumerator("DisplayType"); const QMetaEnum me = staticMetaObject.enumerator(idx); foreach (const QString& disp, priority) { d.display_priority.push_back((DisplayType)me.keyToValue(disp.toUtf8().constData())); }