/*
 * If the frame is not created for direct rendering, then the frame data is
 * already deep copied, so detach() is enough.
 * TODO: map frame from texture etc.
 */
void VideoCapture::setVideoFrame(const VideoFrame &frame)
{
    // The parameter in ready(QtAV::VideoFrame) ensures we can access the frame without a lock.
    /*
     * clone() here may block VideoThread. But if we do not clone here, the frame may be
     * modified outside and is not safe to use.
     */
    this->frame = frame.clone(); // TODO: no clone, use detach()
}
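/* A minimal usage sketch (hypothetical glue code; nothing here is QtAV API
 * beyond the ready(QtAV::VideoFrame) signal the comment above refers to, and
 * the slot name saveFrame() is an assumption): because setVideoFrame() stored
 * a deep clone, the receiver can keep and process the frame at its leisure
 * while the decoder thread reuses the original buffer for the next picture.
 * Without the clone, an implicitly shared copy could observe that buffer
 * being overwritten mid-use. */
#include <QObject>

static void attachCaptureSink(VideoCapture *capture, QObject *receiver)
{
    QObject::connect(capture, SIGNAL(ready(QtAV::VideoFrame)),
                     receiver, SLOT(saveFrame(QtAV::VideoFrame)));
}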
VideoFrame VideoDecoderFFmpegHW::copyToFrame(const VideoFormat& fmt, int surface_h, quint8 *src[], int pitch[], bool swapUV)
{
    DPTR_D(VideoDecoderFFmpegHW);
    Q_ASSERT_X(src[0] && pitch[0] > 0, "VideoDecoderFFmpegHW::copyToFrame", "src[0] and pitch[0] must be set");
    const int nb_planes = fmt.planeCount();
    const int chroma_pitch = nb_planes > 1 ? fmt.bytesPerLine(pitch[0], 1) : 0;
    const int chroma_h = fmt.chromaHeight(surface_h);
    int h[] = { surface_h, 0, 0 };
    for (int i = 1; i < nb_planes; ++i) {
        h[i] = chroma_h;
        // set chroma address and pitch if not already set
        if (pitch[i] <= 0)
            pitch[i] = chroma_pitch;
        if (!src[i])
            src[i] = src[i-1] + pitch[i-1]*h[i-1];
    }
    if (swapUV) {
        std::swap(src[1], src[2]);
        std::swap(pitch[1], pitch[2]);
    }
    VideoFrame frame;
    if (copyMode() == VideoDecoderFFmpegHW::OptimizedCopy && d.gpu_mem.isReady()) {
        int yuv_size = 0;
        for (int i = 0; i < nb_planes; ++i) {
            yuv_size += pitch[i]*h[i];
        }
        // allocate 15 extra bytes so the data pointer can be rounded up to a 16-byte boundary
        QByteArray buf(15 + yuv_size, 0);
        const int offset_16 = (16 - ((uintptr_t)buf.data() & 0x0f)) & 0x0f;
        // TODO: are planes 1, 2... aligned as well?
        uchar* plane_ptr = (uchar*)buf.data() + offset_16;
        QVector<uchar*> dst(nb_planes, 0);
        for (int i = 0; i < nb_planes; ++i) {
            dst[i] = plane_ptr;
            // TODO: add VideoFormat::planeWidth/Height()?
            // pitch instead of surface_width
            plane_ptr += pitch[i] * h[i];
            d.gpu_mem.copyFrame(src[i], dst[i], pitch[i], h[i], pitch[i]);
        }
        frame = VideoFrame(buf, width(), height(), fmt);
        frame.setBits(dst);
        frame.setBytesPerLine(pitch);
    } else {
        frame = VideoFrame(width(), height(), fmt);
        frame.setBits(src);
        frame.setBytesPerLine(pitch);
        // TODO: why is clone() faster?
        // TODO: use a buffer pool and create the VideoFrame only when needed, to avoid the copy? also for other va
        frame = frame.clone();
    }
    frame.setTimestamp(double(d.frame->pkt_pts)/1000.0);
    frame.setDisplayAspectRatio(d.getDAR(d.frame));
    d.updateColorDetails(&frame);
    return frame;
}
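/* A minimal sketch of the optimized-copy idea behind gpu_mem.copyFrame()
 * above (this is NOT QtAV's actual GPUMemCopy implementation; the helper name
 * and its exact loop are assumptions). Surfaces returned by hardware decoders
 * typically live in USWC (uncacheable, write-combining) memory, where plain
 * loads are very slow; SSE4.1 streaming loads fetch whole cache lines
 * instead, which is what the Intel article linked below in the VAAPI code
 * describes. Assumes 16-byte aligned pointers and a line width that is a
 * multiple of 16 bytes, which is why the destination buffer above is padded
 * to a 16-byte boundary. */
#include <QtGlobal>
#include <smmintrin.h> // SSE4.1: _mm_stream_load_si128

static void copyPlaneFromUSWC(quint8 *dst, const quint8 *src,
                              int bytesPerLine, int height, int pitch)
{
    for (int y = 0; y < height; ++y) {
        __m128i *d = reinterpret_cast<__m128i*>(dst);
        // _mm_stream_load_si128 takes a non-const pointer even though it only loads
        __m128i *s = reinterpret_cast<__m128i*>(const_cast<quint8*>(src));
        for (int i = 0; i < bytesPerLine/16; ++i)
            _mm_store_si128(d + i, _mm_stream_load_si128(s + i));
        src += pitch;
        dst += bytesPerLine;
    }
    _mm_mfence(); // order the streaming loads before subsequent normal accesses
}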
VideoFrame VideoDecoderVAAPI::frame()
{
    DPTR_D(VideoDecoderVAAPI);
    if (!d.frame->opaque || !d.frame->data[0])
        return VideoFrame();
    VASurfaceID surface_id = (VASurfaceID)(uintptr_t)d.frame->data[3];
    VAStatus status = VA_STATUS_SUCCESS;
    if (display() == GLX) {
        d.surface_interop->setSurface((va_surface_t*)d.frame->opaque, d.surface_width, d.surface_height);
        VideoFrame f(d.surface_width, d.surface_height, VideoFormat::Format_RGB32);
        f.setBytesPerLine(d.surface_width*4); // used by gl to compute texture size
        f.setSurfaceInterop(d.surface_interop);
        return f;
    }
#if VA_CHECK_VERSION(0,31,0)
    if ((status = vaSyncSurface(d.display, surface_id)) != VA_STATUS_SUCCESS) {
        qWarning("vaSyncSurface(VADisplay:%p, VASurfaceID:%#x) == %#x", d.display, surface_id, status);
#else
    if ((status = vaSyncSurface(d.display, d.context_id, surface_id)) != VA_STATUS_SUCCESS) {
        qWarning("vaSyncSurface(VADisplay:%p, VAContextID:%#x, VASurfaceID:%#x) == %#x", d.display, d.context_id, surface_id, status);
#endif
        return VideoFrame();
    }
    if (!d.disable_derive && d.supports_derive) {
        /*
         * http://web.archiveorange.com/archive/v/OAywENyq88L319OcRnHI
         * vaDeriveImage is faster than vaGetImage, but the VAImage it returns is in
         * uncached memory, so copying from it directly would be terribly slow.
         * TODO: copy from USWC, see vlc and https://github.com/OpenELEC/OpenELEC.tv/pull/2937.diff
         * https://software.intel.com/en-us/articles/increasing-memory-throughput-with-intel-streaming-simd-extensions-4-intel-sse4-streaming-load
         */
        status = vaDeriveImage(d.display, surface_id, &d.image);
        if (status != VA_STATUS_SUCCESS) {
            qWarning("vaDeriveImage(VADisplay:%p, VASurfaceID:%#x, VAImage*:%p) == %#x", d.display, surface_id, &d.image, status);
            return VideoFrame();
        }
    } else {
        status = vaGetImage(d.display, surface_id, 0, 0, d.surface_width, d.surface_height, d.image.image_id);
        if (status != VA_STATUS_SUCCESS) {
            qWarning("vaGetImage(VADisplay:%p, VASurfaceID:%#x, 0,0, %d, %d, VAImageID:%#x) == %#x", d.display, surface_id, d.surface_width, d.surface_height, d.image.image_id, status);
            return VideoFrame();
        }
    }
    void *p_base;
    if ((status = vaMapBuffer(d.display, d.image.buf, &p_base)) != VA_STATUS_SUCCESS) {
        qWarning("vaMapBuffer(VADisplay:%p, VABufferID:%#x, pBuf:%p) == %#x", d.display, d.image.buf, &p_base, status);
        return VideoFrame();
    }
    VideoFormat::PixelFormat pixfmt = VideoFormat::Format_Invalid;
    bool swap_uv = false;
    switch (d.image.format.fourcc) {
    case VA_FOURCC_YV12:
        swap_uv |= d.disable_derive || !d.supports_derive;
        pixfmt = VideoFormat::Format_YUV420P;
        break;
    case VA_FOURCC_IYUV:
        swap_uv = true;
        pixfmt = VideoFormat::Format_YUV420P;
        break;
    case VA_FOURCC_NV12:
        pixfmt = VideoFormat::Format_NV12;
        break;
    default:
        break;
    }
    if (pixfmt == VideoFormat::Format_Invalid) {
        qWarning("unsupported vaapi pixel format: %#x", d.image.format.fourcc);
        return VideoFrame();
    }
    const VideoFormat fmt(pixfmt);
    uint8_t *src[3];
    int pitch[3];
    for (int i = 0; i < fmt.planeCount(); ++i) {
        src[i] = (uint8_t*)p_base + d.image.offsets[i];
        pitch[i] = d.image.pitches[i];
    }
    if (swap_uv) {
        std::swap(src[1], src[2]);
        std::swap(pitch[1], pitch[2]);
    }
    VideoFrame frame;
    if (d.copy_uswc && d.gpu_mem.isReady()) {
        int yuv_size = 0;
        if (pixfmt == VideoFormat::Format_NV12)
            yuv_size = pitch[0]*d.surface_height*3/2;
        else
            yuv_size = pitch[0]*d.surface_height + pitch[1]*d.surface_height/2 + pitch[2]*d.surface_height/2;
        // allocate 15 extra bytes so the data pointer can be rounded up to a 16-byte boundary
        QByteArray buf(15 + yuv_size, 0);
        const int offset_16 = (16 - ((uintptr_t)buf.data() & 0x0f)) & 0x0f;
        // TODO: are planes 1, 2... aligned as well?
        uchar* plane_ptr = (uchar*)buf.data() + offset_16;
        QVector<uchar*> dst(fmt.planeCount(), 0);
        for (int i = 0; i < dst.size(); ++i) {
            dst[i] = plane_ptr;
            // TODO: add VideoFormat::planeWidth/Height()?
            const int plane_w = pitch[i]; //(i == 0 || pixfmt == VideoFormat::Format_NV12) ? d.surface_width : fmt.chromaWidth(d.surface_width);
            const int plane_h = i == 0 ? d.surface_height : fmt.chromaHeight(d.surface_height);
            plane_ptr += pitch[i] * plane_h;
            d.gpu_mem.copyFrame(src[i], dst[i], plane_w, plane_h, pitch[i]);
        }
        frame = VideoFrame(buf, d.width, d.height, fmt);
        frame.setBits(dst);
        frame.setBytesPerLine(pitch);
    } else {
        frame = VideoFrame(d.width, d.height, fmt);
        frame.setBits(src);
        frame.setBytesPerLine(pitch);
        // TODO: why is clone() faster?
        frame = frame.clone();
    }
    if ((status = vaUnmapBuffer(d.display, d.image.buf)) != VA_STATUS_SUCCESS) {
        qWarning("vaUnmapBuffer(VADisplay:%p, VABufferID:%#x) == %#x", d.display, d.image.buf, status);
        return VideoFrame();
    }
    if (!d.disable_derive && d.supports_derive) {
        vaDestroyImage(d.display, d.image.image_id);
        d.image.image_id = VA_INVALID_ID;
    }
    return frame;
}

struct display_names_t {
    VideoDecoderVAAPI::DisplayType display;
    QString name;
};

static const display_names_t display_names[] = {
    { VideoDecoderVAAPI::GLX, "GLX" },
    { VideoDecoderVAAPI::X11, "X11" },
    { VideoDecoderVAAPI::DRM, "DRM" }
};

static VideoDecoderVAAPI::DisplayType displayFromName(QString name)
{
    for (unsigned int i = 0; i < sizeof(display_names)/sizeof(display_names[0]); ++i) {
        if (name.toUpper().contains(display_names[i].name.toUpper())) {
            return display_names[i].display;
        }
    }
    return VideoDecoderVAAPI::X11;
}

static QString displayToName(VideoDecoderVAAPI::DisplayType t)
{
    for (unsigned int i = 0; i < sizeof(display_names)/sizeof(display_names[0]); ++i) {
        if (t == display_names[i].display) {
            return display_names[i].name;
        }
    }
    return QString();
}

void VideoDecoderVAAPI::setDisplayPriority(const QStringList &priority)
{
    DPTR_D(VideoDecoderVAAPI);
    d.display_priority.clear();
    foreach (QString disp, priority) {
        d.display_priority.push_back(displayFromName(disp));
    }
}
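/* A minimal sketch of how the helpers above behave (the demo function is
 * illustrative only, not part of QtAV): displayFromName() does a
 * case-insensitive substring match, so "vaapi:GLX" and "glx" both resolve to
 * GLX, and any unrecognized string falls back to X11. */
#include <QStringList>
#include <QtDebug>

static void displayPriorityDemo()
{
    Q_ASSERT(displayFromName("vaapi:GLX") == VideoDecoderVAAPI::GLX);
    Q_ASSERT(displayFromName("x11")       == VideoDecoderVAAPI::X11);
    Q_ASSERT(displayFromName("weird")     == VideoDecoderVAAPI::X11); // fallback
    // A priority list as it would be passed to setDisplayPriority():
    const QStringList priority = QStringList() << "GLX" << "X11" << "DRM";
    foreach (const QString &disp, priority)
        qDebug() << disp << "->" << displayToName(displayFromName(disp));
}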