// Maps decoded picture `picIdx` with cuvidMapVideoFrame (failures are routed
// through cuSafeCall) and returns a CV_8UC1 GpuMat constructed from the mapped
// device pointer and pitch. The matrix has targetHeight() * 3 / 2 rows and
// targetWidth() columns.
// NOTE(review): nothing here unmaps the frame; presumably the caller is
// responsible for the matching unmap — confirm.
cuda::GpuMat mapFrame(int picIdx, CUVIDPROCPARAMS& videoProcParams)
{
    CUdeviceptr mappedPtr;
    unsigned int mappedPitch;

    cuSafeCall( cuvidMapVideoFrame(decoder_, picIdx, &mappedPtr, &mappedPitch, &videoProcParams) );

    void* const frameData = (void*) mappedPtr;
    return cuda::GpuMat(targetHeight() * 3 / 2, targetWidth(), CV_8UC1, frameData, mappedPitch);
}
void VideoDecoder::mapFrame(int iPictureIndex, CUdeviceptr *ppDevice, unsigned int *pPitch, CUVIDPROCPARAMS *pVideoProcessingParameters) { CUresult oResult = cuvidMapVideoFrame(oDecoder_, iPictureIndex, ppDevice, pPitch, pVideoProcessingParameters); assert(CUDA_SUCCESS == oResult); assert(0 != *ppDevice); assert(0 != *pPitch); }
void* InteropResource::mapToHost(const VideoFormat &format, void *handle, int picIndex, const CUVIDPROCPARAMS ¶m, int width, int height, int coded_height) { AutoCtxLock locker((cuda_api*)this, lock); Q_UNUSED(locker); CUdeviceptr devptr; unsigned int pitch; CUDA_ENSURE(cuvidMapVideoFrame(dec, picIndex, &devptr, &pitch, const_cast<CUVIDPROCPARAMS*>(¶m)), NULL); CUVIDAutoUnmapper unmapper(this, dec, devptr); Q_UNUSED(unmapper); uchar* host_data = NULL; const size_t host_size = pitch*coded_height*3/2; CUDA_ENSURE(cuMemAllocHost((void**)&host_data, host_size), NULL); // copy to the memory not allocated by cuda is possible but much slower CUDA_ENSURE(cuMemcpyDtoH(host_data, devptr, host_size), NULL); VideoFrame frame(width, height, VideoFormat::Format_NV12); uchar *planes[] = { host_data, host_data + pitch * coded_height }; frame.setBits(planes); int pitches[] = { (int)pitch, (int)pitch }; frame.setBytesPerLine(pitches); VideoFrame *f = reinterpret_cast<VideoFrame*>(handle); frame.setTimestamp(f->timestamp()); frame.setDisplayAspectRatio(f->displayAspectRatio()); if (format == frame.format()) *f = frame.clone(); else *f = frame.to(format); cuMemFreeHost(host_data); return f; }
bool VideoDecoderCUDAPrivate::processDecodedData(CUVIDPARSERDISPINFO *cuviddisp, VideoFrame* outFrame) { int num_fields = cuviddisp->progressive_frame ? 1 : 2+cuviddisp->repeat_first_field; for (int active_field = 0; active_field < num_fields; ++active_field) { CUVIDPROCPARAMS proc_params; memset(&proc_params, 0, sizeof(CUVIDPROCPARAMS)); proc_params.progressive_frame = cuviddisp->progressive_frame; //check user config proc_params.second_field = active_field == 1; //check user config proc_params.top_field_first = cuviddisp->top_field_first; proc_params.unpaired_field = cuviddisp->progressive_frame == 1; CUdeviceptr devptr; unsigned int pitch; cuvidCtxLock(vid_ctx_lock, 0); CUresult cuStatus = cuvidMapVideoFrame(dec, cuviddisp->picture_index, &devptr, &pitch, &proc_params); if (cuStatus != CUDA_SUCCESS) { qWarning("cuvidMapVideoFrame failed on index %d (%#x, %s)", cuviddisp->picture_index, cuStatus, _cudaGetErrorEnum(cuStatus)); cuvidUnmapVideoFrame(dec, devptr); cuvidCtxUnlock(vid_ctx_lock, 0); return false; } #define PAD_ALIGN(x,mask) ( (x + mask) & ~mask ) //uint w = dec_create_info.ulWidth;//PAD_ALIGN(dec_create_info.ulWidth, 0x3F); uint h = dec_create_info.ulHeight;//PAD_ALIGN(dec_create_info.ulHeight, 0x0F); //? 
#undef PAD_ALIGN int size = pitch*h*3/2; if (size > host_data_size && host_data) { cuMemFreeHost(host_data); host_data = 0; host_data_size = 0; } if (!host_data) { cuStatus = cuMemAllocHost((void**)&host_data, size); if (cuStatus != CUDA_SUCCESS) { qWarning("cuMemAllocHost failed (%#x, %s)", cuStatus, _cudaGetErrorEnum(cuStatus)); cuvidUnmapVideoFrame(dec, devptr); cuvidCtxUnlock(vid_ctx_lock, 0); return false; } host_data_size = size; } if (!host_data) { qWarning("No valid staging memory!"); cuvidUnmapVideoFrame(dec, devptr); cuvidCtxUnlock(vid_ctx_lock, 0); return false; } cuStatus = cuMemcpyDtoHAsync(host_data, devptr, size, stream); if (cuStatus != CUDA_SUCCESS) { qWarning("cuMemcpyDtoHAsync failed (%#x, %s)", cuStatus, _cudaGetErrorEnum(cuStatus)); cuvidUnmapVideoFrame(dec, devptr); cuvidCtxUnlock(vid_ctx_lock, 0); return false; } cuStatus = cuCtxSynchronize(); if (cuStatus != CUDA_SUCCESS) { qWarning("cuCtxSynchronize failed (%#x, %s)", cuStatus, _cudaGetErrorEnum(cuStatus)); } cuvidUnmapVideoFrame(dec, devptr); cuvidCtxUnlock(vid_ctx_lock, 0); //qDebug("mark not in use pic_index: %d", cuviddisp->picture_index); surface_in_use[cuviddisp->picture_index] = false; uchar *planes[] = { host_data, host_data + pitch * h }; int pitches[] = { (int)pitch, (int)pitch }; VideoFrame frame(codec_ctx->width, codec_ctx->height, VideoFormat::Format_NV12); frame.setBits(planes); frame.setBytesPerLine(pitches); //TODO: is clone required? may crash on clone, I should review clone() //frame = frame.clone(); if (outFrame) { *outFrame = frame.clone(); } #if COPY_ON_DECODE frame_queue.put(frame.clone()); #endif //qDebug("frame queue size: %d", frame_queue.size()); } return true; }
static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) { CuvidContext *ctx = avctx->priv_data; AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data; AVCUDADeviceContext *device_hwctx = device_ctx->hwctx; CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; CUdeviceptr mapped_frame = 0; int ret = 0, eret = 0; av_log(avctx, AV_LOG_TRACE, "cuvid_output_frame\n"); if (ctx->decoder_flushing) { ret = cuvid_decode_packet(avctx, NULL); if (ret < 0 && ret != AVERROR_EOF) return ret; } ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx)); if (ret < 0) return ret; if (av_fifo_size(ctx->frame_queue)) { CuvidParsedFrame parsed_frame; CUVIDPROCPARAMS params; unsigned int pitch = 0; int offset = 0; int i; av_fifo_generic_read(ctx->frame_queue, &parsed_frame, sizeof(CuvidParsedFrame), NULL); memset(¶ms, 0, sizeof(params)); params.progressive_frame = parsed_frame.dispinfo.progressive_frame; params.second_field = parsed_frame.second_field; params.top_field_first = parsed_frame.dispinfo.top_field_first; ret = CHECK_CU(cuvidMapVideoFrame(ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, ¶ms)); if (ret < 0) goto error; if (avctx->pix_fmt == AV_PIX_FMT_CUDA) { ret = av_hwframe_get_buffer(ctx->hwframe, frame, 0); if (ret < 0) { av_log(avctx, AV_LOG_ERROR, "av_hwframe_get_buffer failed\n"); goto error; } ret = ff_decode_frame_props(avctx, frame); if (ret < 0) { av_log(avctx, AV_LOG_ERROR, "ff_decode_frame_props failed\n"); goto error; } for (i = 0; i < 2; i++) { CUDA_MEMCPY2D cpy = { .srcMemoryType = CU_MEMORYTYPE_DEVICE, .dstMemoryType = CU_MEMORYTYPE_DEVICE, .srcDevice = mapped_frame, .dstDevice = (CUdeviceptr)frame->data[i], .srcPitch = pitch, .dstPitch = frame->linesize[i], .srcY = offset, .WidthInBytes = FFMIN(pitch, frame->linesize[i]), .Height = avctx->height >> (i ? 1 : 0), }; ret = CHECK_CU(cuMemcpy2D(&cpy)); if (ret < 0) goto error; offset += avctx->coded_height; } } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) {
bool GLInteropResource::map(int picIndex, const CUVIDPROCPARAMS ¶m, GLuint tex, int w, int h, int H, int plane) { AutoCtxLock locker((cuda_api*)this, lock); Q_UNUSED(locker); if (!ensureResource(w, h, H, tex, plane)) // TODO surface size instead of frame size because we copy the device data return false; //CUDA_ENSURE(cuCtxPushCurrent(ctx), false); CUdeviceptr devptr; unsigned int pitch; CUDA_ENSURE(cuvidMapVideoFrame(dec, picIndex, &devptr, &pitch, const_cast<CUVIDPROCPARAMS*>(¶m)), false); CUVIDAutoUnmapper unmapper(this, dec, devptr); Q_UNUSED(unmapper); // TODO: why can not use res[plane].stream? CUDA_ERROR_INVALID_HANDLE CUDA_ENSURE(cuGraphicsMapResources(1, &res[plane].cuRes, 0), false); CUarray array; CUDA_ENSURE(cuGraphicsSubResourceGetMappedArray(&array, res[plane].cuRes, 0, 0), false); CUDA_MEMCPY2D cu2d; memset(&cu2d, 0, sizeof(cu2d)); cu2d.srcDevice = devptr; cu2d.srcMemoryType = CU_MEMORYTYPE_DEVICE; cu2d.srcPitch = pitch; cu2d.dstArray = array; cu2d.dstMemoryType = CU_MEMORYTYPE_ARRAY; cu2d.dstPitch = pitch; // the whole size or copy size? cu2d.WidthInBytes = pitch; cu2d.Height = h; if (plane == 1) { cu2d.srcXInBytes = 0;// +srcY*srcPitch + srcXInBytes cu2d.srcY = H; // skip the padding height cu2d.Height /= 2; } if (res[plane].stream) CUDA_ENSURE(cuMemcpy2DAsync(&cu2d, res[plane].stream), false); else CUDA_ENSURE(cuMemcpy2D(&cu2d), false); //TODO: delay cuCtxSynchronize && unmap. do it in unmap(tex)? // map to an already mapped resource will crash. sometimes I can not unmap the resource in unmap(tex) because if context switch error // so I simply unmap the resource here if (WORKAROUND_UNMAP_CONTEXT_SWITCH) { if (res[plane].stream) { //CUDA_WARN(cuCtxSynchronize(), false); //wait too long time? use cuStreamQuery? 
CUDA_WARN(cuStreamSynchronize(res[plane].stream)); //slower than CtxSynchronize } /* * This function provides the synchronization guarantee that any CUDA work issued * in \p stream before ::cuGraphicsUnmapResources() will complete before any * subsequently issued graphics work begins. * The graphics API from which \p resources were registered * should not access any resources while they are mapped by CUDA. If an * application does so, the results are undefined. */ CUDA_ENSURE(cuGraphicsUnmapResources(1, &res[plane].cuRes, 0), false); } else { // call it at last. current context will be used by other cuda calls (unmap() for example) CUDA_ENSURE(cuCtxPopCurrent(&ctx), false); // not required } return true; }
bool EGLInteropResource::map(int picIndex, const CUVIDPROCPARAMS ¶m, GLuint tex, int w, int h, int H, int plane) { // plane is always 0 because frame is rgb AutoCtxLock locker((cuda_api*)this, lock); Q_UNUSED(locker); if (!ensureResource(w, h, param.Reserved[0], H, tex)) // TODO surface size instead of frame size because we copy the device data return false; //CUDA_ENSURE(cuCtxPushCurrent(ctx), false); CUdeviceptr devptr; unsigned int pitch; CUDA_ENSURE(cuvidMapVideoFrame(dec, picIndex, &devptr, &pitch, const_cast<CUVIDPROCPARAMS*>(¶m)), false); CUVIDAutoUnmapper unmapper(this, dec, devptr); Q_UNUSED(unmapper); // TODO: why can not use res[plane].stream? CUDA_ERROR_INVALID_HANDLE CUDA_ENSURE(cuGraphicsMapResources(1, &res[plane].cuRes, 0), false); CUarray array; CUDA_ENSURE(cuGraphicsSubResourceGetMappedArray(&array, res[plane].cuRes, 0, 0), false); CUDA_ENSURE(cuGraphicsUnmapResources(1, &res[plane].cuRes, 0), false); // mapped array still accessible! CUDA_MEMCPY2D cu2d; memset(&cu2d, 0, sizeof(cu2d)); // Y plane cu2d.srcDevice = devptr; cu2d.srcMemoryType = CU_MEMORYTYPE_DEVICE; cu2d.srcPitch = pitch; cu2d.dstArray = array; cu2d.dstMemoryType = CU_MEMORYTYPE_ARRAY; cu2d.dstPitch = pitch; // the whole size or copy size? cu2d.WidthInBytes = res[plane].W; // the same value as texture9_nv12 cu2d.Height = H*3/2; if (res[plane].stream) CUDA_ENSURE(cuMemcpy2DAsync(&cu2d, res[plane].stream), false); else CUDA_ENSURE(cuMemcpy2D(&cu2d), false); //TODO: delay cuCtxSynchronize && unmap. do it in unmap(tex)? // map to an already mapped resource will crash. sometimes I can not unmap the resource in unmap(tex) because if context switch error // so I simply unmap the resource here if (WORKAROUND_UNMAP_CONTEXT_SWITCH) { if (res[plane].stream) { //CUDA_WARN(cuCtxSynchronize(), false); //wait too long time? use cuStreamQuery? 
CUDA_WARN(cuStreamSynchronize(res[plane].stream)); //slower than CtxSynchronize } /* * This function provides the synchronization guarantee that any CUDA work issued * in \p stream before ::cuGraphicsUnmapResources() will complete before any * subsequently issued graphics work begins. * The graphics API from which \p resources were registered * should not access any resources while they are mapped by CUDA. If an * application does so, the results are undefined. */ // CUDA_ENSURE(cuGraphicsUnmapResources(1, &res[plane].cuRes, 0), false); } D3DLOCKED_RECT rect_src, rect_dst; DX_ENSURE(texture9_nv12->LockRect(0, &rect_src, NULL, D3DLOCK_READONLY), false); DX_ENSURE(surface9_nv12->LockRect(&rect_dst, NULL, D3DLOCK_DISCARD), false); memcpy(rect_dst.pBits, rect_src.pBits, res[plane].W*H*3/2); // exactly w and h DX_ENSURE(surface9_nv12->UnlockRect(), false); DX_ENSURE(texture9_nv12->UnlockRect(0), false); #if 0 //IDirect3DSurface9 *raw_surface = NULL; //DX_ENSURE(texture9_nv12->GetSurfaceLevel(0, &raw_surface), false); const RECT src = { 0, 0, w, h*3/2}; DX_ENSURE(device9->StretchRect(raw_surface, &src, surface9_nv12, NULL, D3DTEXF_NONE), false); #endif if (!map(surface9_nv12, tex, w, h, H)) return false; return true; }
/* * Main transcoding thread * Initializes CUDA device, decodes frames with NVCUVID API and adds them to frame queue, which passes them to NVENC for encoding, then output */ void NVENCGUI::Transcode() { CUresult result; // initialize CUDA result = cuInit(0); if (result != CUDA_SUCCESS) { emit Error(ERR_CUDA_INIT); return; } NVENCSTATUS nvStatus = NV_ENC_SUCCESS; // no input file if (encodeConfig.inputFileName == NULL) { emit Error(ERR_INPUT); return; } // no output file if (encodeConfig.outputFileName == NULL) { emit Error(ERR_OUTPUT); return; } // unable to open input file if (!fopen(encodeConfig.inputFileName, "r")) { emit Error(ERR_INPUT); return; } encodeConfig.fOutput = fopen(encodeConfig.outputFileName, "wb"); // unable to open output file if (encodeConfig.fOutput == NULL) { emit Error(ERR_OUTPUT); return; } // initialize CUDA on device and set CUDA context CUcontext cudaCtx; CUdevice device; result = cuDeviceGet(&device, encodeConfig.deviceID); if (result != CUDA_SUCCESS) { emit Error(ERR_CUDA_DEVICE); return; } result = cuCtxCreate(&cudaCtx, CU_CTX_SCHED_AUTO, device); if (result != CUDA_SUCCESS) { emit Error(ERR_CUDA_CTX); return; } // initialize NVCUVID context CUcontext curCtx; CUvideoctxlock ctxLock; result = cuCtxPopCurrent(&curCtx); if (result != CUDA_SUCCESS) { emit Error(ERR_CUDA_CTX); return; } result = cuvidCtxLockCreate(&ctxLock, curCtx); if (result != CUDA_SUCCESS) { emit Error(ERR_CUDA_CTX); return; } CudaDecoder* pDecoder = new CudaDecoder; FrameQueue* pFrameQueue = new CUVIDFrameQueue(ctxLock); pDecoder->InitVideoDecoder(encodeConfig.inputFileName, ctxLock, pFrameQueue, encodeConfig.width, encodeConfig.height); int decodedW, decodedH, decodedFRN, decodedFRD; pDecoder->GetCodecParam(&decodedW, &decodedH, &decodedFRN, &decodedFRD); // If the width/height is not set, set to same as source if (encodeConfig.width <= 0 || encodeConfig.height <= 0) { encodeConfig.width = decodedW; encodeConfig.height = decodedH; } // same, except for fps if 
(encodeConfig.fps <= 0) { if (decodedFRN <= 0 || decodedFRD <= 0) encodeConfig.fps = 30; else encodeConfig.fps = decodedFRN / decodedFRD; } // initialize frame queue with width/height pFrameQueue->init(encodeConfig.width, encodeConfig.height); VideoEncoder* pEncoder = new VideoEncoder(ctxLock); assert(pEncoder->GetHWEncoder()); // initialize NVENC HW Encoder nvStatus = pEncoder->GetHWEncoder()->Initialize(cudaCtx, NV_ENC_DEVICE_TYPE_CUDA); if (nvStatus != NV_ENC_SUCCESS) { emit Error(ERR_NVENC_ENC_INIT); return; } // get preset GUID encodeConfig.presetGUID = pEncoder->GetHWEncoder()->GetPresetGUID(encodeConfig.encoderPreset, encodeConfig.codec); // create encoder nvStatus = pEncoder->GetHWEncoder()->CreateEncoder(&encodeConfig); if (nvStatus != NV_ENC_SUCCESS) { emit Error(ERR_NVENC_ENC_CREATE); return; } // create buffer nvStatus = pEncoder->AllocateIOBuffers(&encodeConfig); if (nvStatus != NV_ENC_SUCCESS) { emit Error(ERR_NVENC_ENC_BUFFER); return; } // print details to text window, start counter emit PrintDetails(); NvQueryPerformanceCounter(&results.lStart); //start decoding thread #ifdef _WIN32 HANDLE decodeThread = CreateThread(NULL, 0, DecodeProc, (LPVOID)pDecoder, 0, NULL); #else pthread_t pid; pthread_create(&pid, NULL, DecodeProc, (void*)pDecoder); #endif int encodedFrames = 0; //start encoding thread while (!(pFrameQueue->isEndOfDecode() && pFrameQueue->isEmpty())) { CUVIDPARSERDISPINFO pInfo; if (pFrameQueue->dequeue(&pInfo)) { CUdeviceptr dMappedFrame = 0; unsigned int pitch; CUVIDPROCPARAMS oVPP = { 0 }; oVPP.unpaired_field = 1; oVPP.progressive_frame = 1; cuvidMapVideoFrame(pDecoder->GetDecoder(), pInfo.picture_index, &dMappedFrame, &pitch, &oVPP); EncodeFrameConfig stEncodeConfig = { 0 }; stEncodeConfig.dptr = dMappedFrame; stEncodeConfig.pitch = pitch; stEncodeConfig.width = encodeConfig.width; stEncodeConfig.height = encodeConfig.height; pEncoder->EncodeFrame(&stEncodeConfig); cuvidUnmapVideoFrame(pDecoder->GetDecoder(), dMappedFrame); 
pFrameQueue->releaseFrame(&pInfo); //emit IncrementEncodedFrames(); } } // flush pEncoder->EncodeFrame(NULL, true); // end decoding thread #ifdef _WIN32 WaitForSingleObject(decodeThread, INFINITE); #else pthread_join(pid, NULL); #endif // print transcoding details if (pEncoder->GetEncodedFrames() > 0) { results.decodedFrames = pDecoder->m_decodedFrames; results.encodedFrames = pEncoder->GetEncodedFrames(); NvQueryPerformanceCounter(&results.lEnd); NvQueryPerformanceFrequency(&results.lFreq); results.elapsedTime = (double)(results.lEnd - results.lStart) / (double)results.lFreq; } emit TranscodingEnd(); // clean up cuvidCtxLockDestroy(ctxLock); pEncoder->Deinitialize(); delete pDecoder; delete pEncoder; delete pFrameQueue; result = cuCtxDestroy(cudaCtx); if (result != CUDA_SUCCESS) { emit Error(ERR_CUDA_CTX_DESTROY); return; } return; }
static int cuvid_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt) { CuvidContext *ctx = avctx->priv_data; AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)ctx->hwdevice->data; AVCUDADeviceContext *device_hwctx = device_ctx->hwctx; CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; AVFrame *frame = data; CUVIDSOURCEDATAPACKET cupkt; AVPacket filter_packet = { 0 }; AVPacket filtered_packet = { 0 }; CUdeviceptr mapped_frame = 0; int ret = 0, eret = 0; if (ctx->bsf && avpkt->size) { if ((ret = av_packet_ref(&filter_packet, avpkt)) < 0) { av_log(avctx, AV_LOG_ERROR, "av_packet_ref failed\n"); return ret; } if ((ret = av_bsf_send_packet(ctx->bsf, &filter_packet)) < 0) { av_log(avctx, AV_LOG_ERROR, "av_bsf_send_packet failed\n"); av_packet_unref(&filter_packet); return ret; } if ((ret = av_bsf_receive_packet(ctx->bsf, &filtered_packet)) < 0) { av_log(avctx, AV_LOG_ERROR, "av_bsf_receive_packet failed\n"); return ret; } avpkt = &filtered_packet; } ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx)); if (ret < 0) { av_packet_unref(&filtered_packet); return ret; } memset(&cupkt, 0, sizeof(cupkt)); if (avpkt->size) { cupkt.payload_size = avpkt->size; cupkt.payload = avpkt->data; if (avpkt->pts != AV_NOPTS_VALUE) { cupkt.flags = CUVID_PKT_TIMESTAMP; if (avctx->pkt_timebase.num && avctx->pkt_timebase.den) cupkt.timestamp = av_rescale_q(avpkt->pts, avctx->pkt_timebase, (AVRational){1, 10000000}); else cupkt.timestamp = avpkt->pts; } } else { cupkt.flags = CUVID_PKT_ENDOFSTREAM; } ret = CHECK_CU(cuvidParseVideoData(ctx->cuparser, &cupkt)); av_packet_unref(&filtered_packet); if (ret < 0) { goto error; } // cuvidParseVideoData doesn't return an error just because stuff failed... 
if (ctx->internal_error) { av_log(avctx, AV_LOG_ERROR, "cuvid decode callback error\n"); ret = ctx->internal_error; goto error; } if (av_fifo_size(ctx->frame_queue)) { CUVIDPARSERDISPINFO dispinfo; CUVIDPROCPARAMS params; unsigned int pitch = 0; int offset = 0; int i; av_fifo_generic_read(ctx->frame_queue, &dispinfo, sizeof(CUVIDPARSERDISPINFO), NULL); memset(¶ms, 0, sizeof(params)); params.progressive_frame = dispinfo.progressive_frame; params.second_field = 0; params.top_field_first = dispinfo.top_field_first; ret = CHECK_CU(cuvidMapVideoFrame(ctx->cudecoder, dispinfo.picture_index, &mapped_frame, &pitch, ¶ms)); if (ret < 0) goto error; if (avctx->pix_fmt == AV_PIX_FMT_CUDA) { ret = av_hwframe_get_buffer(ctx->hwframe, frame, 0); if (ret < 0) { av_log(avctx, AV_LOG_ERROR, "av_hwframe_get_buffer failed\n"); goto error; } ret = ff_decode_frame_props(avctx, frame); if (ret < 0) { av_log(avctx, AV_LOG_ERROR, "ff_decode_frame_props failed\n"); goto error; } for (i = 0; i < 2; i++) { CUDA_MEMCPY2D cpy = { .srcMemoryType = CU_MEMORYTYPE_DEVICE, .dstMemoryType = CU_MEMORYTYPE_DEVICE, .srcDevice = mapped_frame, .dstDevice = (CUdeviceptr)frame->data[i], .srcPitch = pitch, .dstPitch = frame->linesize[i], .srcY = offset, .WidthInBytes = FFMIN(pitch, frame->linesize[i]), .Height = avctx->coded_height >> (i ? 1 : 0), }; ret = CHECK_CU(cuMemcpy2D(&cpy)); if (ret < 0) goto error; offset += avctx->coded_height; } } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) {