bool VideoDecoderCUDA::decode(const QByteArray &encoded) { if (!isAvailable()) return false; DPTR_D(VideoDecoderCUDA); if (!d.parser) { qWarning("CUVID parser not ready"); return false; } CUVIDSOURCEDATAPACKET cuvid_pkt; memset(&cuvid_pkt, 0, sizeof(CUVIDSOURCEDATAPACKET)); cuvid_pkt.payload = (unsigned char *)encoded.data(); cuvid_pkt.payload_size = encoded.size(); cuvid_pkt.flags = CUVID_PKT_TIMESTAMP; cuvid_pkt.timestamp = 0;// ? //TODO: fill NALU header for h264? https://devtalk.nvidia.com/default/topic/515571/what-the-data-format-34-cuvidparsevideodata-34-can-accept-/ { //cuvidCtxUnlock(d.vid_ctx_lock, 0); //TODO: why wrong context? CUresult cuStatus = cuvidParseVideoData(d.parser, &cuvid_pkt); if (cuStatus != CUDA_SUCCESS) { qWarning("cuMemcpyDtoH failed (%p, %s)", cuStatus, _cudaGetErrorEnum(cuStatus)); } } // callbacks are in the same thread as this. so no queue is required? qDebug("frame queue size on decode: %d", d.frame_queue.size()); return !d.frame_queue.isEmpty(); }
// Error-check helper: when `result` is non-zero, prints the failing
// expression text (`func`), its source location and the error name to
// stderr, expands DEVICE_RESET, and terminates the process with EXIT_FAILURE.
// A zero `result` makes this a no-op.
void check(T result, char const *const func, const char *const file, int const line)
{
    if (!result) {
        return;
    }

    fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
            file, line,
            static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
    DEVICE_RESET
    // Make sure we call CUDA Device Reset before exiting
    exit(EXIT_FAILURE);
}
bool VideoDecoderCUDAPrivate::processDecodedData(CUVIDPARSERDISPINFO *cuviddisp, VideoFrame* outFrame) { int num_fields = cuviddisp->progressive_frame ? 1 : 2+cuviddisp->repeat_first_field; for (int active_field = 0; active_field < num_fields; ++active_field) { CUVIDPROCPARAMS proc_params; memset(&proc_params, 0, sizeof(CUVIDPROCPARAMS)); proc_params.progressive_frame = cuviddisp->progressive_frame; //check user config proc_params.second_field = active_field == 1; //check user config proc_params.top_field_first = cuviddisp->top_field_first; proc_params.unpaired_field = cuviddisp->progressive_frame == 1; CUdeviceptr devptr; unsigned int pitch; cuvidCtxLock(vid_ctx_lock, 0); CUresult cuStatus = cuvidMapVideoFrame(dec, cuviddisp->picture_index, &devptr, &pitch, &proc_params); if (cuStatus != CUDA_SUCCESS) { qWarning("cuvidMapVideoFrame failed on index %d (%#x, %s)", cuviddisp->picture_index, cuStatus, _cudaGetErrorEnum(cuStatus)); cuvidUnmapVideoFrame(dec, devptr); cuvidCtxUnlock(vid_ctx_lock, 0); return false; } #define PAD_ALIGN(x,mask) ( (x + mask) & ~mask ) //uint w = dec_create_info.ulWidth;//PAD_ALIGN(dec_create_info.ulWidth, 0x3F); uint h = dec_create_info.ulHeight;//PAD_ALIGN(dec_create_info.ulHeight, 0x0F); //? 
#undef PAD_ALIGN int size = pitch*h*3/2; if (size > host_data_size && host_data) { cuMemFreeHost(host_data); host_data = 0; host_data_size = 0; } if (!host_data) { cuStatus = cuMemAllocHost((void**)&host_data, size); if (cuStatus != CUDA_SUCCESS) { qWarning("cuMemAllocHost failed (%#x, %s)", cuStatus, _cudaGetErrorEnum(cuStatus)); cuvidUnmapVideoFrame(dec, devptr); cuvidCtxUnlock(vid_ctx_lock, 0); return false; } host_data_size = size; } if (!host_data) { qWarning("No valid staging memory!"); cuvidUnmapVideoFrame(dec, devptr); cuvidCtxUnlock(vid_ctx_lock, 0); return false; } cuStatus = cuMemcpyDtoHAsync(host_data, devptr, size, stream); if (cuStatus != CUDA_SUCCESS) { qWarning("cuMemcpyDtoHAsync failed (%#x, %s)", cuStatus, _cudaGetErrorEnum(cuStatus)); cuvidUnmapVideoFrame(dec, devptr); cuvidCtxUnlock(vid_ctx_lock, 0); return false; } cuStatus = cuCtxSynchronize(); if (cuStatus != CUDA_SUCCESS) { qWarning("cuCtxSynchronize failed (%#x, %s)", cuStatus, _cudaGetErrorEnum(cuStatus)); } cuvidUnmapVideoFrame(dec, devptr); cuvidCtxUnlock(vid_ctx_lock, 0); //qDebug("mark not in use pic_index: %d", cuviddisp->picture_index); surface_in_use[cuviddisp->picture_index] = false; uchar *planes[] = { host_data, host_data + pitch * h }; int pitches[] = { (int)pitch, (int)pitch }; VideoFrame frame(codec_ctx->width, codec_ctx->height, VideoFormat::Format_NV12); frame.setBits(planes); frame.setBytesPerLine(pitches); //TODO: is clone required? may crash on clone, I should review clone() //frame = frame.clone(); if (outFrame) { *outFrame = frame.clone(); } #if COPY_ON_DECODE frame_queue.put(frame.clone()); #endif //qDebug("frame queue size: %d", frame_queue.size()); } return true; }
// Applies the Gaussian denoise filter to a YV12 frame.
//
// 8-bit YV12 input is processed with the Npp8u filter, the 9/10/12/14/16-bit
// YV12 variants with the Npp16u filter.
//
// Returns:
//   NV_ENC_SUCCESS               on success,
//   NV_ENC_ERR_UNSUPPORTED_PARAM if input and output colorspaces differ,
//   NV_ENC_ERR_INVALID_PARAM     if m_pParam is not a NVEncFilterParamGaussDenoise,
//   NV_ENC_ERR_GENERIC           if the NPP filter call fails,
//   NV_ENC_ERR_UNIMPLEMENTED     if the input colorspace is not a supported YV12 variant.
NVENCSTATUS NVEncFilterDenoiseGauss::denoiseYV12(FrameInfo *pOutputFrame, const FrameInfo *pInputFrame) {
    NVENCSTATUS sts = NV_ENC_SUCCESS;
    if (m_pParam->frameOut.csp != m_pParam->frameIn.csp) {
        AddMessage(RGY_LOG_ERROR, _T("csp does not match.\n"));
        return NV_ENC_ERR_UNSUPPORTED_PARAM;
    }
    auto pGaussParam = std::dynamic_pointer_cast<NVEncFilterParamGaussDenoise>(m_pParam);
    if (!pGaussParam) {
        AddMessage(RGY_LOG_ERROR, _T("Invalid parameter type.\n"));
        return NV_ENC_ERR_INVALID_PARAM;
    }

    const auto supportedCspYV12High = make_array<RGY_CSP>(RGY_CSP_YV12_09, RGY_CSP_YV12_10, RGY_CSP_YV12_12, RGY_CSP_YV12_14, RGY_CSP_YV12_16);
    NppStatus nppsts = NPP_SUCCESS;
    if (pGaussParam->frameIn.csp == RGY_CSP_YV12) {
        nppsts = denoise_yv12<Npp8u>(pOutputFrame, pInputFrame, nppiFilterGaussBorder_8u_C1R, pGaussParam->masksize);
        if (nppsts != NPP_SUCCESS) {
            AddMessage(RGY_LOG_ERROR, _T("failed to denoise: %d, %s.\n"), nppsts, char_to_tstring(_cudaGetErrorEnum(nppsts)).c_str());
            sts = NV_ENC_ERR_GENERIC;
        }
    } else if (std::find(supportedCspYV12High.begin(), supportedCspYV12High.end(), pGaussParam->frameIn.csp) != supportedCspYV12High.end()) {
        nppsts = denoise_yv12<Npp16u>(pOutputFrame, pInputFrame, nppiFilterGaussBorder_16u_C1R, pGaussParam->masksize);
        if (nppsts != NPP_SUCCESS) {
            AddMessage(RGY_LOG_ERROR, _T("failed to denoise: %d, %s.\n"), nppsts, char_to_tstring(_cudaGetErrorEnum(nppsts)).c_str());
            sts = NV_ENC_ERR_GENERIC;
        }
    } else {
        AddMessage(RGY_LOG_ERROR, _T("unsupported csp.\n"));
        sts = NV_ENC_ERR_UNIMPLEMENTED;
    }
    // Propagate the recorded status; returning NV_ENC_SUCCESS unconditionally
    // would hide filter failures and unsupported colorspaces from the caller.
    return sts;
}
// Aborts the program when a cuBLAS call did not succeed.
//
// Prints the status name together with the source location (`file`, `line`)
// to stdout and exits with EXIT_FAILURE. Does nothing when `status` is
// CUBLAS_STATUS_SUCCESS.
static void _cudaHandleError(cublasStatus_t status, const char *file, int line)
{
    if (status == CUBLAS_STATUS_SUCCESS) {
        return;
    }

    const char *const statusName = _cudaGetErrorEnum(status);
    printf("%s in %s at line %d\n", statusName, file, line);
    exit(EXIT_FAILURE);
}