Exemplo n.º 1
0
/*!
 * Maps the decoded picture referenced by \a cuviddisp, copies it into the host
 * staging buffer (host_data) and wraps it as an NV12 VideoFrame.
 *
 * One pass is made per field: 1 for progressive content, otherwise
 * 2 + repeat_first_field. On every pass the surface is mapped under
 * vid_ctx_lock, copied to host memory, unmapped, and marked as no longer in use.
 *
 * \param cuviddisp display info of the picture to fetch (must not be null; it is
 *                  dereferenced unconditionally).
 * \param outFrame  optional; when non-null it receives a clone of the frame built
 *                  in the current pass (so after the loop it holds the last one).
 * \return true when every pass succeeded; false as soon as mapping, staging
 *         allocation or the device-to-host copy fails.
 */
bool VideoDecoderCUDAPrivate::processDecodedData(CUVIDPARSERDISPINFO *cuviddisp, VideoFrame* outFrame) {
    int num_fields = cuviddisp->progressive_frame ? 1 : 2+cuviddisp->repeat_first_field;

    for (int active_field = 0; active_field < num_fields; ++active_field) {
        CUVIDPROCPARAMS proc_params;
        memset(&proc_params, 0, sizeof(CUVIDPROCPARAMS));
        proc_params.progressive_frame = cuviddisp->progressive_frame; //check user config
        proc_params.second_field = active_field == 1; //check user config
        proc_params.top_field_first = cuviddisp->top_field_first;
        proc_params.unpaired_field = cuviddisp->progressive_frame == 1;

        // Initialized so that a failed map never leaves indeterminate values behind.
        CUdeviceptr devptr = 0;
        unsigned int pitch = 0;
        cuvidCtxLock(vid_ctx_lock, 0);
        CUresult cuStatus = cuvidMapVideoFrame(dec, cuviddisp->picture_index, &devptr, &pitch, &proc_params);
        if (cuStatus != CUDA_SUCCESS) {
            qWarning("cuvidMapVideoFrame failed on index %d (%#x, %s)", cuviddisp->picture_index, cuStatus, _cudaGetErrorEnum(cuStatus));
            // Nothing was mapped, so there is nothing to unmap: only drop the lock.
            cuvidCtxUnlock(vid_ctx_lock, 0);
            return false;
        }
        uint h = dec_create_info.ulHeight;
        // Bytes copied per frame: a full-height plane of `pitch` bytes per row
        // followed by a half-height plane (the two NV12 planes set up below).
        int size = pitch*h*3/2;
        // The staging buffer only grows: drop it when the new frame does not fit.
        if (size > host_data_size && host_data) {
            cuMemFreeHost(host_data);
            host_data = 0;
            host_data_size = 0;
        }
        if (!host_data) {
            cuStatus = cuMemAllocHost((void**)&host_data, size);
            if (cuStatus != CUDA_SUCCESS) {
                qWarning("cuMemAllocHost failed (%#x, %s)", cuStatus, _cudaGetErrorEnum(cuStatus));
                cuvidUnmapVideoFrame(dec, devptr);
                cuvidCtxUnlock(vid_ctx_lock, 0);
                return false;
            }
            host_data_size = size;
        }
        if (!host_data) {
            qWarning("No valid staging memory!");
            cuvidUnmapVideoFrame(dec, devptr);
            cuvidCtxUnlock(vid_ctx_lock, 0);
            return false;
        }
        cuStatus = cuMemcpyDtoHAsync(host_data, devptr, size, stream);
        if (cuStatus != CUDA_SUCCESS) {
            qWarning("cuMemcpyDtoHAsync failed (%#x, %s)", cuStatus, _cudaGetErrorEnum(cuStatus));
            cuvidUnmapVideoFrame(dec, devptr);
            cuvidCtxUnlock(vid_ctx_lock, 0);
            return false;
        }
        // The copy above is asynchronous; wait for it before the surface is
        // unmapped and host_data is read. A failure here is only logged.
        cuStatus = cuCtxSynchronize();
        if (cuStatus != CUDA_SUCCESS) {
            qWarning("cuCtxSynchronize failed (%#x, %s)", cuStatus, _cudaGetErrorEnum(cuStatus));
        }
        cuvidUnmapVideoFrame(dec, devptr);
        cuvidCtxUnlock(vid_ctx_lock, 0);
        //qDebug("mark not in use pic_index: %d", cuviddisp->picture_index);
        surface_in_use[cuviddisp->picture_index] = false;

        // Plane 0 starts at the beginning of the staging buffer, plane 1 right
        // after `h` rows of `pitch` bytes; both planes share the same pitch.
        uchar *planes[] = {
            host_data,
            host_data + pitch * h
        };
        int pitches[] = { (int)pitch, (int)pitch };
        VideoFrame frame(codec_ctx->width, codec_ctx->height, VideoFormat::Format_NV12);
        frame.setBits(planes);
        frame.setBytesPerLine(pitches);
        // `frame` points into host_data, which is reused on the next pass, so only
        // clones are handed out.
        // NOTE(review): presumably clone() makes a deep copy - confirm.
        //TODO: is clone required? may crash on clone, I should review clone()
        if (outFrame) {
            *outFrame = frame.clone();
        }
#if COPY_ON_DECODE
        frame_queue.put(frame.clone());
#endif
        //qDebug("frame queue size: %d", frame_queue.size());
    }
    return true;
}
Exemplo n.º 2
0
/**
 * Unmaps a decoder frame.
 *
 * Forwards pDevice to cuvidUnmapVideoFrame() on this object's decoder handle
 * (oDecoder_). The result is only checked through assert(), so a failure is
 * not reported when assertions are compiled out.
 *
 * @param pDevice device pointer handed to cuvidUnmapVideoFrame()
 *                (presumably one obtained from a matching map call).
 */
void
VideoDecoder::unmapFrame(CUdeviceptr pDevice)
{
    const CUresult unmapStatus = cuvidUnmapVideoFrame(oDecoder_, pDevice);
    assert(unmapStatus == CUDA_SUCCESS);
}
Exemplo n.º 3
0
 /**
  * Unmaps the decoder frame backing the given GpuMat, then releases the GpuMat.
  *
  * frame.data is reinterpreted as the CUdeviceptr passed to
  * cuvidUnmapVideoFrame(); the call is wrapped in cuSafeCall for error handling.
  *
  * @param frame matrix whose data pointer is unmapped (presumably one produced by
  *              the matching map routine); released before returning.
  */
 void unmapFrame(cuda::GpuMat& frame)
 {
     const CUdeviceptr mappedPtr = (CUdeviceptr) frame.data;
     cuSafeCall( cuvidUnmapVideoFrame(decoder_, mappedPtr) );
     frame.release();
 }
Exemplo n.º 4
0
/*
 * Main transcoding thread
 * Initializes CUDA device, decodes frames with NVCUVID API and adds them to frame queue, which passes them to NVENC for encoding, then output
 *
 * Steps, in order:
 *   1. cuInit, validate the input/output file names and open the output file.
 *   2. Create a CUDA context on encodeConfig.deviceID, pop it, and create the
 *      NVCUVID context lock from it.
 *   3. Create decoder, frame queue and encoder; fill in width/height/fps from
 *      the source when the configuration leaves them unset (<= 0).
 *   4. Start DecodeProc on its own thread and, on this thread, dequeue, map,
 *      encode, unmap and release frames until decoding ended and the queue is empty.
 *   5. Flush the encoder, join the decode thread, publish timing results,
 *      emit TranscodingEnd() and tear everything down.
 *
 * Failures are reported through the Error() signal followed by an early return.
 * NOTE(review): those early returns do not release what was created before
 * them (output file, CUDA context, context lock, decoder/queue/encoder).
 */
void NVENCGUI::Transcode()
{
	CUresult result;

	// initialize CUDA
	result = cuInit(0);
	if (result != CUDA_SUCCESS)
	{
		emit Error(ERR_CUDA_INIT);
		return;
	}

	NVENCSTATUS nvStatus = NV_ENC_SUCCESS;

	// no input file
	if (encodeConfig.inputFileName == NULL)
	{
		emit Error(ERR_INPUT);
		return;
	}

	// no output file
	if (encodeConfig.outputFileName == NULL)
	{
		emit Error(ERR_OUTPUT);
		return;
	}

	// unable to open input file
	// The handle is only a readability probe: close it right away instead of
	// leaking it (the decoder is given the file name, not this handle).
	FILE* fInputProbe = fopen(encodeConfig.inputFileName, "r");
	if (fInputProbe == NULL)
	{
		emit Error(ERR_INPUT);
		return;
	}
	fclose(fInputProbe);

	encodeConfig.fOutput = fopen(encodeConfig.outputFileName, "wb");
	// unable to open output file
	if (encodeConfig.fOutput == NULL)
	{
		emit Error(ERR_OUTPUT);
		return;
	}

	// initialize CUDA on device and set CUDA context
	CUcontext cudaCtx;
	CUdevice device;

	result = cuDeviceGet(&device, encodeConfig.deviceID);
	if (result != CUDA_SUCCESS)
	{
		emit Error(ERR_CUDA_DEVICE);
		return;
	}
	result = cuCtxCreate(&cudaCtx, CU_CTX_SCHED_AUTO, device);
	if (result != CUDA_SUCCESS)
	{
		emit Error(ERR_CUDA_CTX);
		return;
	}

	// initialize NVCUVID context
	CUcontext curCtx;
	CUvideoctxlock ctxLock;
	result = cuCtxPopCurrent(&curCtx);
	if (result != CUDA_SUCCESS)
	{
		emit Error(ERR_CUDA_CTX);
		return;
	}
	result = cuvidCtxLockCreate(&ctxLock, curCtx);
	if (result != CUDA_SUCCESS)
	{
		emit Error(ERR_CUDA_CTX);
		return;
	}

	CudaDecoder* pDecoder = new CudaDecoder;
	FrameQueue* pFrameQueue = new CUVIDFrameQueue(ctxLock);
	pDecoder->InitVideoDecoder(encodeConfig.inputFileName, ctxLock, pFrameQueue, encodeConfig.width, encodeConfig.height);

	int decodedW, decodedH, decodedFRN, decodedFRD;
	pDecoder->GetCodecParam(&decodedW, &decodedH, &decodedFRN, &decodedFRD);

	// If the width/height is not set, set to same as source
	if (encodeConfig.width <= 0 || encodeConfig.height <= 0) {
		encodeConfig.width = decodedW;
		encodeConfig.height = decodedH;
	}

	// same, except for fps
	if (encodeConfig.fps <= 0) {
		if (decodedFRN <= 0 || decodedFRD <= 0)
			encodeConfig.fps = 30;
		else
			encodeConfig.fps = decodedFRN / decodedFRD; // integer division of the two int values
	}

	// initialize frame queue with width/height
	pFrameQueue->init(encodeConfig.width, encodeConfig.height);

	VideoEncoder* pEncoder = new VideoEncoder(ctxLock);
	assert(pEncoder->GetHWEncoder());

	// initialize NVENC HW Encoder
	nvStatus = pEncoder->GetHWEncoder()->Initialize(cudaCtx, NV_ENC_DEVICE_TYPE_CUDA);
	if (nvStatus != NV_ENC_SUCCESS)
	{
		emit Error(ERR_NVENC_ENC_INIT);
		return;
	}

	// get preset GUID
	encodeConfig.presetGUID = pEncoder->GetHWEncoder()->GetPresetGUID(encodeConfig.encoderPreset, encodeConfig.codec);

	// create encoder
	nvStatus = pEncoder->GetHWEncoder()->CreateEncoder(&encodeConfig);
	if (nvStatus != NV_ENC_SUCCESS)
	{
		emit Error(ERR_NVENC_ENC_CREATE);
		return;
	}

	// create buffer
	nvStatus = pEncoder->AllocateIOBuffers(&encodeConfig);
	if (nvStatus != NV_ENC_SUCCESS)
	{
		emit Error(ERR_NVENC_ENC_BUFFER);
		return;
	}

	// print details to text window, start counter
	emit PrintDetails();
	NvQueryPerformanceCounter(&results.lStart);

	//start decoding thread
#ifdef _WIN32
	HANDLE decodeThread = CreateThread(NULL, 0, DecodeProc, (LPVOID)pDecoder, 0, NULL);
#else
	pthread_t pid;
	pthread_create(&pid, NULL, DecodeProc, (void*)pDecoder);
#endif

	// encoding loop: runs on this thread until the decoder signalled its end
	// and every queued frame has been consumed
	while (!(pFrameQueue->isEndOfDecode() && pFrameQueue->isEmpty())) 
	{
		CUVIDPARSERDISPINFO pInfo;
		if (pFrameQueue->dequeue(&pInfo)) 
		{
			CUdeviceptr dMappedFrame = 0;
			unsigned int pitch = 0;
			CUVIDPROCPARAMS oVPP = { 0 };
			oVPP.unpaired_field = 1;
			oVPP.progressive_frame = 1;

			result = cuvidMapVideoFrame(pDecoder->GetDecoder(), pInfo.picture_index, &dMappedFrame, &pitch, &oVPP);
			if (result != CUDA_SUCCESS)
			{
				// Nothing was mapped: do not encode or unmap a null frame, just
				// hand the picture back to the queue and move on.
				pFrameQueue->releaseFrame(&pInfo);
				continue;
			}

			EncodeFrameConfig stEncodeConfig = { 0 };
			stEncodeConfig.dptr = dMappedFrame;
			stEncodeConfig.pitch = pitch;
			stEncodeConfig.width = encodeConfig.width;
			stEncodeConfig.height = encodeConfig.height;
			pEncoder->EncodeFrame(&stEncodeConfig);

			cuvidUnmapVideoFrame(pDecoder->GetDecoder(), dMappedFrame);
			pFrameQueue->releaseFrame(&pInfo);
			//emit IncrementEncodedFrames();
		}
	}

	// flush
	pEncoder->EncodeFrame(NULL, true);

	// end decoding thread
#ifdef _WIN32
	WaitForSingleObject(decodeThread, INFINITE);
#else
	pthread_join(pid, NULL);
#endif

	// print transcoding details
	if (pEncoder->GetEncodedFrames() > 0)
	{
		results.decodedFrames = pDecoder->m_decodedFrames;
		results.encodedFrames = pEncoder->GetEncodedFrames();

		NvQueryPerformanceCounter(&results.lEnd);
		NvQueryPerformanceFrequency(&results.lFreq);
		results.elapsedTime = (double)(results.lEnd - results.lStart) / (double)results.lFreq;
	}
	emit TranscodingEnd();

	// clean up
	// The decoder, frame queue and encoder were all constructed/initialized with
	// ctxLock, so the lock is destroyed only after they are gone.
	pEncoder->Deinitialize();
	delete pDecoder;
	delete pEncoder;
	delete pFrameQueue;
	cuvidCtxLockDestroy(ctxLock);

	result = cuCtxDestroy(cudaCtx);
	if (result != CUDA_SUCCESS)
	{
		emit Error(ERR_CUDA_CTX_DESTROY);
		return;
	}

	return;
}