mfxStatus IntelDecoder::FlushDecoderAndRender()
{
    mfxStatus sts = MFX_ERR_NONE;
    mfxGetTime(&tStart);

    //
    // Stage 2: Retrieve the buffered decoded frames
    //
    while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_SURFACE == sts) {
        if (MFX_WRN_DEVICE_BUSY == sts)
            MSDK_SLEEP(1);  // Wait if device is busy, then repeat the same call to DecodeFrameAsync

        nIndex = GetFreeSurfaceIndex(pmfxSurfaces, numSurfaces);  // Find free frame surface
        MSDK_CHECK_ERROR(MFX_ERR_NOT_FOUND, nIndex, MFX_ERR_MEMORY_ALLOC);

        // Decode a frame asynchronously (returns immediately)
        sts = mfxDEC->DecodeFrameAsync(NULL, pmfxSurfaces[nIndex], &pmfxOutSurface, &syncp);

        // Ignore warnings if output is available;
        // if no output and no action is required, just repeat the DecodeFrameAsync call
        if (MFX_ERR_NONE < sts && syncp)
            sts = MFX_ERR_NONE;

        if (MFX_ERR_NONE == sts)
            sts = pSession->SyncOperation(syncp, 60000);  // Synchronize. Wait until decoded frame is ready

        if (MFX_ERR_NONE == sts) {
            ++nFrame;
            if (impl_type == MFX_IMPL_SOFTWARE) {
                outMan.Render(pmfxOutSurface);
            } else {
                // Surface locking is required when reading/writing D3D surfaces
                sts = pMfxAllocator->Lock(pMfxAllocator->pthis, pmfxOutSurface->Data.MemId, &(pmfxOutSurface->Data));
                MSDK_BREAK_ON_ERROR(sts);
                outMan.Render(pmfxOutSurface);
                sts = pMfxAllocator->Unlock(pMfxAllocator->pthis, pmfxOutSurface->Data.MemId, &(pmfxOutSurface->Data));
            }
            printf("Frame number: %d\r", nFrame);
            fflush(stdout);
        }
    }

    // MFX_ERR_MORE_DATA indicates that all buffered frames have been fetched; exit in case of other errors
    MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    mfxGetTime(&tEnd);
    elapsed += TimeDiffMsec(tEnd, tStart) / 1000;
    double fps = ((double)nFrame / elapsed);
    printf("\nExecution time: %3.2f s (%3.2f fps)\n", elapsed, fps);

    return sts;
}
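// The MSDK-style snippets in this section all poll a surface pool through GetFreeSurfaceIndex,
// whose definition is not included here. A minimal sketch consistent with how these snippets use
// it, modeled on the Intel Media SDK tutorial helper (an assumption, not taken from this codebase):
// a surface is free while the SDK holds no lock on it (Data.Locked == 0).
int GetFreeSurfaceIndex(mfxFrameSurface1** pSurfacesPool, mfxU16 nPoolSize)
{
    if (pSurfacesPool)
        for (mfxU16 i = 0; i < nPoolSize; i++)
            if (0 == pSurfacesPool[i]->Data.Locked)  // Not referenced by any in-flight operation
                return i;
    return MFX_ERR_NOT_FOUND;  // Callers treat this as "sync or retry"
}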
INT CmSurfaceManager::CreateSurface2D(CmOsResource * pCmOsResource, BOOL bIsCmCreated, CmSurface2D * &pSurface2D)
{
    UINT handle = 0;
    UINT index = m_pCmDevice->ValidSurfaceIndexStart();
    INT result = 0;
    UINT width = 0;
    UINT height = 0;
    UINT pitch = 0;
    CM_SURFACE_FORMAT format = CM_SURFACE_FORMAT_UNKNOWN;

    if (pCmOsResource == NULL) {
        return CM_INVALID_GENOS_RESOURCE_HANDLE;
    }

    pSurface2D = NULL;

    result = GetSurfaceInfo(pCmOsResource, width, height, pitch, format);
    if (result != CM_SUCCESS) {
        CM_ASSERT(0);
        return result;
    }

    result = Surface2DSanityCheck(width, height, format);
    if (result != CM_SUCCESS) {
        CM_ASSERT(0);
        return result;
    }

    if (GetFreeSurfaceIndex(index) != CM_SUCCESS) {
        CM_ASSERT(0);
        return CM_EXCEED_SURFACE_AMOUNT;
    }

    if (m_2DSurfaceCount >= m_max2DSurfaceCount) {
        CM_ASSERT(0);
        return CM_EXCEED_SURFACE_AMOUNT;
    }

    result = AllocateSurface2D(width, height, format, pCmOsResource, handle);
    if (result != CM_SUCCESS) {
        CM_ASSERT(0);
        return result;
    }

    result = CmSurface2D::Create(index, handle, width, height, pitch, format, bIsCmCreated, this, pSurface2D);
    if (result != CM_SUCCESS) {
        FreeSurface2D(handle);
        CM_ASSERT(0);
        return result;
    }

    m_SurfaceArray[index] = pSurface2D;
    UPDATE_PROFILE_FOR_2D_SURFACE(index, width, height, format, FALSE);

    return CM_SUCCESS;
}
HRESULT CDXVADecoderMpeg2::DecodeFrameInternal(BYTE* pDataIn, UINT nSize, REFERENCE_TIME rtStart, REFERENCE_TIME rtStop)
{
    HRESULT hr = S_FALSE;
    int nSurfaceIndex = -1;
    CComPtr<IMediaSample> pSampleToDeliver;
    int nFieldType = -1;
    int nSliceType = -1;
    bool bIsField = false;
    int bFrame_repeat_pict = 0;

    CHECK_HR_FALSE(FFMpeg2DecodeFrame(&m_PictureParams, &m_QMatrixData, m_SliceInfo, &m_nSliceCount,
                                      m_pFilter->GetAVCtx(), m_pFilter->GetFrame(), &m_nNextCodecIndex,
                                      &nFieldType, &nSliceType, pDataIn, nSize, &bIsField, &bFrame_repeat_pict));

    // Wait I frame after a flush
    if (m_bFlushed && (!m_PictureParams.bPicIntra || (bIsField && m_PictureParams.bSecondField))) {
        TRACE_MPEG2("CDXVADecoderMpeg2::DecodeFrame() : Flush - wait I frame, %ws\n", FrameType(bIsField, m_PictureParams.bSecondField));
        return S_FALSE;
    }

    CHECK_HR(GetFreeSurfaceIndex(nSurfaceIndex, &pSampleToDeliver, rtStart, rtStop));

    if (!bIsField || (bIsField && !m_PictureParams.bSecondField)) {
        UpdatePictureParams(nSurfaceIndex);
    }

    TRACE_MPEG2("CDXVADecoderMpeg2::DecodeFrame() : Surf = %d, PictureType = %d, %ws, m_nNextCodecIndex = %d, rtStart = [%I64d]\n",
                nSurfaceIndex, nSliceType, FrameType(bIsField, m_PictureParams.bSecondField), m_nNextCodecIndex, rtStart);

    {
        CHECK_HR(BeginFrame(nSurfaceIndex, pSampleToDeliver));
        // Send picture parameters
        CHECK_HR(AddExecuteBuffer(DXVA2_PictureParametersBufferType, sizeof(m_PictureParams), &m_PictureParams));
        // Add quantization matrix
        CHECK_HR(AddExecuteBuffer(DXVA2_InverseQuantizationMatrixBufferType, sizeof(m_QMatrixData), &m_QMatrixData));
        // Add slice control
        CHECK_HR(AddExecuteBuffer(DXVA2_SliceControlBufferType, sizeof(DXVA_SliceInfo) * m_nSliceCount, &m_SliceInfo));
        // Add bitstream
        CHECK_HR(AddExecuteBuffer(DXVA2_BitStreamDateBufferType, nSize, pDataIn, &nSize));
        // Decode frame
        CHECK_HR(Execute());
        CHECK_HR(EndFrame(nSurfaceIndex));
    }

    bool bAdded = AddToStore(nSurfaceIndex, pSampleToDeliver, (m_PictureParams.bPicBackwardPrediction != 1),
                             rtStart, rtStop, bIsField, (FF_FIELD_TYPE)nFieldType, (FF_SLICE_TYPE)nSliceType,
                             FFGetCodedPicture(m_pFilter->GetAVCtx()));
    if (bAdded) {
        hr = DisplayNextFrame();
    }

    m_bFlushed = false;
    return hr;
}
// === Public functions
HRESULT CDXVADecoderMpeg2::DecodeFrame(BYTE* pDataIn, UINT nSize, REFERENCE_TIME rtStart, REFERENCE_TIME rtStop)
{
    HRESULT hr;
    int nSurfaceIndex;
    CComPtr<IMediaSample> pSampleToDeliver;
    int nFieldType;
    int nSliceType;

    FFMpeg2DecodeFrame(&m_PictureParams, &m_QMatrixData, m_SliceInfo, &m_nSliceCount,
                       m_pFilter->GetAVCtx(), m_pFilter->GetFrame(), &m_nNextCodecIndex,
                       &nFieldType, &nSliceType, pDataIn, nSize);

    // Wait I frame after a flush
    if (m_bFlushed && !m_PictureParams.bPicIntra)
        return S_FALSE;

    hr = GetFreeSurfaceIndex(nSurfaceIndex, &pSampleToDeliver, rtStart, rtStop);
    if (FAILED(hr)) {
        ASSERT(hr == VFW_E_NOT_COMMITTED);  // Normal when stop playing
        return hr;
    }

    CHECK_HR(BeginFrame(nSurfaceIndex, pSampleToDeliver));

    UpdatePictureParams(nSurfaceIndex);

    TRACE_MPEG2("=> %s %I64d Surf=%d\n", GetFFMpegPictureType(nSliceType), rtStart, nSurfaceIndex);
    TRACE_MPEG2("CDXVADecoderMpeg2 : Decode frame %i\n", m_PictureParams.bPicScanMethod);

    CHECK_HR(AddExecuteBuffer(DXVA2_PictureParametersBufferType, sizeof(m_PictureParams), &m_PictureParams));
    CHECK_HR(AddExecuteBuffer(DXVA2_InverseQuantizationMatrixBufferType, sizeof(m_QMatrixData), &m_QMatrixData));

    // Send bitstream to accelerator
    CHECK_HR(AddExecuteBuffer(DXVA2_SliceControlBufferType, sizeof(DXVA_SliceInfo) * m_nSliceCount, &m_SliceInfo));
    CHECK_HR(AddExecuteBuffer(DXVA2_BitStreamDateBufferType, nSize, pDataIn, &nSize));

    // Decode frame
    CHECK_HR(Execute());
    CHECK_HR(EndFrame(nSurfaceIndex));

    AddToStore(nSurfaceIndex, pSampleToDeliver, (m_PictureParams.bPicBackwardPrediction != 1),
               rtStart, rtStop, false, (FF_FIELD_TYPE)nFieldType, (FF_SLICE_TYPE)nSliceType,
               FFGetCodedPicture(m_pFilter->GetAVCtx()));

    m_bFlushed = false;
    return DisplayNextFrame();
}
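// The DXVA decoder snippets use a four-argument GetFreeSurfaceIndex that also returns the media
// sample to deliver; its body is not part of this section. A plausible DXVA2-style sketch,
// assuming a CDXVA2Sample wrapper that exposes GetDXSurfaceId() and an output pin that hands out
// delivery buffers; illustrative only, not the filter's actual implementation:
HRESULT CDXVADecoder::GetFreeSurfaceIndex(int& nSurfaceIndex, IMediaSample** ppSampleToDeliver,
                                          REFERENCE_TIME rtStart, REFERENCE_TIME rtStop)
{
    nSurfaceIndex = -1;
    CComPtr<IMediaSample> pNewSample;
    // Ask the downstream allocator for a free output sample; each sample wraps one decoder surface
    HRESULT hr = m_pFilter->GetOutputPin()->GetDeliveryBuffer(&pNewSample, &rtStart, &rtStop, 0);
    if (SUCCEEDED(hr)) {
        CDXVA2Sample* pDXVA2Sample = static_cast<CDXVA2Sample*>(pNewSample.p);
        nSurfaceIndex = pDXVA2Sample ? pDXVA2Sample->GetDXSurfaceId() : 0;  // Surface backing this sample
        *ppSampleToDeliver = pNewSample.Detach();
    }
    return hr;  // VFW_E_NOT_COMMITTED here is normal when playback is stopping
}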
INT CmSurfaceManager::CreateSurface2DUP(UINT width, UINT height, CM_SURFACE_FORMAT format, void *pSysMem, CmSurface2DUP * &pSurface2D)
{
    pSurface2D = NULL;

    UINT index = m_pCmDevice->ValidSurfaceIndexStart();
    if (GetFreeSurfaceIndex(index) != CM_SUCCESS) {
        return CM_EXCEED_SURFACE_AMOUNT;
    }

    if (m_2DUPSurfaceCount >= m_max2DUPSurfaceCount) {
        CM_ASSERT(0);
        return CM_EXCEED_SURFACE_AMOUNT;
    }

    UINT handle = 0;
    INT result = AllocateSurface2DUP(width, height, format, pSysMem, handle);
    if (result != CM_SUCCESS) {
        CM_ASSERT(0);
        return result;
    }

    result = CmSurface2DUP::Create(index, handle, width, height, format, this, pSurface2D);
    if (result != CM_SUCCESS) {
        FreeSurface2DUP(handle);
        CM_ASSERT(0);
        return result;
    }

    m_SurfaceArray[index] = pSurface2D;
    m_2DUPSurfaceCount++;

    UINT sizeperpixel = 1;
    GetFormatSize(format, sizeperpixel);
    m_SurfaceSizes[index] = width * height * sizeperpixel;

    return CM_SUCCESS;
}
extern "C" __declspec(dllexport) int encodeBitmap(IntelEncoderHandle *pHandle, void *pBitmap, void **ppBuffer) { mfxStatus sts = MFX_ERR_NONE; mfxSyncPoint syncp; int nEncSurfIdx = 0; // Find free frame surface nEncSurfIdx = GetFreeSurfaceIndex(pHandle->ppEncSurfaces,pHandle->nEncSurfNum); // TODO: //sts = LoadRawFrame(pEncSurfaces[nEncSurfIdx], fSource, true); for (;;) { sts = MFXVideoENCODE_EncodeFrameAsync(pHandle->session, NULL, pHandle->ppEncSurfaces[nEncSurfIdx], &pHandle->mfxBS, &syncp); if (MFX_ERR_NONE < sts && !syncp) { // Repeat the call if warning and no output if (MFX_WRN_DEVICE_BUSY == sts) Sleep(5); } else if (MFX_ERR_NONE < sts && syncp) { // Ignore warnings if output is available sts = MFX_ERR_NONE; break; } else if (MFX_ERR_NOT_ENOUGH_BUFFER == sts) { return 0; } else break; } if (MFX_ERR_NONE == sts){ sts = MFXVideoCORE_SyncOperation(pHandle->session, syncp, 60000); // Synchronize. Wait until encoded frame is ready if (MFX_ERR_NONE != sts) return 0; *ppBuffer = pHandle->mfxBS.Data + pHandle->mfxBS.DataOffset; int iResult = (int) pHandle->mfxBS.DataLength; pHandle->mfxBS.DataLength = 0; return iResult; } }
INT CmSurfaceManager::AllocateSurfaceIndex(UINT width, UINT height, UINT depth, CM_SURFACE_FORMAT format, UINT & freeIndex, BOOL & useNewSurface, void *pSysMem)
{
    UINT index = m_pCmDevice->ValidSurfaceIndexStart();

    if ((m_bufferCount >= m_maxBufferCount && width && !height && !depth) ||
        (m_2DSurfaceCount >= m_max2DSurfaceCount && width && height && !depth)) {
        if (!TouchSurfaceInPoolForDestroy()) {
            CM_ASSERT(0);
            return CM_FAILURE;
        }
    }

    if (m_pCmDevice->IsSurfaceReuseEnabled() && !pSysMem) {
        index = GetReuseSurfaceIndex(width, height, depth, format);
        if (index) {
            useNewSurface = FALSE;
            freeIndex = index;
            m_SurfaceReleased[index] = FALSE;
            UPDATE_STATE_FOR_SURFACE_REUSE(index);
            return CM_SUCCESS;
        }
    }

    if (GetFreeSurfaceIndex(index) != CM_SUCCESS) {
        return CM_FAILURE;
    }

    useNewSurface = TRUE;
    freeIndex = index;
    m_SurfaceReleased[index] = FALSE;

    return CM_SUCCESS;
}
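// The CmSurfaceManager snippets call an internal GetFreeSurfaceIndex(UINT&) that is not shown in
// this section. A minimal sketch of what such a lookup could look like, assuming the manager
// tracks occupancy through m_SurfaceArray and a hypothetical m_SurfaceArraySize bound; names
// beyond those used above are illustrative, not the driver's actual implementation:
INT CmSurfaceManager::GetFreeSurfaceIndex(UINT &freeIndex)
{
    // Scan forward from the caller's starting index for the first unoccupied slot
    UINT index = freeIndex;
    while (index < m_SurfaceArraySize && m_SurfaceArray[index]) {
        ++index;
    }
    if (index >= m_SurfaceArraySize) {
        return CM_FAILURE;  // Pool exhausted; callers map this to CM_EXCEED_SURFACE_AMOUNT
    }
    freeIndex = index;
    return CM_SUCCESS;
}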
// === Public functions
HRESULT CDXVADecoderMpeg2::DecodeFrame(BYTE* pDataIn, UINT nSize, REFERENCE_TIME rtStart, REFERENCE_TIME rtStop)
{
    HRESULT hr;
    int nFieldType;
    int nSliceType;

    FFMpeg2DecodeFrame(&m_PictureParams, &m_QMatrixData, m_SliceInfo, &m_nSliceCount,
                       m_pFilter->GetAVCtx(), m_pFilter->GetFrame(), &m_nNextCodecIndex,
                       &nFieldType, &nSliceType, pDataIn, nSize);

    if (m_PictureParams.bSecondField && !m_bSecondField) {
        m_bSecondField = true;
    }

    // Wait I frame after a flush
    if (m_bFlushed && (!m_PictureParams.bPicIntra || (m_bSecondField && m_PictureParams.bSecondField))) {
        TRACE_MPEG2("CDXVADecoderMpeg2::DecodeFrame() : Flush - wait I frame\n");
        return S_FALSE;
    }

    if (m_bSecondField) {
        if (!m_PictureParams.bSecondField) {
            m_rtStart = rtStart;
            m_rtStop = rtStop;
            m_pSampleToDeliver = NULL;
            hr = GetFreeSurfaceIndex(m_nSurfaceIndex, &m_pSampleToDeliver, rtStart, rtStop);
            if (FAILED(hr)) {
                ASSERT(hr == VFW_E_NOT_COMMITTED);  // Normal when stop playing
                return hr;
            }
        }
    } else {
        m_rtStart = rtStart;
        m_rtStop = rtStop;
        m_pSampleToDeliver = NULL;
        hr = GetFreeSurfaceIndex(m_nSurfaceIndex, &m_pSampleToDeliver, rtStart, rtStop);
        if (FAILED(hr)) {
            ASSERT(hr == VFW_E_NOT_COMMITTED);  // Normal when stop playing
            return hr;
        }
    }

    if (m_pSampleToDeliver == NULL) {
        return S_FALSE;
    }

    CHECK_HR_TRACE(BeginFrame(m_nSurfaceIndex, m_pSampleToDeliver));

    if (m_bSecondField) {
        if (!m_PictureParams.bSecondField) {
            UpdatePictureParams(m_nSurfaceIndex);
        }
    } else {
        UpdatePictureParams(m_nSurfaceIndex);
    }

    TRACE_MPEG2("CDXVADecoderMpeg2::DecodeFrame() : Surf = %d, PictureType = %d, SecondField = %d, m_nNextCodecIndex = %d, rtStart = [%I64d]\n",
                m_nSurfaceIndex, nSliceType, m_PictureParams.bSecondField, m_nNextCodecIndex, rtStart);

    CHECK_HR_TRACE(AddExecuteBuffer(DXVA2_PictureParametersBufferType, sizeof(m_PictureParams), &m_PictureParams));
    CHECK_HR_TRACE(AddExecuteBuffer(DXVA2_InverseQuantizationMatrixBufferType, sizeof(m_QMatrixData), &m_QMatrixData));

    // Send bitstream to accelerator
    CHECK_HR_TRACE(AddExecuteBuffer(DXVA2_SliceControlBufferType, sizeof(DXVA_SliceInfo) * m_nSliceCount, &m_SliceInfo));
    CHECK_HR_TRACE(AddExecuteBuffer(DXVA2_BitStreamDateBufferType, nSize, pDataIn, &nSize));

    // Decode frame
    CHECK_HR_TRACE(Execute());
    CHECK_HR_TRACE(EndFrame(m_nSurfaceIndex));

    if (m_bSecondField) {
        if (m_PictureParams.bSecondField) {
            AddToStore(m_nSurfaceIndex, m_pSampleToDeliver, (m_PictureParams.bPicBackwardPrediction != 1),
                       m_rtStart, m_rtStop, false, (FF_FIELD_TYPE)nFieldType, (FF_SLICE_TYPE)nSliceType,
                       FFGetCodedPicture(m_pFilter->GetAVCtx()));
            hr = DisplayNextFrame();
        }
    } else {
        AddToStore(m_nSurfaceIndex, m_pSampleToDeliver, (m_PictureParams.bPicBackwardPrediction != 1),
                   m_rtStart, m_rtStop, false, (FF_FIELD_TYPE)nFieldType, (FF_SLICE_TYPE)nSliceType,
                   FFGetCodedPicture(m_pFilter->GetAVCtx()));
        hr = DisplayNextFrame();
    }

    m_bFlushed = false;
    return hr;
}
mfxStatus QSV_Encoder_Internal::Encode(uint64_t ts, uint8_t *pDataY, uint8_t *pDataUV, uint32_t strideY, uint32_t strideUV, mfxBitstream **pBS)
{
    mfxStatus sts = MFX_ERR_NONE;
    *pBS = NULL;
    int nSurfIdx = GetFreeSurfaceIndex(m_pmfxSurfaces, m_nSurfNum);  // Find free input frame surface
    int nTaskIdx = GetFreeTaskIndex(m_pTaskPool, m_nTaskPool);

    if (MFX_ERR_NOT_FOUND == nTaskIdx || MFX_ERR_NOT_FOUND == nSurfIdx) {
        // No more free tasks or surfaces, need to sync
        sts = m_session.SyncOperation(m_pTaskPool[m_nFirstSyncTask].syncp, 60000);
        MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

        mfxU8 *pTemp = m_outBitstream.Data;
        memcpy(&m_outBitstream, &m_pTaskPool[m_nFirstSyncTask].mfxBS, sizeof(mfxBitstream));

        m_pTaskPool[m_nFirstSyncTask].mfxBS.Data = pTemp;
        m_pTaskPool[m_nFirstSyncTask].mfxBS.DataLength = 0;
        m_pTaskPool[m_nFirstSyncTask].mfxBS.DataOffset = 0;
        m_pTaskPool[m_nFirstSyncTask].syncp = NULL;
        m_nFirstSyncTask = (m_nFirstSyncTask + 1) % m_nTaskPool;
        *pBS = &m_outBitstream;

        if (nTaskIdx == MFX_ERR_NOT_FOUND)
            nTaskIdx = GetFreeTaskIndex(m_pTaskPool, m_nTaskPool);
        if (nSurfIdx == MFX_ERR_NOT_FOUND)
            nSurfIdx = GetFreeSurfaceIndex(m_pmfxSurfaces, m_nSurfNum);
    }

    mfxFrameSurface1 *pSurface = m_pmfxSurfaces[nSurfIdx];
    sts = m_mfxAllocator.Lock(m_mfxAllocator.pthis, pSurface->Data.MemId, &(pSurface->Data));
    sts = LoadNV12(pSurface, pDataY, pDataUV, strideY, strideUV);
    pSurface->Data.TimeStamp = ts;
    sts = m_mfxAllocator.Unlock(m_mfxAllocator.pthis, pSurface->Data.MemId, &(pSurface->Data));
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    for (;;) {
        // Encode a frame asynchronously (returns immediately)
        sts = m_pmfxENC->EncodeFrameAsync(NULL, pSurface, &m_pTaskPool[nTaskIdx].mfxBS, &m_pTaskPool[nTaskIdx].syncp);

        if (MFX_ERR_NONE < sts && !m_pTaskPool[nTaskIdx].syncp) {
            // Repeat the call if warning and no output
            if (MFX_WRN_DEVICE_BUSY == sts)
                MSDK_SLEEP(1);  // Wait if device is busy, then repeat the same call
        } else if (MFX_ERR_NONE < sts && m_pTaskPool[nTaskIdx].syncp) {
            sts = MFX_ERR_NONE;  // Ignore warnings if output is available
            break;
        } else if (MFX_ERR_NOT_ENOUGH_BUFFER == sts) {
            // Allocate more bitstream buffer memory here if needed...
            break;
        } else
            break;
    }

    return sts;
}
int MSDKVpp::HandleProcess()
{
    if (m_vppCfg.comp_num != m_mapMediaBuf.size()) {
        VPP_TRACE_ERROR("[MSDKVpp]-----Something wrong here, all sinkpads detached\n");
        return -1;
    }

    std::map<MSDKDecodeVpp*, MediaBuf>::iterator it = m_mapMediaBuf.begin();
    mfxFrameSurface1* pInSurface = NULL;
    mfxStatus sts = MFX_ERR_NONE;
    int nIndex = 0;
    m_tCompStc = 0;

    while (!m_bWantToStop) {
        usleep(1000);
        if (VPP_COMP == m_mode) {
            // Composite case: break if there is no more data when preparing VPP frames
            int prepare_result = PrepareVppCompFrames();
            if (-1 == prepare_result)
                break;
            if (1 == prepare_result) {
                m_bEndOfStream = true;
                break;
            }
            //printf("[MSDKVpp %p]-----Prepare composite frames over\n", this);
        } else {
            if (!m_bAccessNextElem)
                continue;
            if (it->second.pRingBuf->IsEmpty()) {  // No data
                if (it->first->GetDataEos())  // No more data in the future
                    pInSurface = NULL;
                else
                    continue;
            } else {
                it->second.pRingBuf->Get(pInSurface);
            }
        }

        if (!m_bInit) {
            m_mapMediaBuf.begin()->second.pRingBuf->Get(pInSurface);
            Locker<Mutex> l(m_xMsdkInit);
            sts = InitVpp(pInSurface);
            if (MFX_ERR_NONE == sts) {
                m_bInit = true;
                VPP_TRACE_INFO("[MSDKVpp]VPP element %p init successfully\n", this);
            } else {
                VPP_TRACE_ERROR("[MSDKVpp]VPP create failed: %d\n", sts);
                break;
            }
        }

        if (m_bReinit) {
            // Re-init VPP
            mfxU32 before_change = GetSysTime();
            VPP_TRACE_INFO("[MSDKVpp]stop/init vpp\n");
            m_pVpp->Close();
            VPP_TRACE_INFO("Re-init VPP...\n");
            m_mapMediaBuf.begin()->second.pRingBuf->Get(pInSurface);
            {
                Locker<Mutex> l(m_xMsdkReinit);
                sts = InitVpp(pInSurface);
                m_bReinit = false;
            }
            VPP_TRACE_INFO("[MSDKVpp]Re-Init VPP takes time %u(us)\n", GetSysTime() - before_change);
        }

        // Memory was allocated in InitVpp(pInSurface), so a free surface slot can now be fetched
        nIndex = GetFreeSurfaceIndex(m_pSurfacePool, m_nSurfaces);
        if (MFX_ERR_NOT_FOUND == nIndex) {
            //printf("[MSDKVpp]-----Can't get a free surface slot, check whether none is free\n");
            continue;
        }

        if (VPP_COMP == m_mode) {
            if (m_pMeasuremnt) {
                m_pMeasuremnt->GetLock();
                m_pMeasuremnt->TimeStpStart(VPP_FRAME_TIME_STAMP, this);
                m_pMeasuremnt->RelLock();
            }
            for (it = m_mapMediaBuf.begin(); it != m_mapMediaBuf.end(); ++it) {
                it->second.pRingBuf->Pop(pInSurface);
                sts = DoingVpp(pInSurface, m_pSurfacePool[nIndex]);
                pInSurface->Data.Locked--;
            }
            //printf("[MSDKVpp %p]-----Composite multiple frames into one frame successfully\n", this);
        } else {
            sts = DoingVpp(pInSurface, m_pSurfacePool[nIndex]);
            m_bAccessNextElem = false;
            dynamic_cast<MSDKVpp*>(it->first)->ReleaseSurface();
            if (!pInSurface)  // Resize case: break when notified of end of stream
                break;
        }
    }

    if (!m_bWantToStop) {
        VPP_TRACE_INFO("[%p]This work flow finished video processing and will be stopped.\n", this);
        m_bWantToStop = true;
    }
    m_rCallback.StopTrain();

    return 0;
}
INT CmSurfaceManager::CreateSurface2D(UINT width, UINT height, UINT pitch, BOOL bIsCmCreated, CM_SURFACE_FORMAT format, CmSurface2D * &pSurface2D)
{
    UINT handle = 0;
    UINT index = m_pCmDevice->ValidSurfaceIndexStart();
    INT result = 0;

    pSurface2D = NULL;

    result = Surface2DSanityCheck(width, height, format);
    if (result != CM_SUCCESS) {
        CM_ASSERT(0);
        return result;
    }

    if (bIsCmCreated) {
        BOOL useNewSurface = TRUE;
        if (AllocateSurfaceIndex(width, height, 0, format, index, useNewSurface, NULL) != CM_SUCCESS) {
            return CM_EXCEED_SURFACE_AMOUNT;
        }

        if (!useNewSurface) {
            CmSurface *pSurface = m_SurfaceArray[index];
            if (pSurface && (pSurface->Type() == CM_ENUM_CLASS_TYPE_CMSURFACE2D)) {
                pSurface2D = static_cast<CmSurface2D *>(pSurface);
            } else {
                return CM_FAILURE;
            }
            UpdateSurface2D(pSurface2D, width, height, format);
            UPDATE_PROFILE_FOR_2D_SURFACE(index, width, height, format, TRUE);
            return CM_SUCCESS;
        }
    } else {
        if (GetFreeSurfaceIndex(index) != CM_SUCCESS) {
            return CM_EXCEED_SURFACE_AMOUNT;
        }
    }

    if (m_2DSurfaceCount >= m_max2DSurfaceCount) {
        CM_ASSERT(0);
        return CM_EXCEED_SURFACE_AMOUNT;
    }

    result = AllocateSurface2D(width, height, format, handle, pitch);
    if (result != CM_SUCCESS) {
        CM_ASSERT(0);
        return result;
    }

    result = CmSurface2D::Create(index, handle, width, height, pitch, format, TRUE, this, pSurface2D);
    if (result != CM_SUCCESS) {
        FreeSurface2D(handle);
        CM_ASSERT(0);
        return result;
    }

    m_SurfaceArray[index] = pSurface2D;
    UPDATE_PROFILE_FOR_2D_SURFACE(index, width, height, format, FALSE);

    return CM_SUCCESS;
}
HRESULT CDXVADecoderH264::DecodeFrame(BYTE* pDataIn, UINT nSize, REFERENCE_TIME rtStart, REFERENCE_TIME rtStop)
{
    HRESULT hr = S_FALSE;
    CH264Nalu Nalu;
    UINT nSlices = 0;
    int nSurfaceIndex;
    int nFieldType;
    int nSliceType;
    int nFramePOC;
    IDirect3DSurface9* pSampleToDeliver;
    int nDXIndex = 0;
    UINT nNalOffset = 0;
    int nOutPOC;
    REFERENCE_TIME rtOutStart;

    if (pDataIn == NULL || nSize == 0)
        return S_FALSE;

    Nalu.SetBuffer(pDataIn, nSize, m_nNALLength);
    FFH264DecodeBuffer(m_pFilter->GetAVCtx(), pDataIn, nSize, &nFramePOC, &nOutPOC, &rtOutStart);
    //CLog::Log(LOGDEBUG, "nFramePOC = %d nOutPOC %d rtOutStart%d", nFramePOC, nOutPOC, rtOutStart);

    while (Nalu.ReadNext()) {
        switch (Nalu.GetType()) {
            case NALU_TYPE_SLICE:
            case NALU_TYPE_IDR:
                if (m_bUseLongSlice) {
                    m_pSliceLong[nSlices].BSNALunitDataLocation = nNalOffset;
                    m_pSliceLong[nSlices].SliceBytesInBuffer = Nalu.GetDataLength() + 3;  //.GetRoundedDataLength();
                    m_pSliceLong[nSlices].slice_id = nSlices;
                    FF264UpdateRefFrameSliceLong(&m_DXVAPicParams, &m_pSliceLong[nSlices], m_pFilter->GetAVCtx());

                    if (nSlices > 0)
                        m_pSliceLong[nSlices - 1].NumMbsForSlice = m_pSliceLong[nSlices].NumMbsForSlice =
                            m_pSliceLong[nSlices].first_mb_in_slice - m_pSliceLong[nSlices - 1].first_mb_in_slice;
                }
                nSlices++;
                nNalOffset += (UINT)(Nalu.GetDataLength() + 3);
                if (nSlices > MAX_SLICES)
                    break;
                break;
        }
    }
    if (nSlices == 0)
        return S_FALSE;

    m_nMaxWaiting = min(max(m_DXVAPicParams.num_ref_frames, 3), 8);

    // If parsing fails (probably no PPS/SPS yet), continue anyway; they may arrive later (happens on truncated streams)
    if (FAILED(FFH264BuildPicParams(&m_DXVAPicParams, &m_DXVAScalingMatrix, &nFieldType, &nSliceType, m_pFilter->GetAVCtx(), m_pFilter->GetPCIVendor())))
        return S_FALSE;

    // Wait I frame after a flush
    if (m_bFlushed && !m_DXVAPicParams.IntraPicFlag)
        return S_FALSE;

    CHECK_HR(GetFreeSurfaceIndex(nSurfaceIndex, &pSampleToDeliver, rtStart, rtStop));
    FFH264SetCurrentPicture(nSurfaceIndex, &m_DXVAPicParams, m_pFilter->GetAVCtx());

    CHECK_HR(BeginFrame(pSampleToDeliver));

    m_DXVAPicParams.StatusReportFeedbackNumber++;
    // TRACE("CDXVADecoderH264 : Decode frame %u\n", m_DXVAPicParams.StatusReportFeedbackNumber);

    // Send picture parameters
    CHECK_HR(AddExecuteBuffer(DXVA2_PictureParametersBufferType, sizeof(m_DXVAPicParams), &m_DXVAPicParams));
    CHECK_HR(Execute());

    // Add bitstream, slice control and quantization matrix
    CHECK_HR(AddExecuteBuffer(DXVA2_BitStreamDateBufferType, nSize, pDataIn, &nSize));

    if (m_bUseLongSlice) {
        CHECK_HR(AddExecuteBuffer(DXVA2_SliceControlBufferType, sizeof(DXVA_Slice_H264_Long) * nSlices, m_pSliceLong));
    } else {
        CHECK_HR(AddExecuteBuffer(DXVA2_SliceControlBufferType, sizeof(DXVA_Slice_H264_Short) * nSlices, m_pSliceShort));
    }

    CHECK_HR(AddExecuteBuffer(DXVA2_InverseQuantizationMatrixBufferType, sizeof(DXVA_Qmatrix_H264), (void*)&m_DXVAScalingMatrix));

    // Decode bitstream
    CHECK_HR(Execute());
    CHECK_HR(EndFrame(nSurfaceIndex));

#ifdef _DEBUG
    //DisplayStatus();
#endif

    bool bAdded = AddToStore(nSurfaceIndex, m_DXVAPicParams.RefPicFlag, rtStart, rtStop,
                             m_DXVAPicParams.field_pic_flag, (FF_FIELD_TYPE)nFieldType,
                             (FF_SLICE_TYPE)nSliceType, nFramePOC);

    FFH264UpdateRefFramesList(&m_DXVAPicParams, m_pFilter->GetAVCtx());
    ClearUnusedRefFrames();

    if (bAdded) {
        hr = DisplayNextFrame();
        if (nOutPOC != -1) {
            m_nOutPOC = nOutPOC;
            m_rtOutStart = rtOutStart;
        }
    }

    m_bFlushed = false;
    return hr;
}
mfxStatus IntelDecoder::RunDecodeAndRender()
{
    mfxStatus sts = MFX_ERR_NONE;

    // ===============================================================
    // Start decoding the frames from the stream
    //
    mfxGetTime(&tStart);

    pmfxOutSurface = NULL;
    pmfxOutSurface_sw = NULL;
    nIndex = 0;
    nIndex2 = 0;
    nFrame = 0;

    //
    // Stage 1: Main decoding loop
    //
    while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts || MFX_ERR_MORE_SURFACE == sts) {
        if (MFX_WRN_DEVICE_BUSY == sts)
            MSDK_SLEEP(1);  // Wait if device is busy, then repeat the same call to DecodeFrameAsync

        if (MFX_ERR_MORE_DATA == sts) {
            sts = ReadBitStreamData(&mfxBS, fSource);  // Read more data into input bit stream
            MSDK_BREAK_ON_ERROR(sts);
        }

        if (MFX_ERR_MORE_SURFACE == sts || MFX_ERR_NONE == sts) {
            nIndex = GetFreeSurfaceIndex(pmfxSurfaces, numSurfaces);  // Find free frame surface
            MSDK_CHECK_ERROR(MFX_ERR_NOT_FOUND, nIndex, MFX_ERR_MEMORY_ALLOC);
        }

        // Decode a frame asynchronously (returns immediately)
        // - If the input bitstream contains multiple frames, DecodeFrameAsync will start decoding
        //   multiple frames and remove them from the bitstream
        sts = mfxDEC->DecodeFrameAsync(&mfxBS, pmfxSurfaces[nIndex], &pmfxOutSurface, &syncp);

        // Ignore warnings if output is available;
        // if no output and no action is required, just repeat the DecodeFrameAsync call
        if (MFX_ERR_NONE < sts && syncp)
            sts = MFX_ERR_NONE;

        if (MFX_ERR_NONE == sts)
            sts = pSession->SyncOperation(syncp, 60000);  // Synchronize. Wait until decoded frame is ready

        if (MFX_ERR_NONE == sts) {
            ++nFrame;
            if (impl_type == MFX_IMPL_SOFTWARE) {
                outMan.Render(pmfxOutSurface);
            } else {
                // Surface locking is required when reading/writing video surfaces
                sts = pMfxAllocator->Lock(pMfxAllocator->pthis, pmfxOutSurface->Data.MemId, &(pmfxOutSurface->Data));
                MSDK_BREAK_ON_ERROR(sts);
                outMan.Render(pmfxOutSurface);
                sts = pMfxAllocator->Unlock(pMfxAllocator->pthis, pmfxOutSurface->Data.MemId, &(pmfxOutSurface->Data));
                MSDK_BREAK_ON_ERROR(sts);
            }
            printf("Frame number: %d\r", nFrame);
            fflush(stdout);
        }
    }

    // MFX_ERR_MORE_DATA means that the file has ended; continue with the buffering loop, exit on other errors
    MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    mfxGetTime(&tEnd);
    elapsed = TimeDiffMsec(tEnd, tStart) / 1000;
    double fps = ((double)nFrame / elapsed);
    printf("\nExecution time: %3.2f s (%3.2f fps)\n", elapsed, fps);

    return sts;
}
// === Public functions
HRESULT TDXVADecoderVC1::DecodeFrame(BYTE* pDataIn, UINT nSize, REFERENCE_TIME rtStart, REFERENCE_TIME rtStop)
{
    HRESULT hr;
    int nSurfaceIndex;
    CComPtr<IMediaSample> pSampleToDeliver;
    int nFieldType, nSliceType;
    UINT nFrameSize, nSize_Result;

    m_pCodec->libavcodec->FFVC1UpdatePictureParam(&m_PictureParams, m_pCodec->avctx, &nFieldType, &nSliceType,
                                                  pDataIn, nSize, &nFrameSize, FALSE, &m_bFrame_repeat_pict);

    if (m_pCodec->libavcodec->FFIsSkipped(m_pCodec->avctx)) {
        return S_OK;
    }

    // Wait I frame after a flush
    if (m_bFlushed && !m_PictureParams.bPicIntra) {
        return S_FALSE;
    }

    hr = GetFreeSurfaceIndex(nSurfaceIndex, &pSampleToDeliver, rtStart, rtStop);
    if (FAILED(hr)) {
        ASSERT(hr == VFW_E_NOT_COMMITTED);  // Normal when stop playing
        return hr;
    }

    CHECK_HR(BeginFrame(nSurfaceIndex, pSampleToDeliver));

    DPRINTF(_l("TDXVADecoderVC1::DecodeFrame - PictureType = %s, rtStart = %I64d Surf=%d\n"),
            m_pCodec->libavcodec->GetFFMpegPictureType(nSliceType), rtStart, nSurfaceIndex);

    m_PictureParams.wDecodedPictureIndex = nSurfaceIndex;
    m_PictureParams.wDeblockedPictureIndex = m_PictureParams.wDecodedPictureIndex;

    // Manage reference picture list
    if (!m_PictureParams.bPicBackwardPrediction) {
        if (m_wRefPictureIndex[0] != NO_REF_FRAME) {
            RemoveRefFrame(m_wRefPictureIndex[0]);
        }
        m_wRefPictureIndex[0] = m_wRefPictureIndex[1];
        m_wRefPictureIndex[1] = nSurfaceIndex;
    }
    m_PictureParams.wForwardRefPictureIndex  = (m_PictureParams.bPicIntra == 0) ? m_wRefPictureIndex[0] : NO_REF_FRAME;
    m_PictureParams.wBackwardRefPictureIndex = (m_PictureParams.bPicBackwardPrediction == 1) ? m_wRefPictureIndex[1] : NO_REF_FRAME;

    m_PictureParams.bPic4MVallowed = (m_PictureParams.wBackwardRefPictureIndex == NO_REF_FRAME && m_PictureParams.bPicStructure == 3) ? 1 : 0;
    m_PictureParams.bPicDeblockConfined |= (m_PictureParams.wBackwardRefPictureIndex == NO_REF_FRAME) ? 0x04 : 0;

    m_PictureParams.bPicScanMethod++;  // Use for status reporting sections 3.8.1 and 3.8.2

    DPRINTF(_l("TDXVADecoderVC1::DecodeFrame - Decode frame %i\n"), m_PictureParams.bPicScanMethod);

    // Send picture params to accelerator
    CHECK_HR(AddExecuteBuffer(DXVA2_PictureParametersBufferType, sizeof(m_PictureParams), &m_PictureParams));

    // Send bitstream to accelerator
    CHECK_HR(AddExecuteBuffer(DXVA2_BitStreamDateBufferType, nFrameSize ? nFrameSize : nSize, pDataIn, &nSize_Result));

    m_SliceInfo.wQuantizerScaleCode = 1;  // TODO : 1->31 ???
    m_SliceInfo.dwSliceBitsInBuffer = nSize_Result * 8;
    CHECK_HR(AddExecuteBuffer(DXVA2_SliceControlBufferType, sizeof(m_SliceInfo), &m_SliceInfo));

    // Decode frame
    CHECK_HR(Execute());
    CHECK_HR(EndFrame(nSurfaceIndex));

    // ***************
    if (nFrameSize) {  // Decoding second field
        m_pCodec->libavcodec->FFVC1UpdatePictureParam(&m_PictureParams, m_pCodec->avctx, NULL, NULL,
                                                      pDataIn, nSize, NULL, TRUE, &m_bFrame_repeat_pict);

        CHECK_HR(BeginFrame(nSurfaceIndex, pSampleToDeliver));

        DPRINTF(_l("TDXVADecoderVC1::DecodeFrame - PictureType = %s\n"), m_pCodec->libavcodec->GetFFMpegPictureType(nSliceType));

        CHECK_HR(AddExecuteBuffer(DXVA2_PictureParametersBufferType, sizeof(m_PictureParams), &m_PictureParams));

        // Send bitstream to accelerator
        CHECK_HR(AddExecuteBuffer(DXVA2_BitStreamDateBufferType, nSize - nFrameSize, pDataIn + nFrameSize, &nSize_Result));

        m_SliceInfo.wQuantizerScaleCode = 1;  // TODO : 1->31 ???
        m_SliceInfo.dwSliceBitsInBuffer = nSize_Result * 8;
        CHECK_HR(AddExecuteBuffer(DXVA2_SliceControlBufferType, sizeof(m_SliceInfo), &m_SliceInfo));

        // Decode frame
        CHECK_HR(Execute());
        CHECK_HR(EndFrame(nSurfaceIndex));
    }
    // ***************

#ifdef _DEBUG
    DisplayStatus();
#endif

    // Re-order B frames
    if (m_pCodec->isReorderBFrame()) {
        if (m_PictureParams.bPicBackwardPrediction == 1) {
            SwapRT(rtStart, m_rtStartDelayed);
            SwapRT(rtStop, m_rtStopDelayed);
        } else {
            // Save I or P reference time (swap later)
            if (!m_bFlushed) {
                if (m_nDelayedSurfaceIndex != -1) {
                    UpdateStore(m_nDelayedSurfaceIndex, m_rtStartDelayed, m_rtStopDelayed);
                }
                m_rtStartDelayed = m_rtStopDelayed = _I64_MAX;
                SwapRT(rtStart, m_rtStartDelayed);
                SwapRT(rtStop, m_rtStopDelayed);
                m_nDelayedSurfaceIndex = nSurfaceIndex;
            }
        }
    }

    AddToStore(nSurfaceIndex, pSampleToDeliver, (m_PictureParams.bPicBackwardPrediction != 1),
               rtStart, rtStop, false, (FF_FIELD_TYPE)nFieldType, (FF_SLICE_TYPE)nSliceType, 0);

    m_bFlushed = false;
    return DisplayNextFrame();
}
int main()
{
    mfxStatus sts = MFX_ERR_NONE;

    mfxU16 inputWidth = 1920;
    mfxU16 inputHeight = 1080;

    // =====================================================================
    // Intel Media SDK encode pipeline setup
    // - In this example we are encoding an AVC (H.264) stream
    // - Video memory surfaces are used
    // - Asynchronous operation by executing more than one encode operation simultaneously
    //

    // Open input YV12 YUV file
    FILE* fSource;
    fopen_s(&fSource, "bbb1920x1080.yuv", "rb");
    MSDK_CHECK_POINTER(fSource, MFX_ERR_NULL_PTR);

    // Create output elementary stream (ES) H.264 file
    FILE* fSink;
    fopen_s(&fSink, "test_d3d_async.264", "wb");
    MSDK_CHECK_POINTER(fSink, MFX_ERR_NULL_PTR);

    // Initialize Media SDK session
    // - MFX_IMPL_AUTO_ANY selects HW acceleration if available (on any adapter)
    // - Version 1.0 is selected for greatest backwards compatibility.
    //   If more recent API features are needed, change the version accordingly
    mfxIMPL impl = MFX_IMPL_AUTO_ANY;
#ifdef DX11_D3D
    impl |= MFX_IMPL_VIA_D3D11;
#endif
    mfxVersion ver = {0, 1};
    MFXVideoSession mfxSession;
    sts = mfxSession.Init(impl, &ver);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Create DirectX device context
    mfxHDL deviceHandle;
    sts = CreateHWDevice(mfxSession, &deviceHandle, NULL);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Provide device manager to Media SDK
    sts = mfxSession.SetHandle(DEVICE_MGR_TYPE, deviceHandle);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    mfxFrameAllocator mfxAllocator;
    mfxAllocator.Alloc = simple_alloc;
    mfxAllocator.Free = simple_free;
    mfxAllocator.Lock = simple_lock;
    mfxAllocator.Unlock = simple_unlock;
    mfxAllocator.GetHDL = simple_gethdl;

    // When using video memory we must provide Media SDK with an external allocator
    sts = mfxSession.SetFrameAllocator(&mfxAllocator);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Initialize encoder parameters
    mfxVideoParam mfxEncParams;
    memset(&mfxEncParams, 0, sizeof(mfxEncParams));
    mfxEncParams.mfx.CodecId = MFX_CODEC_AVC;
    mfxEncParams.mfx.TargetUsage = MFX_TARGETUSAGE_BALANCED;
    mfxEncParams.mfx.TargetKbps = 2000;
    mfxEncParams.mfx.RateControlMethod = MFX_RATECONTROL_VBR;
    mfxEncParams.mfx.FrameInfo.FrameRateExtN = 30;
    mfxEncParams.mfx.FrameInfo.FrameRateExtD = 1;
    mfxEncParams.mfx.FrameInfo.FourCC = MFX_FOURCC_NV12;
    mfxEncParams.mfx.FrameInfo.ChromaFormat = MFX_CHROMAFORMAT_YUV420;
    mfxEncParams.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;
    mfxEncParams.mfx.FrameInfo.CropX = 0;
    mfxEncParams.mfx.FrameInfo.CropY = 0;
    mfxEncParams.mfx.FrameInfo.CropW = inputWidth;
    mfxEncParams.mfx.FrameInfo.CropH = inputHeight;
    // Width must be a multiple of 16
    // Height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture
    mfxEncParams.mfx.FrameInfo.Width = MSDK_ALIGN16(inputWidth);
    mfxEncParams.mfx.FrameInfo.Height = (MFX_PICSTRUCT_PROGRESSIVE == mfxEncParams.mfx.FrameInfo.PicStruct) ?
        MSDK_ALIGN16(inputHeight) : MSDK_ALIGN32(inputHeight);
    mfxEncParams.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY;

    // Configure Media SDK to keep more operations in flight
    // - AsyncDepth represents the number of tasks that can be submitted before synchronization is required
    // - The choice of AsyncDepth = 4 is quite arbitrary but has proven to result in good performance
    mfxEncParams.AsyncDepth = 4;

    // Create Media SDK encoder
    MFXVideoENCODE mfxENC(mfxSession);

    // Validate video encode parameters (optional)
    // - In this example the validation result is written to the same structure
    // - MFX_WRN_INCOMPATIBLE_VIDEO_PARAM is returned if some of the video parameters are not supported;
    //   instead, the encoder will select suitable parameters closest to the requested configuration
    sts = mfxENC.Query(&mfxEncParams, &mfxEncParams);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_INCOMPATIBLE_VIDEO_PARAM);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Query number of required surfaces for encoder
    mfxFrameAllocRequest EncRequest;
    memset(&EncRequest, 0, sizeof(EncRequest));
    sts = mfxENC.QueryIOSurf(&mfxEncParams, &EncRequest);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

#ifdef DX11_D3D
    EncRequest.Type |= WILL_WRITE;  // Hint to DX11 memory handler that application will write data to input surfaces
#endif

    // Allocate required surfaces
    mfxFrameAllocResponse mfxResponse;
    sts = mfxAllocator.Alloc(mfxAllocator.pthis, &EncRequest, &mfxResponse);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    mfxU16 nEncSurfNum = mfxResponse.NumFrameActual;

    // Allocate surface headers (mfxFrameSurface1) for encoder
    mfxFrameSurface1** pmfxSurfaces = new mfxFrameSurface1*[nEncSurfNum];
    MSDK_CHECK_POINTER(pmfxSurfaces, MFX_ERR_MEMORY_ALLOC);
    for (int i = 0; i < nEncSurfNum; i++) {
        pmfxSurfaces[i] = new mfxFrameSurface1;
        memset(pmfxSurfaces[i], 0, sizeof(mfxFrameSurface1));
        memcpy(&(pmfxSurfaces[i]->Info), &(mfxEncParams.mfx.FrameInfo), sizeof(mfxFrameInfo));
        pmfxSurfaces[i]->Data.MemId = mfxResponse.mids[i];  // MID (memory id) represents one D3D NV12 surface

#ifndef ENABLE_INPUT
        // In case of simulating direct access to frames we initialize the allocated surfaces with a default pattern
        // - For true benchmark comparisons to async workloads all surfaces must have the same data
#ifndef DX11_D3D
        IDirect3DSurface9 *pSurface;
        D3DSURFACE_DESC desc;
        D3DLOCKED_RECT locked;
        pSurface = (IDirect3DSurface9 *)mfxResponse.mids[i];
        pSurface->GetDesc(&desc);
        pSurface->LockRect(&locked, 0, D3DLOCK_NOSYSLOCK);
        memset((mfxU8 *)locked.pBits, 100, desc.Height * locked.Pitch);  // Y plane
        memset((mfxU8 *)locked.pBits + desc.Height * locked.Pitch, 50, (desc.Height * locked.Pitch) / 2);  // UV plane
        pSurface->UnlockRect();
#else
        // For now, just leave D3D11 surface data uninitialized
#endif
#endif
    }

    // Initialize the Media SDK encoder
    sts = mfxENC.Init(&mfxEncParams);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Retrieve video parameters selected by encoder.
    // - BufferSizeInKB parameter is required to set bit stream buffer size
    mfxVideoParam par;
    memset(&par, 0, sizeof(par));
    sts = mfxENC.GetVideoParam(&par);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Create task pool to improve asynchronous performance (greater GPU utilization)
    mfxU16 taskPoolSize = mfxEncParams.AsyncDepth;  // number of tasks that can be submitted before synchronization is required
    Task* pTasks = new Task[taskPoolSize];
    memset(pTasks, 0, sizeof(Task) * taskPoolSize);
    for (int i = 0; i < taskPoolSize; i++) {
        // Prepare Media SDK bit stream buffer
        pTasks[i].mfxBS.MaxLength = par.mfx.BufferSizeInKB * 1000;
        pTasks[i].mfxBS.Data = new mfxU8[pTasks[i].mfxBS.MaxLength];
        MSDK_CHECK_POINTER(pTasks[i].mfxBS.Data, MFX_ERR_MEMORY_ALLOC);
    }

    // ===================================
    // Start encoding the frames
    //
#ifdef ENABLE_BENCHMARK
    LARGE_INTEGER tStart, tEnd;
    QueryPerformanceFrequency(&tStart);
    double freq = (double)tStart.QuadPart;
    QueryPerformanceCounter(&tStart);
#endif

    int nEncSurfIdx = 0;
    int nTaskIdx = 0;
    int nFirstSyncTask = 0;
    mfxU32 nFrame = 0;

    //
    // Stage 1: Main encoding loop
    //
    while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts) {
        nTaskIdx = GetFreeTaskIndex(pTasks, taskPoolSize);  // Find free task
        if (MFX_ERR_NOT_FOUND == nTaskIdx) {
            // No more free tasks, need to sync
            sts = mfxSession.SyncOperation(pTasks[nFirstSyncTask].syncp, 60000);
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

            sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink);
            MSDK_BREAK_ON_ERROR(sts);
            pTasks[nFirstSyncTask].syncp = NULL;
            nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize;
            ++nFrame;
#ifdef ENABLE_OUTPUT
            printf("Frame number: %d\r", nFrame);
#endif
        } else {
            nEncSurfIdx = GetFreeSurfaceIndex(pmfxSurfaces, nEncSurfNum);  // Find free frame surface
            if (MFX_ERR_NOT_FOUND == nEncSurfIdx)
                return MFX_ERR_MEMORY_ALLOC;

            // Surface locking required when reading/writing D3D surfaces
            sts = mfxAllocator.Lock(mfxAllocator.pthis, pmfxSurfaces[nEncSurfIdx]->Data.MemId, &(pmfxSurfaces[nEncSurfIdx]->Data));
            MSDK_BREAK_ON_ERROR(sts);

            sts = LoadRawFrame(pmfxSurfaces[nEncSurfIdx], fSource);
            MSDK_BREAK_ON_ERROR(sts);

            sts = mfxAllocator.Unlock(mfxAllocator.pthis, pmfxSurfaces[nEncSurfIdx]->Data.MemId, &(pmfxSurfaces[nEncSurfIdx]->Data));
            MSDK_BREAK_ON_ERROR(sts);

            for (;;) {
                // Encode a frame asynchronously (returns immediately)
                sts = mfxENC.EncodeFrameAsync(NULL, pmfxSurfaces[nEncSurfIdx], &pTasks[nTaskIdx].mfxBS, &pTasks[nTaskIdx].syncp);

                if (MFX_ERR_NONE < sts && !pTasks[nTaskIdx].syncp) {  // Repeat the call if warning and no output
                    if (MFX_WRN_DEVICE_BUSY == sts)
                        Sleep(1);  // Wait if device is busy, then repeat the same call
                } else if (MFX_ERR_NONE < sts && pTasks[nTaskIdx].syncp) {
                    sts = MFX_ERR_NONE;  // Ignore warnings if output is available
                    break;
                } else if (MFX_ERR_NOT_ENOUGH_BUFFER == sts) {
                    // Allocate more bitstream buffer memory here if needed...
                    break;
                } else
                    break;
            }
        }
    }

    // MFX_ERR_MORE_DATA means that the input file has ended; continue with the buffering loop, exit on other errors
    MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    //
    // Stage 2: Retrieve the buffered encoded frames
    //
    while (MFX_ERR_NONE <= sts) {
        nTaskIdx = GetFreeTaskIndex(pTasks, taskPoolSize);  // Find free task
        if (MFX_ERR_NOT_FOUND == nTaskIdx) {
            // No more free tasks, need to sync
            sts = mfxSession.SyncOperation(pTasks[nFirstSyncTask].syncp, 60000);
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

            sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink);
            MSDK_BREAK_ON_ERROR(sts);
            pTasks[nFirstSyncTask].syncp = NULL;
            nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize;
            ++nFrame;
#ifdef ENABLE_OUTPUT
            printf("Frame number: %d\r", nFrame);
#endif
        } else {
            for (;;) {
                // Encode a frame asynchronously (returns immediately)
                sts = mfxENC.EncodeFrameAsync(NULL, NULL, &pTasks[nTaskIdx].mfxBS, &pTasks[nTaskIdx].syncp);

                if (MFX_ERR_NONE < sts && !pTasks[nTaskIdx].syncp) {  // Repeat the call if warning and no output
                    if (MFX_WRN_DEVICE_BUSY == sts)
                        Sleep(1);  // Wait if device is busy, then repeat the same call
                } else if (MFX_ERR_NONE < sts && pTasks[nTaskIdx].syncp) {
                    sts = MFX_ERR_NONE;  // Ignore warnings if output is available
                    break;
                } else
                    break;
            }
        }
    }

    // MFX_ERR_MORE_DATA indicates that there are no more buffered frames; exit in case of other errors
    MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    //
    // Stage 3: Sync all remaining tasks in task pool
    //
    while (pTasks[nFirstSyncTask].syncp) {
        sts = mfxSession.SyncOperation(pTasks[nFirstSyncTask].syncp, 60000);
        MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

        sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink);
        MSDK_BREAK_ON_ERROR(sts);
        pTasks[nFirstSyncTask].syncp = NULL;
        nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize;
        ++nFrame;
#ifdef ENABLE_OUTPUT
        printf("Frame number: %d\r", nFrame);
#endif
    }

#ifdef ENABLE_BENCHMARK
    QueryPerformanceCounter(&tEnd);
    double duration = ((double)tEnd.QuadPart - (double)tStart.QuadPart) / freq;
    printf("\nExecution time: %3.2fs (%3.2ffps)\n", duration, nFrame / duration);
#endif

    // ===================================================================
    // Clean up resources
    // - It is recommended to close Media SDK components first, before releasing allocated surfaces, since
    //   some surfaces may still be locked by internal Media SDK resources.
    mfxENC.Close();
    // mfxSession closed automatically on destruction

    for (int i = 0; i < nEncSurfNum; i++)
        delete pmfxSurfaces[i];
    MSDK_SAFE_DELETE_ARRAY(pmfxSurfaces);

    for (int i = 0; i < taskPoolSize; i++)
        MSDK_SAFE_DELETE_ARRAY(pTasks[i].mfxBS.Data);
    MSDK_SAFE_DELETE_ARRAY(pTasks);

    fclose(fSource);
    fclose(fSink);

    CleanupHWDevice();

    return 0;
}
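// The encoding samples above drive a Task pool through GetFreeTaskIndex, which is not defined in
// this section. A minimal sketch consistent with how it is used here (a task is free while its
// sync point is NULL), modeled on the Intel Media SDK tutorial helper and assumed rather than
// taken from this codebase:
int GetFreeTaskIndex(Task* pTaskPool, mfxU16 nPoolSize)
{
    if (pTaskPool)
        for (int i = 0; i < nPoolSize; i++)
            if (!pTaskPool[i].syncp)  // A cleared sync point marks the task as reusable
                return i;
    return MFX_ERR_NOT_FOUND;  // All tasks in flight; the caller syncs the oldest one
}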
int MSDKEncode::HandleProcess()
{
    mfxStatus sts = MFX_ERR_NONE;
    mfxFrameSurface1* pFrameSurface = NULL;
    mfxSyncPoint syncpE;
    mfxEncodeCtrl* pEncCtrl = NULL;
    int startP = -1;

#ifndef CONFIG_READ_RAW_BUFFER
    std::map<MSDKVpp*, RING_BUFFER*>::iterator it = m_mapRingBuf.begin();
#endif

    while (!m_bWantToStop) {
        usleep(1000);

        // Check if a key frame needs to be generated
        if (m_bForceKeyFrame)
            pEncCtrl = &m_encCtrl;
        else
            pEncCtrl = NULL;

#ifndef CONFIG_READ_RAW_BUFFER
        if (it->second->IsEmpty()) {  // No data
            //printf("[MSDKEncode]-----There's no more data, just continue or exit the main loop\n");
            if (it->first->GetDataEos()) {  // No more data in the future
                pEncCtrl = NULL;
                pFrameSurface = NULL;
            } else
                continue;
        } else {
            if (MASTER == m_type) {
                if (!m_bAccessNextElem)
                    continue;
                it->second->Get(pFrameSurface);
            } else
                it->second->Pop(pFrameSurface);
        }
        //printf("[MSDKEncode]-----Get next frame surface successfully\n");
#endif

        if (!m_bInit) {
#ifdef CONFIG_READ_RAW_BUFFER
            sts = InitEncoder(NULL);
#else
            sts = InitEncoder(pFrameSurface);
#endif
            if (MFX_ERR_NONE == sts) {
                if (m_pMeasuremnt) {
                    m_pMeasuremnt->GetLock();
                    pipelineinfo einfo;
                    einfo.mElementType = m_type;
                    einfo.mChannelNum = MSDKBase::nEncChannels;
                    MSDKBase::nEncChannels++;
                    m_pMeasuremnt->SetElementInfo(ENC_ENDURATION_TIME_STAMP, this, &einfo);
                    m_pMeasuremnt->TimeStpStart(ENC_ENDURATION_TIME_STAMP, this);
                    m_pMeasuremnt->RelLock();
                }
                m_bInit = true;
                H264E_TRACE_INFO("[MSDKEncode]Encoder %p init successfully\n", this);
            } else {
                H264E_TRACE_ERROR("Encode init failed: %d\n", sts);
                return -1;
            }
        }

#ifdef CONFIG_READ_RAW_BUFFER
        int nIndex = GetFreeSurfaceIndex(m_pSurfacePool, m_nSurfaces);  // Find free frame surface slot
        if (MFX_ERR_NOT_FOUND == nIndex)
            continue;
        else
            pFrameSurface = m_pSurfacePool[nIndex];

        sts = LoadRawFrame(m_pSurfacePool[nIndex]);
        if (MFX_ERR_MORE_DATA == sts) {
            if (m_pInputMem->GetDataEof()) {  // No more data in the future
                pEncCtrl = NULL;
                pFrameSurface = NULL;
            } else {
                continue;
            }
        }
        //printf("[MSDKEncode]-----Get the free surface slot and complete load of the raw frame\n");
#endif

        if (m_pMeasuremnt) {
            m_pMeasuremnt->GetLock();
            m_pMeasuremnt->TimeStpStart(ENC_FRAME_TIME_STAMP, this);
            m_pMeasuremnt->RelLock();
        }

        for (;;) {
            // Encode a frame asynchronously (returns immediately)
            sts = m_pEncode->EncodeFrameAsync(pEncCtrl, pFrameSurface, &m_outputBs, &syncpE);
            //printf("[MSDKEncode]-----EncodeFrameAsync ret code: %d\n", sts);
            if (sts > MFX_ERR_NONE && !syncpE) {
                // Repeat the call if warning and no output
                if (MFX_WRN_DEVICE_BUSY == sts)
                    usleep(1000);  // Wait if device is busy, then repeat the same call
            } else {
                if (sts > MFX_ERR_NONE && syncpE)
                    sts = MFX_ERR_NONE;  // Ignore warnings if output is available
                if (MFX_ERR_NOT_ENOUGH_BUFFER == sts)
                    H264E_TRACE_WARNI("[MSDKEncode]-----The size of the buffer allocated for the encoder is too small\n");
                break;
            }
        }

        if (MFX_ERR_NONE == sts) {
            sts = m_pSession->SyncOperation(syncpE, 60000);
            if (m_pMeasuremnt) {
                m_pMeasuremnt->GetLock();
                m_pMeasuremnt->TimeStpFinish(ENC_FRAME_TIME_STAMP, this);
                m_pMeasuremnt->RelLock();
            }

            // Check if the output is a key frame: startP == 0 means key frame, otherwise not.
            // A group of pictures of size GROUP_OF_PICTURE contains one key frame
            startP = (startP + 1) % GROUP_OF_PICTURE;

            // Release surface pool slot
            if (MASTER == m_type) {
                m_bAccessNextElem = false;
                it->first->ReleaseSurface();
            } else {
                pFrameSurface->Data.Locked--;
            }

            m_pNotify->OnGetMSDKCodecData(m_outputBs.Data + m_outputBs.DataOffset, m_outputBs.DataLength,
                                          (!startP || m_bForceKeyFrame), m_nLogicIndex);
            m_outputBs.DataLength = 0;

            if (m_bForceKeyFrame) {
                m_bForceKeyFrame = false;
                //printf("[MSDKEncode]-----Force key frame successfully\n");
                startP = 0;
            }
        }

        if (!pFrameSurface) {
            if (m_pMeasuremnt) {
                m_pMeasuremnt->GetLock();
                m_pMeasuremnt->TimeStpFinish(ENC_ENDURATION_TIME_STAMP, this);
                m_pMeasuremnt->RelLock();
            }
            break;
        }
    }

    if (!m_bWantToStop) {
        H264E_TRACE_INFO("[MSDKEncode]Got EOS in Encoder %p\n", this);
        m_bWantToStop = true;
    }

    return 0;
}
DWORD WINAPI TranscodeThread(LPVOID arg) { ThreadData *pData = (ThreadData *)arg; int id = pData->id; mfxStatus sts = MFX_ERR_NONE; // ===================================================================== // Intel Media SDK transcode opaque pipeline setup // - Transcode H.264 to H.264, resizing the encoded stream to half the resolution using VPP // - Multiple streams are transcoded concurrently // - Same input stream is used for all concurrent threadcoding threads // // Open input H.264 elementary stream (ES) file FILE* fSource; char inFile[100] = "bbb640x480.264"; fopen_s(&fSource, inFile, "rb"); MSDK_CHECK_POINTER(fSource, MFX_ERR_NULL_PTR); // Create output elementary stream (ES) H.264 file FILE* fSink; char outFile[100] = "bbb320x240_xx.264"; outFile[11] = '0' + (char)(id/10); outFile[12] = '0' + (char)(id%10); fopen_s(&fSink, outFile, "wb"); MSDK_CHECK_POINTER(fSink, MFX_ERR_NULL_PTR); MFXVideoSession* pmfxSession = NULL; // Initialize Media SDK session // - MFX_IMPL_AUTO_ANY selects HW accelaration if available (on any adapter) // - Version 1.3 is selected since the opaque memory feature was added in this API release // If more recent API features are needed, change the version accordingly mfxIMPL impl = MFX_IMPL_AUTO_ANY; mfxVersion ver = {3, 1}; // Note: API 1.3 ! pmfxSession = new MFXVideoSession; MSDK_CHECK_POINTER(pmfxSession, MFX_ERR_NULL_PTR); sts = pmfxSession->Init(impl, &ver); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Create Media SDK decoder & encoder & VPP MFXVideoDECODE* pmfxDEC = new MFXVideoDECODE(*pmfxSession); MSDK_CHECK_POINTER(pmfxDEC, MFX_ERR_NULL_PTR); MFXVideoENCODE* pmfxENC = new MFXVideoENCODE(*pmfxSession); MSDK_CHECK_POINTER(pmfxENC, MFX_ERR_NULL_PTR); MFXVideoVPP* pmfxVPP = new MFXVideoVPP(*pmfxSession); MSDK_CHECK_POINTER(pmfxVPP, MFX_ERR_NULL_PTR); // Set required video parameters for decode mfxVideoParam mfxDecParams; memset(&mfxDecParams, 0, sizeof(mfxDecParams)); mfxDecParams.mfx.CodecId = MFX_CODEC_AVC; mfxDecParams.IOPattern = MFX_IOPATTERN_OUT_OPAQUE_MEMORY; // Configure Media SDK to keep more operations in flight // - AsyncDepth represents the number of tasks that can be submitted, before synchronizing is required // - The choice of AsyncDepth = 3 is quite arbitrary but has proven to result in good performance mfxDecParams.AsyncDepth = 3; // Prepare Media SDK bit stream buffer for decoder // - Arbitrary buffer size for this example mfxBitstream mfxBS; memset(&mfxBS, 0, sizeof(mfxBS)); mfxBS.MaxLength = 1024 * 1024; mfxBS.Data = new mfxU8[mfxBS.MaxLength]; MSDK_CHECK_POINTER(mfxBS.Data, MFX_ERR_MEMORY_ALLOC); // Read a chunk of data from stream file into bit stream buffer // - Parse bit stream, searching for header and fill video parameters structure // - Abort if bit stream header is not found in the first bit stream buffer chunk sts = ReadBitStreamData(&mfxBS, fSource); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = pmfxDEC->DecodeHeader(&mfxBS, &mfxDecParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Initialize VPP parameters mfxVideoParam VPPParams; memset(&VPPParams, 0, sizeof(VPPParams)); // Input data VPPParams.vpp.In.FourCC = MFX_FOURCC_NV12; VPPParams.vpp.In.ChromaFormat = MFX_CHROMAFORMAT_YUV420; VPPParams.vpp.In.CropX = 0; VPPParams.vpp.In.CropY = 0; VPPParams.vpp.In.CropW = mfxDecParams.mfx.FrameInfo.CropW; VPPParams.vpp.In.CropH = mfxDecParams.mfx.FrameInfo.CropH; VPPParams.vpp.In.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; VPPParams.vpp.In.FrameRateExtN = 30; 
VPPParams.vpp.In.FrameRateExtD = 1; // width must be a multiple of 16 // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture VPPParams.vpp.In.Width = MSDK_ALIGN16(VPPParams.vpp.In.CropW); VPPParams.vpp.In.Height = (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.In.PicStruct)? MSDK_ALIGN16(VPPParams.vpp.In.CropH) : MSDK_ALIGN32(VPPParams.vpp.In.CropH); // Output data VPPParams.vpp.Out.FourCC = MFX_FOURCC_NV12; VPPParams.vpp.Out.ChromaFormat = MFX_CHROMAFORMAT_YUV420; VPPParams.vpp.Out.CropX = 0; VPPParams.vpp.Out.CropY = 0; VPPParams.vpp.Out.CropW = VPPParams.vpp.In.CropW/2; // Half the resolution of decode stream VPPParams.vpp.Out.CropH = VPPParams.vpp.In.CropH/2; VPPParams.vpp.Out.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; VPPParams.vpp.Out.FrameRateExtN = 30; VPPParams.vpp.Out.FrameRateExtD = 1; // width must be a multiple of 16 // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture VPPParams.vpp.Out.Width = MSDK_ALIGN16(VPPParams.vpp.Out.CropW); VPPParams.vpp.Out.Height = (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.Out.PicStruct)? MSDK_ALIGN16(VPPParams.vpp.Out.CropH) : MSDK_ALIGN32(VPPParams.vpp.Out.CropH); VPPParams.IOPattern = MFX_IOPATTERN_IN_OPAQUE_MEMORY | MFX_IOPATTERN_OUT_OPAQUE_MEMORY; // Configure Media SDK to keep more operations in flight // - AsyncDepth represents the number of tasks that can be submitted, before synchronizing is required VPPParams.AsyncDepth = mfxDecParams.AsyncDepth; // Initialize encoder parameters mfxVideoParam mfxEncParams; memset(&mfxEncParams, 0, sizeof(mfxEncParams)); mfxEncParams.mfx.CodecId = MFX_CODEC_AVC; mfxEncParams.mfx.TargetUsage = MFX_TARGETUSAGE_BALANCED; mfxEncParams.mfx.TargetKbps = 500; mfxEncParams.mfx.RateControlMethod = MFX_RATECONTROL_VBR; mfxEncParams.mfx.FrameInfo.FrameRateExtN = 30; mfxEncParams.mfx.FrameInfo.FrameRateExtD = 1; mfxEncParams.mfx.FrameInfo.FourCC = MFX_FOURCC_NV12; mfxEncParams.mfx.FrameInfo.ChromaFormat = MFX_CHROMAFORMAT_YUV420; mfxEncParams.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; mfxEncParams.mfx.FrameInfo.CropX = 0; mfxEncParams.mfx.FrameInfo.CropY = 0; mfxEncParams.mfx.FrameInfo.CropW = VPPParams.vpp.Out.CropW; // Half the resolution of decode stream mfxEncParams.mfx.FrameInfo.CropH = VPPParams.vpp.Out.CropH; // width must be a multiple of 16 // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture mfxEncParams.mfx.FrameInfo.Width = MSDK_ALIGN16(mfxEncParams.mfx.FrameInfo.CropW); mfxEncParams.mfx.FrameInfo.Height = (MFX_PICSTRUCT_PROGRESSIVE == mfxEncParams.mfx.FrameInfo.PicStruct)? 
MSDK_ALIGN16(mfxEncParams.mfx.FrameInfo.CropH) : MSDK_ALIGN32(mfxEncParams.mfx.FrameInfo.CropH); mfxEncParams.IOPattern = MFX_IOPATTERN_IN_OPAQUE_MEMORY; // Configure Media SDK to keep more operations in flight // - AsyncDepth represents the number of tasks that can be submitted, before synchronizing is required mfxEncParams.AsyncDepth = mfxDecParams.AsyncDepth; // Query number required surfaces for decoder mfxFrameAllocRequest DecRequest; memset(&DecRequest, 0, sizeof(DecRequest)); sts = pmfxDEC->QueryIOSurf(&mfxDecParams, &DecRequest); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Query number required surfaces for encoder mfxFrameAllocRequest EncRequest; memset(&EncRequest, 0, sizeof(EncRequest)); sts = pmfxENC->QueryIOSurf(&mfxEncParams, &EncRequest); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Query number of required surfaces for VPP mfxFrameAllocRequest VPPRequest[2];// [0] - in, [1] - out memset(&VPPRequest, 0, sizeof(mfxFrameAllocRequest)*2); sts = pmfxVPP->QueryIOSurf(&VPPParams, VPPRequest); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Determine the required number of surfaces for decoder output (VPP input) and for VPP output (encoder input) mfxU16 nSurfNumDecVPP = DecRequest.NumFrameSuggested + VPPRequest[0].NumFrameSuggested + VPPParams.AsyncDepth; mfxU16 nSurfNumVPPEnc = EncRequest.NumFrameSuggested + VPPRequest[1].NumFrameSuggested + VPPParams.AsyncDepth; // Initialize shared surfaces for decoder, VPP and encode // - Note that no buffer memory is allocated, for opaque memory this is handled by Media SDK internally // - Frame surface array keeps reference to all surfaces // - Opaque memory is configured with the mfxExtOpaqueSurfaceAlloc extended buffers mfxFrameSurface1** pSurfaces = new mfxFrameSurface1*[nSurfNumDecVPP]; MSDK_CHECK_POINTER(pSurfaces, MFX_ERR_MEMORY_ALLOC); for (int i = 0; i < nSurfNumDecVPP; i++) { pSurfaces[i] = new mfxFrameSurface1; MSDK_CHECK_POINTER(pSurfaces[i], MFX_ERR_MEMORY_ALLOC); memset(pSurfaces[i], 0, sizeof(mfxFrameSurface1)); memcpy(&(pSurfaces[i]->Info), &(DecRequest.Info), sizeof(mfxFrameInfo)); } mfxFrameSurface1** pSurfaces2 = new mfxFrameSurface1*[nSurfNumVPPEnc]; MSDK_CHECK_POINTER(pSurfaces2, MFX_ERR_MEMORY_ALLOC); for (int i = 0; i < nSurfNumVPPEnc; i++) { pSurfaces2[i] = new mfxFrameSurface1; MSDK_CHECK_POINTER(pSurfaces2[i], MFX_ERR_MEMORY_ALLOC); memset(pSurfaces2[i], 0, sizeof(mfxFrameSurface1)); memcpy(&(pSurfaces2[i]->Info), &(EncRequest.Info), sizeof(mfxFrameInfo)); } mfxExtOpaqueSurfaceAlloc extOpaqueAllocDec; memset(&extOpaqueAllocDec, 0, sizeof(extOpaqueAllocDec)); extOpaqueAllocDec.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION; extOpaqueAllocDec.Header.BufferSz = sizeof(mfxExtOpaqueSurfaceAlloc); mfxExtBuffer* pExtParamsDec = (mfxExtBuffer*)&extOpaqueAllocDec; mfxExtOpaqueSurfaceAlloc extOpaqueAllocVPP; memset(&extOpaqueAllocVPP, 0, sizeof(extOpaqueAllocVPP)); extOpaqueAllocVPP.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION; extOpaqueAllocVPP.Header.BufferSz = sizeof(mfxExtOpaqueSurfaceAlloc); mfxExtBuffer* pExtParamsVPP = (mfxExtBuffer*)&extOpaqueAllocVPP; mfxExtOpaqueSurfaceAlloc extOpaqueAllocEnc; memset(&extOpaqueAllocEnc, 0, sizeof(extOpaqueAllocEnc)); extOpaqueAllocEnc.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION; extOpaqueAllocEnc.Header.BufferSz = sizeof(mfxExtOpaqueSurfaceAlloc); mfxExtBuffer* pExtParamsENC = (mfxExtBuffer*)&extOpaqueAllocEnc; extOpaqueAllocDec.Out.Surfaces = pSurfaces; extOpaqueAllocDec.Out.NumSurface = 
nSurfNumDecVPP; extOpaqueAllocDec.Out.Type = DecRequest.Type; memcpy(&extOpaqueAllocVPP.In, &extOpaqueAllocDec.Out, sizeof(extOpaqueAllocDec.Out)); extOpaqueAllocVPP.Out.Surfaces = pSurfaces2; extOpaqueAllocVPP.Out.NumSurface = nSurfNumVPPEnc; extOpaqueAllocVPP.Out.Type = EncRequest.Type; memcpy(&extOpaqueAllocEnc.In, &extOpaqueAllocVPP.Out, sizeof(extOpaqueAllocVPP.Out)); mfxDecParams.ExtParam = &pExtParamsDec; mfxDecParams.NumExtParam = 1; VPPParams.ExtParam = &pExtParamsVPP; VPPParams.NumExtParam = 1; mfxEncParams.ExtParam = &pExtParamsENC; mfxEncParams.NumExtParam = 1; // Initialize the Media SDK decoder sts = pmfxDEC->Init(&mfxDecParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Initialize the Media SDK encoder sts = pmfxENC->Init(&mfxEncParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Initialize Media SDK VPP sts = pmfxVPP->Init(&VPPParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Retrieve video parameters selected by encoder. // - BufferSizeInKB parameter is required to set bit stream buffer size mfxVideoParam par; memset(&par, 0, sizeof(par)); sts = pmfxENC->GetVideoParam(&par); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Create task pool to improve asynchronous performance (greater GPU utilization) mfxU16 taskPoolSize = mfxEncParams.AsyncDepth; // number of tasks that can be submitted, before synchronizing is required Task* pTasks = new Task[taskPoolSize]; memset(pTasks, 0, sizeof(Task) * taskPoolSize); for(int i=0;i<taskPoolSize;i++) { // Prepare Media SDK bit stream buffer pTasks[i].mfxBS.MaxLength = par.mfx.BufferSizeInKB * 1000; pTasks[i].mfxBS.Data = new mfxU8[pTasks[i].mfxBS.MaxLength]; MSDK_CHECK_POINTER(pTasks[i].mfxBS.Data, MFX_ERR_MEMORY_ALLOC); } // =================================== // Start transcoding the frames // #ifdef ENABLE_BENCHMARK LARGE_INTEGER tStart, tEnd; QueryPerformanceFrequency(&tStart); double freq = (double)tStart.QuadPart; QueryPerformanceCounter(&tStart); #endif mfxSyncPoint syncpD, syncpV; mfxFrameSurface1* pmfxOutSurface = NULL; mfxU32 nFrame = 0; int nIndex = 0; int nIndex2 = 0; int nFirstSyncTask = 0; int nTaskIdx = 0; // // Stage 1: Main transcoding loop // while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts || MFX_ERR_MORE_SURFACE == sts) { nTaskIdx = GetFreeTaskIndex(pTasks, taskPoolSize); // Find free task if(MFX_ERR_NOT_FOUND == nTaskIdx) { // No more free tasks, need to sync sts = pmfxSession->SyncOperation(pTasks[nFirstSyncTask].syncp, 60000); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink); MSDK_BREAK_ON_ERROR(sts); pTasks[nFirstSyncTask].syncp = NULL; pTasks[nFirstSyncTask].mfxBS.DataLength = 0; pTasks[nFirstSyncTask].mfxBS.DataOffset = 0; nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize; ++nFrame; #ifdef ENABLE_OUTPUT if((nFrame % 100) == 0) printf("(%d) Frame number: %d\n", id, nFrame); #endif } else { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // just wait and then repeat the same call to DecodeFrameAsync if (MFX_ERR_MORE_DATA == sts) { sts = ReadBitStreamData(&mfxBS, fSource); // Read more data to input bit stream MSDK_BREAK_ON_ERROR(sts); } if (MFX_ERR_MORE_SURFACE == sts || MFX_ERR_NONE == sts) { nIndex = GetFreeSurfaceIndex(pSurfaces, nSurfNumDecVPP); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex) return MFX_ERR_MEMORY_ALLOC; } // Decode a frame asychronously (returns 
immediately) sts = pmfxDEC->DecodeFrameAsync(&mfxBS, pSurfaces[nIndex], &pmfxOutSurface, &syncpD); // Ignore warnings if output is available, // if no output and no action required just repeat the DecodeFrameAsync call if (MFX_ERR_NONE < sts && syncpD) sts = MFX_ERR_NONE; if (MFX_ERR_NONE == sts) { nIndex2 = GetFreeSurfaceIndex(pSurfaces2, nSurfNumVPPEnc); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex2) return MFX_ERR_MEMORY_ALLOC; for (;;) { // Process a frame asynchronously (returns immediately) sts = pmfxVPP->RunFrameVPPAsync(pmfxOutSurface, pSurfaces2[nIndex2], NULL, &syncpV); if (MFX_ERR_NONE < sts && !syncpV) // repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && syncpV) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else break; // not a warning } // VPP needs more data, let decoder decode another frame as input if (MFX_ERR_MORE_DATA == sts) { continue; } else if (MFX_ERR_MORE_SURFACE == sts) { // Not relevant for the illustrated workload! Therefore not handled. // Relevant for cases when VPP produces more frames at output than it consumes at input. E.g. framerate conversion 30 fps -> 60 fps break; } else MSDK_BREAK_ON_ERROR(sts); for (;;) { // Encode a frame asynchronously (returns immediately) sts = pmfxENC->EncodeFrameAsync(NULL, pSurfaces2[nIndex2], &pTasks[nTaskIdx].mfxBS, &pTasks[nTaskIdx].syncp); if (MFX_ERR_NONE < sts && !pTasks[nTaskIdx].syncp) // repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && pTasks[nTaskIdx].syncp) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else if (MFX_ERR_NOT_ENOUGH_BUFFER == sts) { // Allocate more bitstream buffer memory here if needed...
break; } else break; } } } } // MFX_ERR_MORE_DATA means that the file has ended, need to go to buffering loop, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 2: Retrieve the buffered decoded frames // while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_SURFACE == sts) { nTaskIdx = GetFreeTaskIndex(pTasks, taskPoolSize); // Find free task if(MFX_ERR_NOT_FOUND == nTaskIdx) { // No more free tasks, need to sync sts = pmfxSession->SyncOperation(pTasks[nFirstSyncTask].syncp, 60000); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink); MSDK_BREAK_ON_ERROR(sts); pTasks[nFirstSyncTask].syncp = NULL; pTasks[nFirstSyncTask].mfxBS.DataLength = 0; pTasks[nFirstSyncTask].mfxBS.DataOffset = 0; nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize; ++nFrame; #ifdef ENABLE_OUTPUT if((nFrame % 100) == 0) printf("(%d) Frame number: %d\n", id, nFrame); #endif } else { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); nIndex = GetFreeSurfaceIndex(pSurfaces, nSurfNumDecVPP); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex) return MFX_ERR_MEMORY_ALLOC; // Decode a frame asynchronously (returns immediately) sts = pmfxDEC->DecodeFrameAsync(NULL, pSurfaces[nIndex], &pmfxOutSurface, &syncpD); // Ignore warnings if output is available, // if no output and no action required just repeat the DecodeFrameAsync call if (MFX_ERR_NONE < sts && syncpD) sts = MFX_ERR_NONE; if (MFX_ERR_NONE == sts) { nIndex2 = GetFreeSurfaceIndex(pSurfaces2, nSurfNumVPPEnc); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex2) return MFX_ERR_MEMORY_ALLOC; for (;;) { // Process a frame asynchronously (returns immediately) sts = pmfxVPP->RunFrameVPPAsync(pmfxOutSurface, pSurfaces2[nIndex2], NULL, &syncpV); if (MFX_ERR_NONE < sts && !syncpV) // repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && syncpV) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else break; // not a warning } // VPP needs more data, let decoder decode another frame as input if (MFX_ERR_MORE_DATA == sts) { continue; } else if (MFX_ERR_MORE_SURFACE == sts) { // Not relevant for the illustrated workload! Therefore not handled. // Relevant for cases when VPP produces more frames at output than it consumes at input. E.g. framerate conversion 30 fps -> 60 fps break; } else MSDK_BREAK_ON_ERROR(sts); for (;;) { // Encode a frame asynchronously (returns immediately) sts = pmfxENC->EncodeFrameAsync(NULL, pSurfaces2[nIndex2], &pTasks[nTaskIdx].mfxBS, &pTasks[nTaskIdx].syncp); if (MFX_ERR_NONE < sts && !pTasks[nTaskIdx].syncp) // repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && pTasks[nTaskIdx].syncp) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else if (MFX_ERR_NOT_ENOUGH_BUFFER == sts) { // Allocate more bitstream buffer memory here if needed...
break; } else break; } } } } // MFX_ERR_MORE_DATA indicates that all decode buffers have been fetched, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 3: Retrieve buffered frames from VPP // while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts || MFX_ERR_MORE_SURFACE == sts) { nTaskIdx = GetFreeTaskIndex(pTasks, taskPoolSize); // Find free task if(MFX_ERR_NOT_FOUND == nTaskIdx) { // No more free tasks, need to sync sts = pmfxSession->SyncOperation(pTasks[nFirstSyncTask].syncp, 60000); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink); MSDK_BREAK_ON_ERROR(sts); pTasks[nFirstSyncTask].syncp = NULL; pTasks[nFirstSyncTask].mfxBS.DataLength = 0; pTasks[nFirstSyncTask].mfxBS.DataOffset = 0; nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize; ++nFrame; #ifdef ENABLE_OUTPUT if((nFrame % 100) == 0) printf("(%d) Frame number: %d\n", id, nFrame); #endif } else { nIndex2 = GetFreeSurfaceIndex(pSurfaces2, nSurfNumVPPEnc); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex2) return MFX_ERR_MEMORY_ALLOC; for (;;) { // Process a frame asynchronously (returns immediately) sts = pmfxVPP->RunFrameVPPAsync(NULL, pSurfaces2[nIndex2], NULL, &syncpV); if (MFX_ERR_NONE < sts && !syncpV) // repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && syncpV) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else break; // not a warning } if (MFX_ERR_MORE_SURFACE == sts) { // Not relevant for the illustrated workload! Therefore not handled. // Relevant for cases when VPP produces more frames at output than it consumes at input. E.g. framerate conversion 30 fps -> 60 fps break; } else MSDK_BREAK_ON_ERROR(sts); for (;;) { // Encode a frame asynchronously (returns immediately) sts = pmfxENC->EncodeFrameAsync(NULL, pSurfaces2[nIndex2], &pTasks[nTaskIdx].mfxBS, &pTasks[nTaskIdx].syncp); if (MFX_ERR_NONE < sts && !pTasks[nTaskIdx].syncp) // repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && pTasks[nTaskIdx].syncp) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else if (MFX_ERR_NOT_ENOUGH_BUFFER == sts) { // Allocate more bitstream buffer memory here if needed...
break; } else break; } } } // MFX_ERR_MORE_DATA indicates that all VPP buffers have been fetched, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 4: Retrieve the buffered encoded frames // while (MFX_ERR_NONE <= sts) { nTaskIdx = GetFreeTaskIndex(pTasks, taskPoolSize); // Find free task if(MFX_ERR_NOT_FOUND == nTaskIdx) { // No more free tasks, need to sync sts = pmfxSession->SyncOperation(pTasks[nFirstSyncTask].syncp, 60000); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink); MSDK_BREAK_ON_ERROR(sts); pTasks[nFirstSyncTask].syncp = NULL; pTasks[nFirstSyncTask].mfxBS.DataLength = 0; pTasks[nFirstSyncTask].mfxBS.DataOffset = 0; nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize; ++nFrame; #ifdef ENABLE_OUTPUT if((nFrame % 100) == 0) printf("(%d) Frame number: %d\n", id, nFrame); #endif } else { for (;;) { // Encode a frame asynchronously (returns immediately) sts = pmfxENC->EncodeFrameAsync(NULL, NULL, &pTasks[nTaskIdx].mfxBS, &pTasks[nTaskIdx].syncp); if (MFX_ERR_NONE < sts && !pTasks[nTaskIdx].syncp) // repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && pTasks[nTaskIdx].syncp) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else break; } } } // MFX_ERR_MORE_DATA indicates that there are no more buffered frames, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 5: Sync all remaining tasks in task pool // while(pTasks[nFirstSyncTask].syncp) { sts = pmfxSession->SyncOperation(pTasks[nFirstSyncTask].syncp, 60000); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink); MSDK_BREAK_ON_ERROR(sts); pTasks[nFirstSyncTask].syncp = NULL; pTasks[nFirstSyncTask].mfxBS.DataLength = 0; pTasks[nFirstSyncTask].mfxBS.DataOffset = 0; nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize; ++nFrame; #ifdef ENABLE_OUTPUT if((nFrame % 100) == 0) printf("(%d) Frame number: %d\n", id, nFrame); #endif } #ifdef ENABLE_BENCHMARK QueryPerformanceCounter(&tEnd); double duration = ((double)tEnd.QuadPart - (double)tStart.QuadPart) / freq; printf("\n[%d] Execution time: %3.2fs (%3.2ffps)\n", pData->id, duration, nFrame/duration); #endif // =================================================================== // Clean up resources // - It is recommended to close Media SDK components first, before releasing allocated surfaces, since // some surfaces may still be locked by internal Media SDK resources. pmfxENC->Close(); pmfxDEC->Close(); pmfxVPP->Close(); delete pmfxENC; delete pmfxDEC; delete pmfxVPP; pmfxSession->Close(); delete pmfxSession; for (int i = 0; i < nSurfNumDecVPP; i++) delete pSurfaces[i]; for (int i = 0; i < nSurfNumVPPEnc; i++) delete pSurfaces2[i]; MSDK_SAFE_DELETE_ARRAY(pSurfaces); MSDK_SAFE_DELETE_ARRAY(pSurfaces2); MSDK_SAFE_DELETE_ARRAY(mfxBS.Data); for(int i=0;i<taskPoolSize;i++) MSDK_SAFE_DELETE_ARRAY(pTasks[i].mfxBS.Data); MSDK_SAFE_DELETE_ARRAY(pTasks); fclose(fSource); fclose(fSink); return 0; }
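The task-pool pattern in the transcoder above relies on a Task element and a GetFreeTaskIndex helper that are defined outside this section. Below is a minimal sketch consistent with how the loops use them: a slot is free when its sync point is NULL, and the negative MFX_ERR_NOT_FOUND status is what the loops compare against. The original sample's definitions may differ in detail.

struct Task {
    mfxBitstream mfxBS;  // per-task output bitstream buffer
    mfxSyncPoint syncp;  // NULL when the slot is free, non-NULL while an encode is in flight
};

int GetFreeTaskIndex(Task* pTaskPool, mfxU16 nPoolSize)
{
    if (pTaskPool)
        for (mfxU16 i = 0; i < nPoolSize; i++)
            if (!pTaskPool[i].syncp)
                return i; // first slot not awaiting synchronization
    return MFX_ERR_NOT_FOUND; // pool exhausted: caller syncs the oldest task to free a slot
}

Sizing the pool from AsyncDepth, as the transcoder does, lets that many decode/VPP/encode chains stay in flight before the application has to block on SyncOperation, which is what drives up GPU utilization.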
int main() { mfxStatus sts = MFX_ERR_NONE; mfxU16 inputWidth = 1920; mfxU16 inputHeight = 1080; // ===================================================================== // Intel Media SDK Video Pre/Post Processing (VPP) pipeline setup // - Showcasing two VPP features // - Resize (frame width and height are halved) // - ProcAmp: Increase brightness // - Video memory surfaces are used // // Open input YV12 YUV file FILE* fSource; fopen_s(&fSource, "bbb1920x1080.yuv", "rb"); MSDK_CHECK_POINTER(fSource, MFX_ERR_NULL_PTR); // Create output YUV file FILE* fSink; fopen_s(&fSink, "bbb960x540_vpp_bright_d3d.yuv", "wb"); MSDK_CHECK_POINTER(fSink, MFX_ERR_NULL_PTR); // Initialize Media SDK session // - MFX_IMPL_AUTO_ANY selects HW acceleration if available (on any adapter) // - Version 1.0 is selected for greatest backwards compatibility. // If more recent API features are needed, change the version accordingly mfxIMPL impl = MFX_IMPL_AUTO_ANY; #ifdef DX11_D3D impl |= MFX_IMPL_VIA_D3D11; #endif mfxVersion ver = {0, 1}; MFXVideoSession mfxSession; sts = mfxSession.Init(impl, &ver); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Initialize VPP parameters mfxVideoParam VPPParams; memset(&VPPParams, 0, sizeof(VPPParams)); // Input data VPPParams.vpp.In.FourCC = MFX_FOURCC_NV12; VPPParams.vpp.In.ChromaFormat = MFX_CHROMAFORMAT_YUV420; VPPParams.vpp.In.CropX = 0; VPPParams.vpp.In.CropY = 0; VPPParams.vpp.In.CropW = inputWidth; VPPParams.vpp.In.CropH = inputHeight; VPPParams.vpp.In.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; VPPParams.vpp.In.FrameRateExtN = 30; VPPParams.vpp.In.FrameRateExtD = 1; // width must be a multiple of 16 // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture VPPParams.vpp.In.Width = MSDK_ALIGN16(inputWidth); VPPParams.vpp.In.Height = (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.In.PicStruct)? MSDK_ALIGN16(inputHeight) : MSDK_ALIGN32(inputHeight); // Output data VPPParams.vpp.Out.FourCC = MFX_FOURCC_NV12; VPPParams.vpp.Out.ChromaFormat = MFX_CHROMAFORMAT_YUV420; VPPParams.vpp.Out.CropX = 0; VPPParams.vpp.Out.CropY = 0; VPPParams.vpp.Out.CropW = inputWidth/2; VPPParams.vpp.Out.CropH = inputHeight/2; VPPParams.vpp.Out.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; VPPParams.vpp.Out.FrameRateExtN = 30; VPPParams.vpp.Out.FrameRateExtD = 1; // width must be a multiple of 16 // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture VPPParams.vpp.Out.Width = MSDK_ALIGN16(VPPParams.vpp.Out.CropW); VPPParams.vpp.Out.Height = (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.Out.PicStruct)?
MSDK_ALIGN16(VPPParams.vpp.Out.CropH) : MSDK_ALIGN32(VPPParams.vpp.Out.CropH); VPPParams.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY; // Create Media SDK VPP component MFXVideoVPP mfxVPP(mfxSession); // Create DirectX device context mfxHDL deviceHandle; sts = CreateHWDevice(mfxSession, &deviceHandle, NULL); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Provide device manager to Media SDK sts = mfxSession.SetHandle(DEVICE_MGR_TYPE, deviceHandle); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); mfxFrameAllocator mfxAllocator; mfxAllocator.Alloc = simple_alloc; mfxAllocator.Free = simple_free; mfxAllocator.Lock = simple_lock; mfxAllocator.Unlock = simple_unlock; mfxAllocator.GetHDL = simple_gethdl; // When using video memory we must provide Media SDK with an external allocator sts = mfxSession.SetFrameAllocator(&mfxAllocator); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Query number of required surfaces for VPP mfxFrameAllocRequest VPPRequest[2];// [0] - in, [1] - out memset(&VPPRequest, 0, sizeof(mfxFrameAllocRequest)*2); sts = mfxVPP.QueryIOSurf(&VPPParams, VPPRequest); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); #ifdef DX11_D3D VPPRequest[0].Type |= WILL_WRITE; // Hint to DX11 memory handler that application will write data to input surfaces VPPRequest[1].Type |= WILL_READ; // Hint to DX11 memory handler that application will read data from output surfaces #endif // Allocate required surfaces mfxFrameAllocResponse mfxResponseIn; mfxFrameAllocResponse mfxResponseOut; sts = mfxAllocator.Alloc(mfxAllocator.pthis, &VPPRequest[0], &mfxResponseIn); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = mfxAllocator.Alloc(mfxAllocator.pthis, &VPPRequest[1], &mfxResponseOut); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); mfxU16 nVPPSurfNumIn = mfxResponseIn.NumFrameActual; mfxU16 nVPPSurfNumOut = mfxResponseOut.NumFrameActual; // Allocate surface headers (mfxFrameSurface1) for VPP mfxFrameSurface1** pVPPSurfacesIn = new mfxFrameSurface1*[nVPPSurfNumIn]; MSDK_CHECK_POINTER(pVPPSurfacesIn, MFX_ERR_MEMORY_ALLOC); for (int i = 0; i < nVPPSurfNumIn; i++) { pVPPSurfacesIn[i] = new mfxFrameSurface1; memset(pVPPSurfacesIn[i], 0, sizeof(mfxFrameSurface1)); memcpy(&(pVPPSurfacesIn[i]->Info), &(VPPParams.vpp.In), sizeof(mfxFrameInfo)); pVPPSurfacesIn[i]->Data.MemId = mfxResponseIn.mids[i]; // MID (memory id) represents one D3D NV12 surface #ifndef ENABLE_INPUT // In case of simulating direct access to frames, we initialize the allocated surfaces with a default pattern // - For true benchmark comparisons to async workloads all surfaces must have the same data #ifndef DX11_D3D IDirect3DSurface9 *pSurface; D3DSURFACE_DESC desc; D3DLOCKED_RECT locked; pSurface = (IDirect3DSurface9 *)mfxResponseIn.mids[i]; pSurface->GetDesc(&desc); pSurface->LockRect(&locked, 0, D3DLOCK_NOSYSLOCK); memset((mfxU8 *)locked.pBits, 100, desc.Height*locked.Pitch); // Y plane memset((mfxU8 *)locked.pBits + desc.Height * locked.Pitch, 50, (desc.Height*locked.Pitch)/2); // UV plane pSurface->UnlockRect(); #else // For now, just leave D3D11 surface data uninitialized #endif #endif } mfxFrameSurface1** pVPPSurfacesOut = new mfxFrameSurface1*[nVPPSurfNumOut]; MSDK_CHECK_POINTER(pVPPSurfacesOut, MFX_ERR_MEMORY_ALLOC); for (int i = 0; i < nVPPSurfNumOut; i++) { pVPPSurfacesOut[i] = new mfxFrameSurface1; memset(pVPPSurfacesOut[i], 0, sizeof(mfxFrameSurface1)); memcpy(&(pVPPSurfacesOut[i]->Info), &(VPPParams.vpp.Out), sizeof(mfxFrameInfo)); pVPPSurfacesOut[i]->Data.MemId = mfxResponseOut.mids[i]; // MID (memory id) represents one D3D
NV12 surface } // Initialize extended buffer for frame processing // - Process amplifier (ProcAmp) used to control brightness // - mfxExtVPPDoUse: Define the processing algorithm to be used // - mfxExtVPPProcAmp: ProcAmp configuration // - mfxExtBuffer: Add extended buffers to VPP parameter configuration mfxExtVPPDoUse extDoUse; mfxU32 tabDoUseAlg[1]; extDoUse.Header.BufferId = MFX_EXTBUFF_VPP_DOUSE; extDoUse.Header.BufferSz = sizeof(mfxExtVPPDoUse); extDoUse.NumAlg = 1; extDoUse.AlgList = tabDoUseAlg; tabDoUseAlg[0] = MFX_EXTBUFF_VPP_PROCAMP; mfxExtVPPProcAmp procampConfig; procampConfig.Header.BufferId = MFX_EXTBUFF_VPP_PROCAMP; procampConfig.Header.BufferSz = sizeof(mfxExtVPPProcAmp); procampConfig.Hue = 0.0f; // Default procampConfig.Saturation = 1.0f; // Default procampConfig.Contrast = 1.0; // Default procampConfig.Brightness = 40.0; // Adjust brightness mfxExtBuffer* ExtBuffer[2]; ExtBuffer[0] = (mfxExtBuffer*)&extDoUse; ExtBuffer[1] = (mfxExtBuffer*)&procampConfig; VPPParams.NumExtParam = 2; VPPParams.ExtParam = (mfxExtBuffer**)&ExtBuffer[0]; // Initialize Media SDK VPP sts = mfxVPP.Init(&VPPParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // =================================== // Start processing the frames // #ifdef ENABLE_BENCHMARK LARGE_INTEGER tStart, tEnd; QueryPerformanceFrequency(&tStart); double freq = (double)tStart.QuadPart; QueryPerformanceCounter(&tStart); #endif int nSurfIdxIn = 0, nSurfIdxOut = 0; mfxSyncPoint syncp; mfxU32 nFrame = 0; // // Stage 1: Main processing loop // while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts) { nSurfIdxIn = GetFreeSurfaceIndex(pVPPSurfacesIn, nVPPSurfNumIn); // Find free input frame surface if (MFX_ERR_NOT_FOUND == nSurfIdxIn) return MFX_ERR_MEMORY_ALLOC; // Surface locking required when read/write D3D surfaces sts = mfxAllocator.Lock(mfxAllocator.pthis, pVPPSurfacesIn[nSurfIdxIn]->Data.MemId, &(pVPPSurfacesIn[nSurfIdxIn]->Data)); MSDK_BREAK_ON_ERROR(sts); sts = LoadRawFrame(pVPPSurfacesIn[nSurfIdxIn], fSource); // Load frame from file into surface MSDK_BREAK_ON_ERROR(sts); sts = mfxAllocator.Unlock(mfxAllocator.pthis, pVPPSurfacesIn[nSurfIdxIn]->Data.MemId, &(pVPPSurfacesIn[nSurfIdxIn]->Data)); MSDK_BREAK_ON_ERROR(sts); nSurfIdxOut = GetFreeSurfaceIndex(pVPPSurfacesOut, nVPPSurfNumOut); // Find free output frame surface if (MFX_ERR_NOT_FOUND == nSurfIdxOut) return MFX_ERR_MEMORY_ALLOC; // Process a frame asynchronously (returns immediately) sts = mfxVPP.RunFrameVPPAsync(pVPPSurfacesIn[nSurfIdxIn], pVPPSurfacesOut[nSurfIdxOut], NULL, &syncp); if (MFX_ERR_MORE_DATA == sts) continue; // MFX_ERR_MORE_SURFACE means output is ready but needs more surfaces (example: Frame Rate Conversion 30->60) // * Not handled in this example! MSDK_BREAK_ON_ERROR(sts); sts = mfxSession.SyncOperation(syncp, 60000); // Synchronize.
Wait until frame processing is ready MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); ++nFrame; #ifdef ENABLE_OUTPUT // Surface locking required when read/write D3D surfaces sts = mfxAllocator.Lock(mfxAllocator.pthis, pVPPSurfacesOut[nSurfIdxOut]->Data.MemId, &(pVPPSurfacesOut[nSurfIdxOut]->Data)); MSDK_BREAK_ON_ERROR(sts); sts = WriteRawFrame(pVPPSurfacesOut[nSurfIdxOut], fSink); MSDK_BREAK_ON_ERROR(sts); sts = mfxAllocator.Unlock(mfxAllocator.pthis, pVPPSurfacesOut[nSurfIdxOut]->Data.MemId, &(pVPPSurfacesOut[nSurfIdxOut]->Data)); MSDK_BREAK_ON_ERROR(sts); printf("Frame number: %d\r", nFrame); #endif } // MFX_ERR_MORE_DATA means that the input file has ended, need to go to buffering loop, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 2: Retrieve the buffered VPP frames // while (MFX_ERR_NONE <= sts) { nSurfIdxOut = GetFreeSurfaceIndex(pVPPSurfacesOut, nVPPSurfNumOut); // Find free frame surface if (MFX_ERR_NOT_FOUND == nSurfIdxOut) return MFX_ERR_MEMORY_ALLOC; // Process a frame asynchronously (returns immediately) sts = mfxVPP.RunFrameVPPAsync(NULL, pVPPSurfacesOut[nSurfIdxOut], NULL, &syncp); MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_SURFACE); MSDK_BREAK_ON_ERROR(sts); sts = mfxSession.SyncOperation(syncp, 60000); // Synchronize. Wait until frame processing is ready MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); ++nFrame; #ifdef ENABLE_OUTPUT // Surface locking required when read/write D3D surfaces sts = mfxAllocator.Lock(mfxAllocator.pthis, pVPPSurfacesOut[nSurfIdxOut]->Data.MemId, &(pVPPSurfacesOut[nSurfIdxOut]->Data)); MSDK_BREAK_ON_ERROR(sts); sts = WriteRawFrame(pVPPSurfacesOut[nSurfIdxOut], fSink); MSDK_BREAK_ON_ERROR(sts); sts = mfxAllocator.Unlock(mfxAllocator.pthis, pVPPSurfacesOut[nSurfIdxOut]->Data.MemId, &(pVPPSurfacesOut[nSurfIdxOut]->Data)); MSDK_BREAK_ON_ERROR(sts); printf("Frame number: %d\r", nFrame); #endif } // MFX_ERR_MORE_DATA indicates that there are no more buffered frames, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); #ifdef ENABLE_BENCHMARK QueryPerformanceCounter(&tEnd); double duration = ((double)tEnd.QuadPart - (double)tStart.QuadPart) / freq; printf("\nExecution time: %3.2fs (%3.2ffps)\n", duration, nFrame/duration); #endif // =================================================================== // Clean up resources // - It is recommended to close Media SDK components first, before releasing allocated surfaces, since // some surfaces may still be locked by internal Media SDK resources. mfxVPP.Close(); //mfxSession closed automatically on destruction for (int i = 0; i < nVPPSurfNumIn; i++) delete pVPPSurfacesIn[i]; MSDK_SAFE_DELETE_ARRAY(pVPPSurfacesIn); for (int i = 0; i < nVPPSurfNumOut; i++) delete pVPPSurfacesOut[i]; MSDK_SAFE_DELETE_ARRAY(pVPPSurfacesOut); fclose(fSource); fclose(fSink); CleanupHWDevice(); return 0; }
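Every pipeline in this collection polls GetFreeSurfaceIndex to pick a working surface that the library is no longer using. A minimal sketch, assuming the standard Media SDK convention that a surface is reusable once its Data.Locked counter drops to zero (the exact helper in the original samples may differ):

int GetFreeSurfaceIndex(mfxFrameSurface1** pSurfacesPool, mfxU16 nPoolSize)
{
    if (pSurfacesPool)
        for (mfxU16 i = 0; i < nPoolSize; i++)
            if (0 == pSurfacesPool[i]->Data.Locked)
                return i; // not referenced by any pending asynchronous operation
    return MFX_ERR_NOT_FOUND;
}

The Locked counter is maintained by Media SDK itself while a surface is an input to or output of an in-flight operation, which is why the callers above treat MFX_ERR_NOT_FOUND as an allocation-sizing problem rather than retrying.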
HRESULT CDXVADecoderH264_DXVA1::DecodeFrame(BYTE* pDataIn, UINT nSize, REFERENCE_TIME rtStart, REFERENCE_TIME rtStop) { HRESULT hr = S_FALSE; int nSurfaceIndex = -1; int nFramePOC = INT_MIN; int nOutPOC = INT_MIN; REFERENCE_TIME rtOutStart = INVALID_TIME; CH264Nalu Nalu; CComPtr<IMediaSample> pSampleToDeliver; CHECK_HR_FALSE (FFH264DecodeFrame(m_pFilter->GetAVCtx(), m_pFilter->GetFrame(), pDataIn, nSize, rtStart, &nFramePOC, &nOutPOC, &rtOutStart, &m_nNALLength)); // If parsing fails (probably no PPS/SPS), continue anyway; they may arrive later (happens on truncated streams) CHECK_HR_FALSE (FFH264BuildPicParams(m_pFilter->GetAVCtx(), &m_DXVAPicParams, &m_DXVAScalingMatrix, m_IsATIUVD)); TRACE_H264 ("CDXVADecoderH264_DXVA1::DecodeFrame() : nFramePOC = %11d, nOutPOC = %11d[%11d], [%d - %d], rtOutStart = [%20I64d]\n", nFramePOC, nOutPOC, m_nOutPOC, m_DXVAPicParams.field_pic_flag, m_DXVAPicParams.RefPicFlag, rtOutStart); // Wait for an I-frame after a flush if (m_bFlushed && !m_DXVAPicParams.IntraPicFlag) { TRACE_H264 ("CDXVADecoderH264_DXVA1::DecodeFrame() : Flush - wait I frame\n"); return S_FALSE; } CHECK_HR_FALSE (GetFreeSurfaceIndex(nSurfaceIndex, &pSampleToDeliver, rtStart, rtStop)); FFH264SetCurrentPicture(nSurfaceIndex, &m_DXVAPicParams, m_pFilter->GetAVCtx()); { m_DXVAPicParams.StatusReportFeedbackNumber++; CHECK_HR_FALSE (BeginFrame(nSurfaceIndex, pSampleToDeliver)); // Send picture parameters CHECK_HR_FALSE (AddExecuteBuffer(DXVA2_PictureParametersBufferType, sizeof(m_DXVAPicParams), &m_DXVAPicParams)); // Add bitstream CHECK_HR_FALSE (AddExecuteBuffer(DXVA2_BitStreamDateBufferType, nSize, pDataIn)); // Add quantization matrix CHECK_HR_FALSE (AddExecuteBuffer(DXVA2_InverseQuantizationMatrixBufferType, sizeof(DXVA_Qmatrix_H264), &m_DXVAScalingMatrix)); // Add slice control CHECK_HR_FALSE (AddExecuteBuffer(DXVA2_SliceControlBufferType, sizeof(DXVA_Slice_H264_Short) * m_nSlices, m_pSliceShort)); // Decode frame CHECK_HR_FALSE (Execute()); CHECK_HR_FALSE (EndFrame(nSurfaceIndex)); } bool bAdded = AddToStore(nSurfaceIndex, pSampleToDeliver, m_DXVAPicParams.RefPicFlag, rtStart, rtStop, m_DXVAPicParams.field_pic_flag, nFramePOC); FFH264UpdateRefFramesList(&m_DXVAPicParams, m_pFilter->GetAVCtx()); ClearUnusedRefFrames(); if (bAdded) { hr = DisplayNextFrame(); } if (nOutPOC != INT_MIN) { m_nOutPOC = nOutPOC; m_rtOutStart = rtOutStart; } m_bFlushed = false; return hr; }
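The DXVA decode paths in this collection funnel every call through CHECK_HR / CHECK_HR_FALSE / CHECK_HR_TRACE guard macros whose definitions live elsewhere in the codebase. A minimal sketch of the shape the call sites imply follows; the real macros likely also emit trace output, and the naming here is an assumption, not the original definition:

// Hypothetical reconstructions; the originals may trace the failing expression too.
#define CHECK_HR(x)       { HRESULT _hr = (x); if (FAILED(_hr)) { return _hr; } }     // propagate the failure code
#define CHECK_HR_FALSE(x) { HRESULT _hr = (x); if (FAILED(_hr)) { return S_FALSE; } } // swallow failure into S_FALSE

The S_FALSE variant matters in these decoders: returning S_FALSE tells the caller the frame was skipped (for instance while waiting for an I-frame after a flush) without tearing down the whole graph.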
INT CmSurfaceManager::CreateBuffer(UINT size, CM_BUFFER_TYPE type, CmBuffer_RT * &pSurface1D, CmOsResource * pCmOsResource, void *&pSysMem) { UINT index = m_pCmDevice->ValidSurfaceIndexStart(); pSurface1D = NULL; if (pCmOsResource) { if (GetFreeSurfaceIndex(index) != CM_SUCCESS) { return CM_EXCEED_SURFACE_AMOUNT; } } else { BOOL useNewSurface = TRUE; if (AllocateSurfaceIndex (size, 0, 0, CM_SURFACE_FORMAT_UNKNOWN, index, useNewSurface, pSysMem) != CM_SUCCESS) { return CM_EXCEED_SURFACE_AMOUNT; } if (!useNewSurface) { CmSurface *pSurface = m_SurfaceArray[index]; if (pSurface && (pSurface->Type() == CM_ENUM_CLASS_TYPE_CMBUFFER_RT)) { pSurface1D = static_cast < CmBuffer_RT * >(pSurface); } else { return CM_FAILURE; } UpdateBuffer(pSurface1D, size); UPDATE_PROFILE_FOR_1D_SURFACE(index, size, TRUE); return CM_SUCCESS; } } if (m_bufferCount >= m_maxBufferCount) { CM_ASSERT(0); return CM_EXCEED_SURFACE_AMOUNT; } UINT handle = 0; INT result = AllocateBuffer(size, type, handle, pCmOsResource, pSysMem); if (result != CM_SUCCESS) { CM_ASSERT(0); return result; } result = CmBuffer_RT::Create(index, handle, size, pCmOsResource == NULL, this, type, pSysMem, pSurface1D); if (result != CM_SUCCESS) { FreeBuffer(handle); CM_ASSERT(0); return result; } m_SurfaceArray[index] = pSurface1D; UPDATE_PROFILE_FOR_1D_SURFACE(index, size, FALSE); return CM_SUCCESS; }
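For context on how this entry point is meant to be driven: a caller asks the manager for a buffer and receives both the wrapper object and, when the manager allocates on the caller's behalf, the backing memory through the out-parameters; note that the AllocateSurfaceIndex path above may also recycle an existing CmBuffer_RT instead of creating a new one. A hypothetical invocation (the pSurfaceManager pointer and the CM_BUFFER_N type value are assumptions based on common CM runtime usage, not taken from this source):

void* pSysMem = NULL;        // may be filled by the manager when it allocates system memory
CmBuffer_RT* pBuffer = NULL; // receives the created (or recycled) buffer wrapper
INT result = pSurfaceManager->CreateBuffer(4096, CM_BUFFER_N, pBuffer, NULL, pSysMem);
if (result != CM_SUCCESS) {
    // CM_EXCEED_SURFACE_AMOUNT: no free index or buffer pool exhausted;
    // other codes propagate from AllocateBuffer / CmBuffer_RT::Create
}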
HRESULT CDXVADecoderH264::DecodeFrame(BYTE* pDataIn, UINT nSize, REFERENCE_TIME rtStart, REFERENCE_TIME rtStop) { HRESULT hr = S_FALSE; UINT nSlices = 0; int nSurfaceIndex = -1; int nFieldType = -1; int nSliceType = -1; int nFramePOC = INT_MIN; int nOutPOC = INT_MIN; REFERENCE_TIME rtOutStart = _I64_MIN; CH264Nalu Nalu; UINT nNalOffset = 0; CComPtr<IMediaSample> pSampleToDeliver; CComQIPtr<IMPCDXVA2Sample> pDXVA2Sample; int slice_step = 1; if (FFH264DecodeBuffer(m_pFilter->GetAVCtx(), pDataIn, nSize, &nFramePOC, &nOutPOC, &rtOutStart) == -1) { return S_FALSE; } while (!nSlices && slice_step <= 2) { Nalu.SetBuffer(pDataIn, nSize, slice_step == 1 ? m_nNALLength : 0); while (Nalu.ReadNext()) { switch (Nalu.GetType()) { case NALU_TYPE_SLICE: case NALU_TYPE_IDR: if (m_bUseLongSlice) { m_pSliceLong[nSlices].BSNALunitDataLocation = nNalOffset; m_pSliceLong[nSlices].SliceBytesInBuffer = (UINT)Nalu.GetDataLength() + 3; //.GetRoundedDataLength(); m_pSliceLong[nSlices].slice_id = nSlices; FF264UpdateRefFrameSliceLong(&m_DXVAPicParams, &m_pSliceLong[nSlices], m_pFilter->GetAVCtx()); if (nSlices > 0) { m_pSliceLong[nSlices - 1].NumMbsForSlice = m_pSliceLong[nSlices].NumMbsForSlice = m_pSliceLong[nSlices].first_mb_in_slice - m_pSliceLong[nSlices - 1].first_mb_in_slice; } } nSlices++; nNalOffset += (UINT)(Nalu.GetDataLength() + 3); if (nSlices > MAX_SLICES) { break; } break; } } slice_step++; } if (!nSlices) { return S_FALSE; } m_nMaxWaiting = min(max(m_DXVAPicParams.num_ref_frames, 3), 8); // If parsing fails (probably no PPS/SPS), continue anyway; they may arrive later (happens on truncated streams) if (FAILED(FFH264BuildPicParams(&m_DXVAPicParams, &m_DXVAScalingMatrix, &nFieldType, &nSliceType, m_pFilter->GetAVCtx(), m_pFilter->GetPCIVendor()))) { return S_FALSE; } TRACE_H264("CDXVADecoderH264::DecodeFrame() : nFramePOC = %11d, nOutPOC = %11d[%11d], [%d - %d], rtOutStart = [%20I64d]\n", nFramePOC, nOutPOC, m_nOutPOC, m_DXVAPicParams.field_pic_flag, m_DXVAPicParams.RefPicFlag, rtOutStart); // Wait for an I-frame after a flush if (m_bFlushed && !m_DXVAPicParams.IntraPicFlag) { TRACE_H264("CDXVADecoderH264::DecodeFrame() : Flush - wait I frame\n"); m_nBrokenFramesFlag = 0; m_nBrokenFramesFlag_POC = 0; m_nfield_pic_flag = m_DXVAPicParams.field_pic_flag; m_nRefPicFlag = m_DXVAPicParams.RefPicFlag; m_nPrevOutPOC = INT_MIN; return S_FALSE; } /* Disabled, because that causes serious problems. // Some magic code for detecting the incorrect decoding of interlaced frames ... // TODO: this needs to be done better, preferably on the ffmpeg side ... if (m_nfield_pic_flag && m_nfield_pic_flag == m_DXVAPicParams.field_pic_flag && m_nRefPicFlag == m_DXVAPicParams.RefPicFlag) { if (m_nPrevOutPOC == m_nOutPOC && m_nOutPOC == INT_MIN) { m_nBrokenFramesFlag_POC++; } m_nBrokenFramesFlag++; } else { m_nBrokenFramesFlag = 0; m_nBrokenFramesFlag_POC = 0; } m_nfield_pic_flag = m_DXVAPicParams.field_pic_flag; m_nRefPicFlag = m_DXVAPicParams.RefPicFlag; m_nPrevOutPOC = m_nOutPOC; if (m_nBrokenFramesFlag > 4) { m_nBrokenFramesFlag = 0; if (m_nBrokenFramesFlag_POC > 1) { TRACE_H264("CDXVADecoderH264::DecodeFrame() : Detected broken frames ...
flush data\n"); m_nBrokenFramesFlag_POC = 0; Flush(); return S_FALSE; } } // */ CHECK_HR_TRACE(GetFreeSurfaceIndex(nSurfaceIndex, &pSampleToDeliver, rtStart, rtStop)); FFH264SetCurrentPicture(nSurfaceIndex, &m_DXVAPicParams, m_pFilter->GetAVCtx()); CHECK_HR_TRACE(BeginFrame(nSurfaceIndex, pSampleToDeliver)); m_DXVAPicParams.StatusReportFeedbackNumber++; // Send picture parameters CHECK_HR_TRACE(AddExecuteBuffer(DXVA2_PictureParametersBufferType, sizeof(m_DXVAPicParams), &m_DXVAPicParams)); CHECK_HR_TRACE(Execute()); // Add bitstream, slice control and quantization matrix CHECK_HR_TRACE(AddExecuteBuffer(DXVA2_BitStreamDateBufferType, nSize, pDataIn, &nSize)); if (m_bUseLongSlice) { CHECK_HR_TRACE(AddExecuteBuffer(DXVA2_SliceControlBufferType, sizeof(DXVA_Slice_H264_Long)*nSlices, m_pSliceLong)); } else { CHECK_HR_TRACE(AddExecuteBuffer(DXVA2_SliceControlBufferType, sizeof(DXVA_Slice_H264_Short)*nSlices, m_pSliceShort)); } CHECK_HR_TRACE(AddExecuteBuffer(DXVA2_InverseQuantizationMatrixBufferType, sizeof(DXVA_Qmatrix_H264), (void*)&m_DXVAScalingMatrix)); // Decode bitstream CHECK_HR_TRACE(Execute()); CHECK_HR_TRACE(EndFrame(nSurfaceIndex)); #if defined(_DEBUG) && 0 DisplayStatus(); #endif bool bAdded = AddToStore(nSurfaceIndex, pSampleToDeliver, m_DXVAPicParams.RefPicFlag, rtStart, rtStop, m_DXVAPicParams.field_pic_flag, (FF_FIELD_TYPE)nFieldType, (FF_SLICE_TYPE)nSliceType, nFramePOC); FFH264UpdateRefFramesList(&m_DXVAPicParams, m_pFilter->GetAVCtx()); ClearUnusedRefFrames(); if (bAdded) { hr = DisplayNextFrame(); } if (nOutPOC != INT_MIN) { m_nOutPOC = nOutPOC; m_rtOutStart = rtOutStart; } m_bFlushed = false; return hr; }
int main() { mfxStatus sts = MFX_ERR_NONE; // ===================================================================== // Intel Media SDK decode pipeline setup // - In this example we are decoding an AVC (H.264) stream // - For simplistic memory management, system memory surfaces are used to store the decoded frames // (Note that when using HW acceleration D3D surfaces are preferred, for better performance) // // - VPP used to post process (resize) the frame // // Open input H.264 elementary stream (ES) file FILE* fSource; fopen_s(&fSource, "bbb1920x1080.264", "rb"); MSDK_CHECK_POINTER(fSource, MFX_ERR_NULL_PTR); // Create output YUV file FILE* fSink; fopen_s(&fSink, "dectest_960x540.yuv", "wb"); MSDK_CHECK_POINTER(fSink, MFX_ERR_NULL_PTR); // Initialize Media SDK session // - MFX_IMPL_AUTO_ANY selects HW acceleration if available (on any adapter) // - Version 1.0 is selected for greatest backwards compatibility. // If more recent API features are needed, change the version accordingly mfxIMPL impl = MFX_IMPL_AUTO_ANY; mfxVersion ver = {0, 1}; MFXVideoSession mfxSession; sts = mfxSession.Init(impl, &ver); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Create Media SDK decoder MFXVideoDECODE mfxDEC(mfxSession); // Create Media SDK VPP component MFXVideoVPP mfxVPP(mfxSession); // Set required video parameters for decode // - In this example we are decoding an AVC (H.264) stream // - For simplistic memory management, system memory surfaces are used to store the decoded frames // (Note that when using HW acceleration D3D surfaces are preferred, for better performance) mfxVideoParam mfxVideoParams; memset(&mfxVideoParams, 0, sizeof(mfxVideoParams)); mfxVideoParams.mfx.CodecId = MFX_CODEC_AVC; mfxVideoParams.IOPattern = MFX_IOPATTERN_OUT_SYSTEM_MEMORY; // Prepare Media SDK bit stream buffer // - Arbitrary buffer size for this example mfxBitstream mfxBS; memset(&mfxBS, 0, sizeof(mfxBS)); mfxBS.MaxLength = 1024 * 1024; mfxBS.Data = new mfxU8[mfxBS.MaxLength]; MSDK_CHECK_POINTER(mfxBS.Data, MFX_ERR_MEMORY_ALLOC); // Read a chunk of data from stream file into bit stream buffer // - Parse bit stream, searching for the header and filling the video parameters structure // - Abort if bit stream header is not found in the first bit stream buffer chunk sts = ReadBitStreamData(&mfxBS, fSource); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = mfxDEC.DecodeHeader(&mfxBS, &mfxVideoParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Initialize VPP parameters // - For simplistic memory management, system memory surfaces are used to store the raw frames // (Note that when using HW acceleration D3D surfaces are preferred, for better performance) mfxVideoParam VPPParams; memset(&VPPParams, 0, sizeof(VPPParams)); // Input data VPPParams.vpp.In.FourCC = MFX_FOURCC_NV12; VPPParams.vpp.In.ChromaFormat = MFX_CHROMAFORMAT_YUV420; VPPParams.vpp.In.CropX = 0; VPPParams.vpp.In.CropY = 0; VPPParams.vpp.In.CropW = mfxVideoParams.mfx.FrameInfo.CropW; VPPParams.vpp.In.CropH = mfxVideoParams.mfx.FrameInfo.CropH; VPPParams.vpp.In.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; VPPParams.vpp.In.FrameRateExtN = 30; VPPParams.vpp.In.FrameRateExtD = 1; // width must be a multiple of 16 // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture VPPParams.vpp.In.Width = MSDK_ALIGN16(VPPParams.vpp.In.CropW); VPPParams.vpp.In.Height = (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.In.PicStruct)?
MSDK_ALIGN16(VPPParams.vpp.In.CropH) : MSDK_ALIGN32(VPPParams.vpp.In.CropH); // Output data VPPParams.vpp.Out.FourCC = MFX_FOURCC_NV12; VPPParams.vpp.Out.ChromaFormat = MFX_CHROMAFORMAT_YUV420; VPPParams.vpp.Out.CropX = 0; VPPParams.vpp.Out.CropY = 0; VPPParams.vpp.Out.CropW = VPPParams.vpp.In.CropW/2; // Resize to half size resolution VPPParams.vpp.Out.CropH = VPPParams.vpp.In.CropH/2; VPPParams.vpp.Out.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; VPPParams.vpp.Out.FrameRateExtN = 30; VPPParams.vpp.Out.FrameRateExtD = 1; // width must be a multiple of 16 // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture VPPParams.vpp.Out.Width = MSDK_ALIGN16(VPPParams.vpp.Out.CropW); VPPParams.vpp.Out.Height = (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.Out.PicStruct)? MSDK_ALIGN16(VPPParams.vpp.Out.CropH) : MSDK_ALIGN32(VPPParams.vpp.Out.CropH); VPPParams.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY | MFX_IOPATTERN_OUT_SYSTEM_MEMORY; // Query number of required surfaces for decoder mfxFrameAllocRequest DecRequest; memset(&DecRequest, 0, sizeof(DecRequest)); sts = mfxDEC.QueryIOSurf(&mfxVideoParams, &DecRequest); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Query number of required surfaces for VPP mfxFrameAllocRequest VPPRequest[2];// [0] - in, [1] - out memset(&VPPRequest, 0, sizeof(mfxFrameAllocRequest)*2); sts = mfxVPP.QueryIOSurf(&VPPParams, VPPRequest); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Determine the required number of surfaces for decoder output (VPP input) and for VPP output mfxU16 nSurfNumDecVPP = DecRequest.NumFrameSuggested + VPPRequest[0].NumFrameSuggested; mfxU16 nSurfNumVPPOut = VPPRequest[1].NumFrameSuggested; // Allocate surfaces for decoder and VPP In // - Width and height of buffer must be aligned, a multiple of 32 // - Frame surface array keeps pointers to all surface planes and general frame info mfxU16 width = (mfxU16)MSDK_ALIGN32(DecRequest.Info.Width); mfxU16 height = (mfxU16)MSDK_ALIGN32(DecRequest.Info.Height); mfxU8 bitsPerPixel = 12; // NV12 format is a 12 bits per pixel format mfxU32 surfaceSize = width * height * bitsPerPixel / 8; mfxU8* surfaceBuffers = (mfxU8 *)new mfxU8[surfaceSize * nSurfNumDecVPP]; mfxFrameSurface1** pmfxSurfaces = new mfxFrameSurface1*[nSurfNumDecVPP]; MSDK_CHECK_POINTER(pmfxSurfaces, MFX_ERR_MEMORY_ALLOC); for (int i = 0; i < nSurfNumDecVPP; i++) { pmfxSurfaces[i] = new mfxFrameSurface1; memset(pmfxSurfaces[i], 0, sizeof(mfxFrameSurface1)); memcpy(&(pmfxSurfaces[i]->Info), &(mfxVideoParams.mfx.FrameInfo), sizeof(mfxFrameInfo)); pmfxSurfaces[i]->Data.Y = &surfaceBuffers[surfaceSize * i]; pmfxSurfaces[i]->Data.U = pmfxSurfaces[i]->Data.Y + width * height; pmfxSurfaces[i]->Data.V = pmfxSurfaces[i]->Data.U + 1; pmfxSurfaces[i]->Data.Pitch = width; } // Allocate surfaces for VPP Out // - Width and height of buffer must be aligned, a multiple of 32 // - Frame surface array keeps pointers to all surface planes and general frame info width = (mfxU16)MSDK_ALIGN32(VPPRequest[1].Info.Width); height = (mfxU16)MSDK_ALIGN32(VPPRequest[1].Info.Height); bitsPerPixel = 12; // NV12 format is a 12 bits per pixel format surfaceSize = width * height * bitsPerPixel / 8; mfxU8* surfaceBuffers2 = (mfxU8 *)new mfxU8[surfaceSize * nSurfNumVPPOut]; mfxFrameSurface1** pmfxSurfaces2 = new mfxFrameSurface1*[nSurfNumVPPOut]; MSDK_CHECK_POINTER(pmfxSurfaces2, MFX_ERR_MEMORY_ALLOC); for (int i = 0; i < nSurfNumVPPOut; i++) { pmfxSurfaces2[i] = new mfxFrameSurface1;
memset(pmfxSurfaces2[i], 0, sizeof(mfxFrameSurface1)); memcpy(&(pmfxSurfaces2[i]->Info), &(VPPParams.vpp.Out), sizeof(mfxFrameInfo)); pmfxSurfaces2[i]->Data.Y = &surfaceBuffers2[surfaceSize * i]; pmfxSurfaces2[i]->Data.U = pmfxSurfaces2[i]->Data.Y + width * height; pmfxSurfaces2[i]->Data.V = pmfxSurfaces2[i]->Data.U + 1; pmfxSurfaces2[i]->Data.Pitch = width; } // Initialize the Media SDK decoder sts = mfxDEC.Init(&mfxVideoParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Initialize Media SDK VPP sts = mfxVPP.Init(&VPPParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // =============================================================== // Start decoding the frames from the stream // #ifdef ENABLE_BENCHMARK LARGE_INTEGER tStart, tEnd; QueryPerformanceFrequency(&tStart); double freq = (double)tStart.QuadPart; QueryPerformanceCounter(&tStart); #endif mfxSyncPoint syncpD; mfxSyncPoint syncpV; mfxFrameSurface1* pmfxOutSurface = NULL; int nIndex = 0; int nIndex2 = 0; mfxU32 nFrame = 0; // // Stage 1: Main decoding loop // while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts || MFX_ERR_MORE_SURFACE == sts) { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // Wait if device is busy, then repeat the same call to DecodeFrameAsync if (MFX_ERR_MORE_DATA == sts) { sts = ReadBitStreamData(&mfxBS, fSource); // Read more data into input bit stream MSDK_BREAK_ON_ERROR(sts); } if (MFX_ERR_MORE_SURFACE == sts || MFX_ERR_NONE == sts) { nIndex = GetFreeSurfaceIndex(pmfxSurfaces, nSurfNumDecVPP); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex) return MFX_ERR_MEMORY_ALLOC; } // Decode a frame asynchronously (returns immediately) sts = mfxDEC.DecodeFrameAsync(&mfxBS, pmfxSurfaces[nIndex], &pmfxOutSurface, &syncpD); // Ignore warnings if output is available, // if no output and no action required just repeat the DecodeFrameAsync call if (MFX_ERR_NONE < sts && syncpD) sts = MFX_ERR_NONE; if (MFX_ERR_NONE == sts) { nIndex2 = GetFreeSurfaceIndex(pmfxSurfaces2, nSurfNumVPPOut); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex2) return MFX_ERR_MEMORY_ALLOC; for (;;) { // Process a frame asynchronously (returns immediately) sts = mfxVPP.RunFrameVPPAsync(pmfxOutSurface, pmfxSurfaces2[nIndex2], NULL, &syncpV); if (MFX_ERR_NONE < sts && !syncpV) // repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && syncpV) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else break; // not a warning } // VPP needs more data, let decoder decode another frame as input if (MFX_ERR_MORE_DATA == sts) { continue; } else if (MFX_ERR_MORE_SURFACE == sts) { // Not relevant for the illustrated workload! Therefore not handled. // Relevant for cases when VPP produces more frames at output than it consumes at input. E.g. framerate conversion 30 fps -> 60 fps break; } else MSDK_BREAK_ON_ERROR(sts); } if (MFX_ERR_NONE == sts) sts = mfxSession.SyncOperation(syncpV, 60000); // Synchronize.
Wait until decoded frame is ready if (MFX_ERR_NONE == sts) { ++nFrame; #ifdef ENABLE_OUTPUT sts = WriteRawFrame(pmfxSurfaces2[nIndex2], fSink); MSDK_BREAK_ON_ERROR(sts); printf("Frame number: %d\r", nFrame); #endif } } // MFX_ERR_MORE_DATA means that the file has ended, need to go to buffering loop, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 2: Retrieve the buffered decoded frames // while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_SURFACE == sts) { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // Wait if device is busy, then repeat the same call to DecodeFrameAsync nIndex = GetFreeSurfaceIndex(pmfxSurfaces, nSurfNumDecVPP); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex) return MFX_ERR_MEMORY_ALLOC; // Decode a frame asynchronously (returns immediately) sts = mfxDEC.DecodeFrameAsync(NULL, pmfxSurfaces[nIndex], &pmfxOutSurface, &syncpD); // Ignore warnings if output is available, // if no output and no action required just repeat the DecodeFrameAsync call if (MFX_ERR_NONE < sts && syncpD) sts = MFX_ERR_NONE; if (MFX_ERR_NONE == sts) { nIndex2 = GetFreeSurfaceIndex(pmfxSurfaces2, nSurfNumVPPOut); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex2) return MFX_ERR_MEMORY_ALLOC; for (;;) { // Process a frame asynchronously (returns immediately) sts = mfxVPP.RunFrameVPPAsync(pmfxOutSurface, pmfxSurfaces2[nIndex2], NULL, &syncpV); if (MFX_ERR_NONE < sts && !syncpV) // repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && syncpV) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else break; // not a warning } // VPP needs more data, let decoder decode another frame as input if (MFX_ERR_MORE_DATA == sts) { continue; } else if (MFX_ERR_MORE_SURFACE == sts) { // Not relevant for the illustrated workload! Therefore not handled. // Relevant for cases when VPP produces more frames at output than it consumes at input. E.g. framerate conversion 30 fps -> 60 fps break; } else MSDK_BREAK_ON_ERROR(sts); } if (MFX_ERR_NONE == sts) sts = mfxSession.SyncOperation(syncpV, 60000); // Synchronize. Waits until decoded frame is ready if (MFX_ERR_NONE == sts) { ++nFrame; #ifdef ENABLE_OUTPUT sts = WriteRawFrame(pmfxSurfaces2[nIndex2], fSink); MSDK_BREAK_ON_ERROR(sts); printf("Frame number: %d\r", nFrame); #endif } } // MFX_ERR_MORE_DATA means that decoder is done with buffered frames, need to go to VPP buffering loop, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 3: Retrieve the buffered VPP frames // while (MFX_ERR_NONE <= sts) { nIndex2 = GetFreeSurfaceIndex(pmfxSurfaces2, nSurfNumVPPOut); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex2) return MFX_ERR_MEMORY_ALLOC; // Process a frame asynchronously (returns immediately) sts = mfxVPP.RunFrameVPPAsync(NULL, pmfxSurfaces2[nIndex2], NULL, &syncpV); MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_SURFACE); MSDK_BREAK_ON_ERROR(sts); sts = mfxSession.SyncOperation(syncpV, 60000); // Synchronize.
Wait until frame processing is ready MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); ++nFrame; #ifdef ENABLE_OUTPUT sts = WriteRawFrame(pmfxSurfaces2[nIndex2], fSink); MSDK_BREAK_ON_ERROR(sts); printf("Frame number: %d\r", nFrame); #endif } // MFX_ERR_MORE_DATA indicates that all buffers have been fetched, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); #ifdef ENABLE_BENCHMARK QueryPerformanceCounter(&tEnd); double duration = ((double)tEnd.QuadPart - (double)tStart.QuadPart) / freq; printf("\nExecution time: %3.2fs (%3.2ffps)\n", duration, nFrame/duration); #endif // =================================================================== // Clean up resources // - It is recommended to close Media SDK components first, before releasing allocated surfaces, since // some surfaces may still be locked by internal Media SDK resources. mfxDEC.Close(); mfxVPP.Close(); // mfxSession closed automatically on destruction for (int i = 0; i < nSurfNumDecVPP; i++) delete pmfxSurfaces[i]; for (int i = 0; i < nSurfNumVPPOut; i++) delete pmfxSurfaces2[i]; MSDK_SAFE_DELETE_ARRAY(pmfxSurfaces); MSDK_SAFE_DELETE_ARRAY(pmfxSurfaces2); MSDK_SAFE_DELETE_ARRAY(surfaceBuffers); MSDK_SAFE_DELETE_ARRAY(surfaceBuffers2); MSDK_SAFE_DELETE_ARRAY(mfxBS.Data); fclose(fSource); fclose(fSink); return 0; }
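The decode samples above call ReadBitStreamData whenever DecodeFrameAsync reports MFX_ERR_MORE_DATA, and treat an MFX_ERR_MORE_DATA return from the reader itself as end of file. A minimal sketch of such a reader, assuming the common behavior of compacting the unconsumed tail before appending fresh bytes (it relies only on the headers these samples already include):

mfxStatus ReadBitStreamData(mfxBitstream* pBS, FILE* fSource)
{
    // Shift the not-yet-consumed bytes to the front of the buffer
    memmove(pBS->Data, pBS->Data + pBS->DataOffset, pBS->DataLength);
    pBS->DataOffset = 0;
    // Fill the remaining space from the file
    mfxU32 nBytesRead = (mfxU32)fread(pBS->Data + pBS->DataLength, 1,
                                      pBS->MaxLength - pBS->DataLength, fSource);
    if (0 == nBytesRead)
        return MFX_ERR_MORE_DATA; // end of stream: caller moves on to the drain loops
    pBS->DataLength += nBytesRead;
    return MFX_ERR_NONE;
}

Compacting first is what lets the decoder leave a partial frame in the buffer between calls: DataOffset marks how far it consumed, and the reader preserves the remainder before topping the buffer up.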