/**
 * Allocates the decoder frame pool and builds one mfxFrameSurface1 header per frame.
 *
 * Returns immediately with MFX_ERR_NONE when m_pFrameSurfaces is already set.
 * Otherwise the decoder is asked (QueryIOSurf) how many surfaces it needs,
 * m_nAuxFrameCount extra frames are added, the frames are allocated through
 * m_pFrameAllocator into m_AllocResponse, and a zeroed header array is filled
 * with the stream's frame info, the allocator's memory id and the pitch.
 *
 * @param pVideoParams  Stream parameters; mfx.FrameInfo seeds the allocation
 *                      request and every surface header. Must not be NULL.
 * @param nPitch        Used as the allocation width and as Data.Pitch of each
 *                      surface (narrowed to mfxU16 in both places).
 * @return MFX_ERR_NONE on success (or when already initialized),
 *         MFX_ERR_NOT_INITIALIZED when there is no decoder,
 *         MFX_ERR_NULL_PTR when pVideoParams is NULL; other failures are
 *         reported through the MSDK_CHECK_* macros.
 */
mfxStatus CQuickSyncDecoder::InitFrameAllocator(mfxVideoParam* pVideoParams, mfxU32 nPitch)
{
    MSDK_TRACE("QsDecoder: InitFrameAllocator\n");

    // Already initialized
    if (m_pFrameSurfaces)
    {
        return MFX_ERR_NONE;
    }

    MSDK_CHECK_POINTER(m_pmfxDEC, MFX_ERR_NOT_INITIALIZED);
    // pVideoParams is dereferenced below (QueryIOSurf input and FrameInfo copies).
    MSDK_CHECK_POINTER(pVideoParams, MFX_ERR_NULL_PTR);

    mfxStatus sts = MFX_ERR_NONE;

    // Initialize frame allocator (if needed)
    sts = CreateAllocator();
    MSDK_CHECK_NOT_EQUAL(sts, MFX_ERR_NONE, sts);

    // Find how many surfaces are needed
    mfxFrameAllocRequest allocRequest;
    MSDK_ZERO_VAR(allocRequest);
    sts = m_pmfxDEC->QueryIOSurf(pVideoParams, &allocRequest);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_INCOMPATIBLE_VIDEO_PARAM);
    MSDK_CHECK_RESULT_P_RET(sts, MFX_ERR_NONE);

    // Reserve the auxiliary frames on top of what the decoder suggested and
    // make that total the minimum as well.
    allocRequest.NumFrameSuggested = (mfxU16)m_nAuxFrameCount + allocRequest.NumFrameSuggested;
    allocRequest.NumFrameMin = allocRequest.NumFrameSuggested;

    // Decide memory type
    allocRequest.Type = MFX_MEMTYPE_EXTERNAL_FRAME | MFX_MEMTYPE_FROM_DECODE;
    allocRequest.Type |= (m_bUseD3DAlloc) ? MFX_MEMTYPE_VIDEO_MEMORY_DECODER_TARGET : MFX_MEMTYPE_SYSTEM_MEMORY;

    memcpy(&allocRequest.Info, &pVideoParams->mfx.FrameInfo, sizeof(mfxFrameInfo));

    // Allocate frames with H aligned at 32 for both progressive and interlaced content
    allocRequest.Info.Height = MSDK_ALIGN32(allocRequest.Info.Height);
    allocRequest.Info.Width = (mfxU16)nPitch;

    // Perform allocation call. Result is saved in m_AllocResponse
    sts = m_pFrameAllocator->Alloc(m_pFrameAllocator->pthis, &allocRequest, &m_AllocResponse);
    MSDK_CHECK_RESULT_P_RET(sts, MFX_ERR_NONE);

    m_nRequiredFramesNum = m_AllocResponse.NumFrameActual;
    ASSERT(m_nRequiredFramesNum == allocRequest.NumFrameSuggested);

    m_pFrameSurfaces = new mfxFrameSurface1[m_nRequiredFramesNum];
    MSDK_CHECK_POINTER(m_pFrameSurfaces, MFX_ERR_MEMORY_ALLOC);
    MSDK_ZERO_MEMORY(m_pFrameSurfaces, sizeof(mfxFrameSurface1) * m_nRequiredFramesNum);

    // Allocate decoder work & output surfaces
    for (mfxU32 i = 0; i < m_nRequiredFramesNum; ++i)
    {
        // Copy frame info
        memcpy(&(m_pFrameSurfaces[i].Info), &pVideoParams->mfx.FrameInfo, sizeof(mfxFrameInfo));

        // Save pointer to allocator specific surface object (mid)
        m_pFrameSurfaces[i].Data.MemId = m_AllocResponse.mids[i];
        m_pFrameSurfaces[i].Data.Pitch = (mfxU16)nPitch;
    }

    return sts;
}
/**
 * Queries the decoder's surface requirements, allocates the surfaces through
 * pMfxAllocator and creates one mfxFrameSurface1 header per allocated surface.
 *
 * On success numSurfaces holds the number of frames the allocator actually
 * returned (mfxResponse.NumFrameActual), so the header loop never reads past
 * the end of mfxResponse.mids.
 *
 * @return MFX_ERR_NONE on success, otherwise the failing status from
 *         QueryIOSurf or Alloc (via MSDK_CHECK_RESULT).
 */
mfxStatus IntelDecoder::QueryAndAllocRequiredSurfacesForHW()
{
    mfxStatus sts = MFX_ERR_NONE;

    // Query number of required surfaces for decoder
    mfxFrameAllocRequest Request;
    memset(&Request, 0, sizeof(Request));
    sts = mfxDEC->QueryIOSurf(&mfxVideoParams, &Request);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Provisional count; replaced by the allocator's actual count below.
    // Kept here so the member has the same value as before if Alloc fails.
    numSurfaces = Request.NumFrameSuggested;

    // This line is only required for Windows DirectX11 to ensure that surfaces
    // can be retrieved by the application
    Request.Type |= WILL_READ;

    // Allocate surfaces for decoder (the response is stored in the mfxResponse member)
    sts = pMfxAllocator->Alloc(pMfxAllocator->pthis, &Request, &mfxResponse);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // mids[] holds exactly NumFrameActual entries; index only that many.
    numSurfaces = mfxResponse.NumFrameActual;

    // Allocate surface headers (mfxFrameSurface1) for decoder
    pmfxSurfaces = new mfxFrameSurface1 *[numSurfaces];
    MSDK_CHECK_POINTER(pmfxSurfaces, MFX_ERR_MEMORY_ALLOC);
    for (int i = 0; i < numSurfaces; i++)
    {
        pmfxSurfaces[i] = new mfxFrameSurface1;
        memset(pmfxSurfaces[i], 0, sizeof(mfxFrameSurface1));
        memcpy(&(pmfxSurfaces[i]->Info), &(mfxVideoParams.mfx.FrameInfo), sizeof(mfxFrameInfo));
        // MID (memory id) represents one video NV12 surface
        pmfxSurfaces[i]->Data.MemId = mfxResponse.mids[i];
    }

    return sts;
}
/**
 * Drains the frames still buffered inside the decoder and renders each one.
 *
 * DecodeFrameAsync is called with a NULL bitstream until it stops reporting
 * success, a warning or MFX_ERR_MORE_SURFACE. Every synchronized frame is
 * counted in nFrame and handed to outMan.Render; unless the implementation is
 * MFX_IMPL_SOFTWARE the surface is locked through pMfxAllocator around the
 * render call. Afterwards the elapsed time is accumulated and a summary line
 * is printed.
 *
 * @return MFX_ERR_NONE once the decoder reports MFX_ERR_MORE_DATA,
 *         MFX_ERR_MEMORY_ALLOC when no free surface is found, otherwise the
 *         failing status.
 */
mfxStatus IntelDecoder::FlushDecoderAndRender()
{
    mfxStatus sts = MFX_ERR_NONE;
    mfxGetTime(&tStart);

    //
    // Stage 2: Retrieve the buffered decoded frames
    //
    while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_SURFACE == sts)
    {
        // Device was busy on the previous attempt: wait, then repeat the same call
        if (MFX_WRN_DEVICE_BUSY == sts)
        {
            MSDK_SLEEP(1);
        }

        // Pick a working surface the decoder may write into
        nIndex = GetFreeSurfaceIndex(pmfxSurfaces, numSurfaces);
        MSDK_CHECK_ERROR(MFX_ERR_NOT_FOUND, nIndex, MFX_ERR_MEMORY_ALLOC);

        // Asynchronous decode call (returns immediately); NULL bitstream requests buffered frames
        sts = mfxDEC->DecodeFrameAsync(NULL, pmfxSurfaces[nIndex], &pmfxOutSurface, &syncp);

        // A warning together with a sync point still means output is available
        if (MFX_ERR_NONE < sts && syncp)
        {
            sts = MFX_ERR_NONE;
        }

        // Wait until the decoded frame is ready
        if (MFX_ERR_NONE == sts)
        {
            sts = pSession->SyncOperation(syncp, 60000);
        }

        // Nothing to render this round; let the loop condition decide what happens next
        if (MFX_ERR_NONE != sts)
        {
            continue;
        }

        ++nFrame;

        if (impl_type != MFX_IMPL_SOFTWARE)
        {
            // Surface locking required when read/write D3D surfaces
            sts = pMfxAllocator->Lock(pMfxAllocator->pthis, pmfxOutSurface->Data.MemId, &(pmfxOutSurface->Data));
            MSDK_BREAK_ON_ERROR(sts);

            outMan.Render(pmfxOutSurface);

            sts = pMfxAllocator->Unlock(pMfxAllocator->pthis, pmfxOutSurface->Data.MemId, &(pmfxOutSurface->Data));
        }
        else
        {
            outMan.Render(pmfxOutSurface);
        }

        printf("Frame number: %d\r", nFrame);
        fflush(stdout);
    }

    // MFX_ERR_MORE_DATA indicates that all buffers have been fetched, exit in case of other errors
    MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    mfxGetTime(&tEnd);
    elapsed += TimeDiffMsec(tEnd, tStart) / 1000;
    double fps = ((double)nFrame / elapsed);
    printf("\nExecution time: %3.2f s (%3.2f fps)\n", elapsed, fps);

    return sts;
}
/**
 * Flushes the encoder.
 *
 * Phase 1 keeps calling EncodeFrameAsync with a NULL surface so the encoder
 * emits its buffered frames into free task slots; when no slot is free the
 * oldest pending task (m_nFirstSyncTask) is synchronized and released.
 * Phase 2 synchronizes every task that still carries a sync point.
 *
 * @return MFX_ERR_NONE when everything was drained, otherwise the failing status.
 */
mfxStatus QSV_Encoder_Internal::Drain()
{
    mfxStatus sts = MFX_ERR_NONE;

    //
    // Drain the buffered encoded frames
    //
    while (MFX_ERR_NONE <= sts)
    {
        const int nFreeTask = GetFreeTaskIndex(m_pTaskPool, m_nTaskPool);

        if (MFX_ERR_NOT_FOUND == nFreeTask)
        {
            // Pool exhausted: wait for the oldest task and recycle its slot
            sts = m_session.SyncOperation(m_pTaskPool[m_nFirstSyncTask].syncp, 60000);
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

            m_pTaskPool[m_nFirstSyncTask].syncp = NULL;
            m_nFirstSyncTask = (m_nFirstSyncTask + 1) % m_nTaskPool;
            continue;
        }

        for (;;)
        {
            // Asynchronous encode call (returns immediately); NULL surface requests buffered output
            sts = m_pmfxENC->EncodeFrameAsync(NULL, NULL,
                                              &m_pTaskPool[nFreeTask].mfxBS,
                                              &m_pTaskPool[nFreeTask].syncp);

            // Plain success or an error: nothing to retry
            if (MFX_ERR_NONE >= sts)
            {
                break;
            }

            // Warning, but output is available: treat as success
            if (m_pTaskPool[nFreeTask].syncp)
            {
                sts = MFX_ERR_NONE;
                break;
            }

            // Warning without output: repeat the call, pausing first if the device is busy
            if (MFX_WRN_DEVICE_BUSY == sts)
            {
                MSDK_SLEEP(1);
            }
        }
    }

    // MFX_ERR_MORE_DATA indicates that there are no more buffered frames, exit in case of other errors
    MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    //
    // Sync all remaining tasks in task pool
    //
    while (m_pTaskPool[m_nFirstSyncTask].syncp)
    {
        sts = m_session.SyncOperation(m_pTaskPool[m_nFirstSyncTask].syncp, 60000);
        MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

        m_pTaskPool[m_nFirstSyncTask].syncp = NULL;
        m_nFirstSyncTask = (m_nFirstSyncTask + 1) % m_nTaskPool;
    }

    return sts;
}
/**
 * Opens the encoder: initializes the session, creates the MFXVideoENCODE
 * object, validates the parameters built from pParams, allocates surfaces,
 * initializes the encoder, reads back the selected video parameters and
 * prepares the bitstream buffer.
 *
 * @param pParams  Encoder settings forwarded to InitParams.
 * @return MFX_ERR_NONE on success, otherwise the status of the first failing step.
 */
mfxStatus QSV_Encoder_Internal::Open(qsv_param_t * pParams)
{
    // D3D11 surfaces need the allocator; system memory runs without one
    mfxStatus sts = Initialize(m_impl, m_ver, &m_session,
                               m_bUseD3D11 ? &m_mfxAllocator : NULL);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    m_pmfxENC = new MFXVideoENCODE(m_session);

    InitParams(pParams);

    // Validate in place; an "incompatible parameter" warning is accepted
    sts = m_pmfxENC->Query(&m_mfxEncParams, &m_mfxEncParams);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_INCOMPATIBLE_VIDEO_PARAM);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    sts = AllocateSurfaces();
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    sts = m_pmfxENC->Init(&m_mfxEncParams);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    sts = GetVideoParam();
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    sts = InitBitstream();
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    return sts;
}
/**
 * Parses the stream header from bs into par, with two fallbacks for the case
 * where the decoder answers MFX_ERR_MORE_DATA:
 *   1. retry with the bitstream temporarily flagged MFX_BITSTREAM_COMPLETE_FRAME;
 *   2. for AVC only, retry on a private copy of the payload with five extra
 *      bytes appended (a 00 00 00 01 start code followed by one byte of value 1).
 *
 * The caller's bitstream is left as the decoder calls left it; the temporary
 * copy is released before returning.
 *
 * @param bs   Input bitstream; its valid bytes are the DataLength bytes that
 *             start at Data + DataOffset.
 * @param par  Receives the decoded video parameters.
 * @return MFX_ERR_NULL_PTR when the decoder, bs or par is missing; otherwise the
 *         decoder status with MFX_WRN_PARTIAL_ACCELERATION mapped to MFX_ERR_NONE.
 */
mfxStatus CQuickSyncDecoder::DecodeHeader(mfxBitstream* bs, mfxVideoParam* par)
{
    MSDK_CHECK_POINTER(m_pmfxDEC && bs && par, MFX_ERR_NULL_PTR);

    mfxStatus sts = m_pmfxDEC->DecodeHeader(bs, par);

    // Try again, marking the bitstream as complete
    // This workaround should work on all driver versions. But it doesn't work on 15.28 & 15.31
    if (MFX_ERR_MORE_DATA == sts)
    {
        mfxU16 oldFlag = bs->DataFlag;
        bs->DataFlag = MFX_BITSTREAM_COMPLETE_FRAME;
        sts = m_pmfxDEC->DecodeHeader(bs, par);
        bs->DataFlag = oldFlag;
    }

    // Another workaround for 15.28 and 15.31 drivers
    if (MFX_ERR_MORE_DATA == sts && par->mfx.CodecId == MFX_CODEC_AVC)
    {
        // DataLength already counts only the valid bytes that start at
        // Data + DataOffset, so the whole DataLength is copied and the copy
        // starts at offset zero in its own buffer.
        const mfxU32 payloadLen = bs->DataLength;
        const mfxU32 paddedLen = payloadLen + 5;

        mfxBitstream bs2 = *bs;
        bs2.Data = new mfxU8[paddedLen];
        if (payloadLen > 0)
        {
            memcpy(bs2.Data, bs->Data + bs->DataOffset, payloadLen);
        }
        bs2.DataOffset = 0;
        bs2.MaxLength = bs2.DataLength = paddedLen;

        // Append an H264 start code (00 00 00 01) plus one trailing byte of
        // value 1. Written byte by byte so the result does not depend on
        // host endianness or on the alignment of the tail.
        mfxU8* tail = bs2.Data + payloadLen;
        tail[0] = 0;
        tail[1] = 0;
        tail[2] = 0;
        tail[3] = 1;
        tail[4] = 1;

        sts = m_pmfxDEC->DecodeHeader(&bs2, par);
        delete[] bs2.Data; // Cleanup
    }

    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION);
    return sts;
}
/**
 * System-memory variant of the surface setup: asks the decoder how many
 * surfaces it suggests, allocates one contiguous byte buffer for all of them
 * (12 bits per pixel) and creates a header per surface whose Y/U/V pointers
 * reference its slice of that buffer.
 *
 * NOTE(review): the big buffer is only reachable through the Y pointers of the
 * surface headers after this function returns - confirm that cleanup code
 * releases it.
 *
 * @return MFX_ERR_NONE on success, otherwise the failing QueryIOSurf status.
 */
mfxStatus IntelDecoder::QueryAndAllocRequiredSurfacesForSW()
{
    // Ask the decoder for its surface requirements
    mfxFrameAllocRequest DecRequest;
    memset(&DecRequest, 0, sizeof(DecRequest));

    mfxStatus sts = mfxDEC->QueryIOSurf(&mfxVideoParams, &DecRequest);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    numSurfaces = DecRequest.NumFrameSuggested;

    // Only required for Windows DirectX11 surfaces, intentionally left disabled here:
    //DecRequest.Type |= WILL_READ;

    // Buffer geometry: aligned width/height, NV12 uses 12 bits per pixel.
    // NOTE(review): the original comment asks for multiples of 32 while the
    // height is aligned with MSDK_ALIGN16 - confirm which is intended.
    mfxU16 width = (mfxU16)MSDK_ALIGN(DecRequest.Info.Width);
    mfxU16 height = (mfxU16)MSDK_ALIGN16(DecRequest.Info.Height);
    mfxU8 bitsPerPixel = 12;
    mfxU32 surfaceSize = width * height * bitsPerPixel / 8;

    // One allocation backs the pixel data of every surface
    mfxU8* surfaceBuffers = new mfxU8[surfaceSize * numSurfaces];

    // Surface headers (mfxFrameSurface1) for the decoder
    pmfxSurfaces = new mfxFrameSurface1 *[numSurfaces];
    MSDK_CHECK_POINTER(pmfxSurfaces, MFX_ERR_MEMORY_ALLOC);

    for (int idx = 0; idx < numSurfaces; ++idx)
    {
        mfxFrameSurface1* pHeader = new mfxFrameSurface1;
        memset(pHeader, 0, sizeof(mfxFrameSurface1));
        memcpy(&(pHeader->Info), &(mfxVideoParams.mfx.FrameInfo), sizeof(mfxFrameInfo));

        // Luma plane first, interleaved chroma right behind it
        pHeader->Data.Y = &surfaceBuffers[surfaceSize * idx];
        pHeader->Data.U = pHeader->Data.Y + width * height;
        pHeader->Data.V = pHeader->Data.U + 1;
        pHeader->Data.Pitch = width;

        pmfxSurfaces[idx] = pHeader;
    }

    return sts;
}
/* Methods required for integration with Media SDK */

/**
 * Plugin initialization: stores a private copy of the core interface, reads
 * the implementation type from it and asks the core for the device handle
 * matching that implementation (D3D9 device manager or D3D11 device on
 * Windows, VA display elsewhere). If no external allocator has been set, the
 * core's frame allocator is adopted.
 *
 * @param core  Core interface supplied by Media SDK; must not be NULL.
 * @return MFX_ERR_NONE on success, MFX_ERR_NULL_PTR for a NULL core, otherwise
 *         the failing status. MFX_ERR_NOT_FOUND from GetHandle is tolerated
 *         (software library in use).
 */
mfxStatus Rotate::PluginInit(mfxCoreInterface *core)
{
    MSDK_CHECK_POINTER(core, MFX_ERR_NULL_PTR);
    mfxStatus sts = MFX_ERR_NONE;

    // Replace any previously stored core interface with a fresh copy
    MSDK_SAFE_DELETE(m_pmfxCore);

    m_pmfxCore = new mfxCoreInterface;
    MSDK_CHECK_POINTER(m_pmfxCore, MFX_ERR_MEMORY_ALLOC);
    *m_pmfxCore = *core;

    mfxCoreParam par = {0};
    sts = m_pmfxCore->GetCoreParam(m_pmfxCore->pthis, &par);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
    m_impl = par.Impl;

#if defined(_WIN32) || defined(_WIN64)
    if (MFX_IMPL_VIA_MASK(m_impl) == MFX_IMPL_VIA_D3D9)
    {
        sts = m_pmfxCore->GetHandle(m_pmfxCore->pthis, MFX_HANDLE_D3D9_DEVICE_MANAGER, &m_device);
    }
    else if (MFX_IMPL_VIA_MASK(m_impl) == MFX_IMPL_VIA_D3D11)
    {
        sts = m_pmfxCore->GetHandle(m_pmfxCore->pthis, MFX_HANDLE_D3D11_DEVICE, &m_device);
    }
    // Any other implementation: no device handle is requested
#else
    sts = m_pmfxCore->GetHandle(m_pmfxCore->pthis, MFX_HANDLE_VA_DISPLAY, &m_device);
#endif

    // SW lib is used if GetHandle return MFX_ERR_NOT_FOUND
    MSDK_IGNORE_MFX_STS(sts, MFX_ERR_NOT_FOUND);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // if external allocator not set use the one from core interface
    if (!m_pAlloc && m_pmfxCore->FrameAllocator.pthis)
        m_pAlloc = &m_pmfxCore->FrameAllocator;

    return MFX_ERR_NONE;
}
/**
 * Opens the encoder on the existing m_pmfxENC object: builds the parameters
 * from pParams, validates them, allocates surfaces, initializes the encoder,
 * reads back the selected video parameters and prepares the bitstream buffer.
 *
 * @param pParams  Encoder settings forwarded to InitParams.
 * @return MFX_ERR_NONE on success, otherwise the status of the first failing step.
 */
mfxStatus QSV_Encoder_Internal::Open(qsv_param_t * pParams)
{
    InitParams(pParams);

    // Validate in place; an "incompatible parameter" warning is accepted
    mfxStatus sts = m_pmfxENC->Query(&m_mfxEncParams, &m_mfxEncParams);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_INCOMPATIBLE_VIDEO_PARAM);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Input surfaces
    sts = AllocateSurfaces();
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Encoder proper
    sts = m_pmfxENC->Init(&m_mfxEncParams);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Parameters actually selected by the encoder
    sts = GetVideoParam();
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Output bitstream buffer
    sts = InitBitstream();
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    return sts;
}
/**
 * Sets up the whole decode pipeline: reads the decode options, opens the
 * source file, creates the session and frame allocator, creates the decoder,
 * reads the first chunk of the stream and parses its header, initializes the
 * D3D output for the cropped frame size, allocates surfaces (system memory for
 * the software implementation, allocator-backed otherwise) and finally
 * initializes the decoder.
 *
 * @param hWnd  Window handed to outMan.InitD3D.
 * @return MFX_ERR_NONE (or a non-fatal warning) on success, MFX_ERR_NULL_PTR
 *         when the options or the source file are unavailable,
 *         MFX_ERR_MEMORY_ALLOC when a buffer cannot be allocated, otherwise
 *         the status of the failing Media SDK call.
 */
mfxStatus IntelDecoder::InitializeX(HWND hWnd)
{
    if (SetDecodeOptions() == MFX_ERR_NULL_PTR)
    {
        fprintf_s(stdout, "Source file couldn't be found.");
        return MFX_ERR_NULL_PTR;
    }

    // Open input H.264 elementary stream (ES) file
    MSDK_FOPEN(fSource, options.SourceName, "rb");
    MSDK_CHECK_POINTER(fSource, MFX_ERR_NULL_PTR);

    mfxIMPL impl = options.impl;

    // Version 1.3 is selected for Video Conference Mode compatibility.
    mfxVersion ver = { { 3, 1 } };

    pSession = new MFXVideoSession();

    pMfxAllocator = (mfxFrameAllocator*)malloc(sizeof(mfxFrameAllocator));
    // malloc may fail; the memset below must not run on a NULL pointer
    MSDK_CHECK_POINTER(pMfxAllocator, MFX_ERR_MEMORY_ALLOC);
    memset(pMfxAllocator, 0, sizeof(mfxFrameAllocator));

    mfxStatus sts = Initialize(impl, ver, pSession, pMfxAllocator);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // impl_type is only meaningful when the query succeeded
    sts = pSession->QueryIMPL(&impl_type);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
    if (impl_type == MFX_IMPL_SOFTWARE)
    {
        printf("Implementation type is : SOFTWARE\n");
    }
    else
    {
        printf("Implementation type is : HARDWARE\n");
    }

    // Create Media SDK decoder
    mfxDEC = new MFXVideoDECODE(*pSession);

    SetDecParameters();

    // Prepare Media SDK bit stream buffer
    memset(&mfxBS, 0, sizeof(mfxBS));
    mfxBS.DataFlag = MFX_BITSTREAM_COMPLETE_FRAME;
    mfxBS.MaxLength = 1024 * 1024;
    mfxBS.Data = new mfxU8[mfxBS.MaxLength];
    MSDK_CHECK_POINTER(mfxBS.Data, MFX_ERR_MEMORY_ALLOC);

    // Read a chunk of data from stream file into bit stream buffer
    // - Parse bit stream, searching for header and fill video parameters structure
    // - Abort if bit stream header is not found in the first bit stream buffer chunk
    sts = ReadBitStreamData(&mfxBS, fSource);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    sts = mfxDEC->DecodeHeader(&mfxBS, &mfxVideoParams);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    mfxVideoParams.AsyncDepth = 1;

    outMan.InitD3D(hWnd, mfxVideoParams.mfx.FrameInfo.CropW, mfxVideoParams.mfx.FrameInfo.CropH);

    // Allocate surfaces according to the selected implementation
    if (impl_type == MFX_IMPL_SOFTWARE)
    {
        sts = QueryAndAllocRequiredSurfacesForSW();
        MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
    }
    else
    {
        sts = QueryAndAllocRequiredSurfacesForHW();
        MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
    }

    // Initialize the Media SDK decoder
    sts = mfxDEC->Init(&mfxVideoParams);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // The function is declared to return a status; report the final one.
    return sts;
}
int main() { mfxStatus sts = MFX_ERR_NONE; mfxU16 inputWidth = 1920; mfxU16 inputHeight = 1080; // ===================================================================== // Intel Media SDK encode pipeline setup // - In this example we are encoding an AVC (H.264) stream // - Video memory surfaces are used // - Asynchronous operation by executing more than one encode operation simultaneously // // Open input YV12 YUV file FILE* fSource; fopen_s(&fSource, "bbb1920x1080.yuv", "rb"); MSDK_CHECK_POINTER(fSource, MFX_ERR_NULL_PTR); // Create output elementary stream (ES) H.264 file FILE* fSink; fopen_s(&fSink, "test_d3d_async.264", "wb"); MSDK_CHECK_POINTER(fSink, MFX_ERR_NULL_PTR); // Initialize Media SDK session // - MFX_IMPL_AUTO_ANY selects HW accelaration if available (on any adapter) // - Version 1.0 is selected for greatest backwards compatibility. // If more recent API features are needed, change the version accordingly mfxIMPL impl = MFX_IMPL_AUTO_ANY; #ifdef DX11_D3D impl |= MFX_IMPL_VIA_D3D11; #endif mfxVersion ver = {0, 1}; MFXVideoSession mfxSession; sts = mfxSession.Init(impl, &ver); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Create DirectX device context mfxHDL deviceHandle; sts = CreateHWDevice(mfxSession, &deviceHandle, NULL); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Provide device manager to Media SDK sts = mfxSession.SetHandle(DEVICE_MGR_TYPE, deviceHandle); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); mfxFrameAllocator mfxAllocator; mfxAllocator.Alloc = simple_alloc; mfxAllocator.Free = simple_free; mfxAllocator.Lock = simple_lock; mfxAllocator.Unlock = simple_unlock; mfxAllocator.GetHDL = simple_gethdl; // When using video memory we must provide Media SDK with an external allocator sts = mfxSession.SetFrameAllocator(&mfxAllocator); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Initialize encoder parameters mfxVideoParam mfxEncParams; memset(&mfxEncParams, 0, sizeof(mfxEncParams)); mfxEncParams.mfx.CodecId = MFX_CODEC_AVC; 
mfxEncParams.mfx.TargetUsage = MFX_TARGETUSAGE_BALANCED; mfxEncParams.mfx.TargetKbps = 2000; mfxEncParams.mfx.RateControlMethod = MFX_RATECONTROL_VBR; mfxEncParams.mfx.FrameInfo.FrameRateExtN = 30; mfxEncParams.mfx.FrameInfo.FrameRateExtD = 1; mfxEncParams.mfx.FrameInfo.FourCC = MFX_FOURCC_NV12; mfxEncParams.mfx.FrameInfo.ChromaFormat = MFX_CHROMAFORMAT_YUV420; mfxEncParams.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; mfxEncParams.mfx.FrameInfo.CropX = 0; mfxEncParams.mfx.FrameInfo.CropY = 0; mfxEncParams.mfx.FrameInfo.CropW = inputWidth; mfxEncParams.mfx.FrameInfo.CropH = inputHeight; // Width must be a multiple of 16 // Height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture mfxEncParams.mfx.FrameInfo.Width = MSDK_ALIGN16(inputWidth); mfxEncParams.mfx.FrameInfo.Height = (MFX_PICSTRUCT_PROGRESSIVE == mfxEncParams.mfx.FrameInfo.PicStruct)? MSDK_ALIGN16(inputHeight) : MSDK_ALIGN32(inputHeight); mfxEncParams.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY; // Configure Media SDK to keep more operations in flight // - AsyncDepth represents the number of tasks that can be submitted, before synchronizing is required // - The choice of AsyncDepth = 4 is quite arbitrary but has proven to result in good performance mfxEncParams.AsyncDepth = 4; // Create Media SDK encoder MFXVideoENCODE mfxENC(mfxSession); // Validate video encode parameters (optional) // - In this example the validation result is written to same structure // - MFX_WRN_INCOMPATIBLE_VIDEO_PARAM is returned if some of the video parameters are not supported, // instead the encoder will select suitable parameters closest matching the requested configuration sts = mfxENC.Query(&mfxEncParams, &mfxEncParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_INCOMPATIBLE_VIDEO_PARAM); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Query number of required surfaces for encoder mfxFrameAllocRequest EncRequest; memset(&EncRequest, 0, sizeof(EncRequest)); sts = 
mfxENC.QueryIOSurf(&mfxEncParams, &EncRequest); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); #ifdef DX11_D3D EncRequest.Type |= WILL_WRITE; // Hint to DX11 memory handler that application will write data to input surfaces #endif // Allocate required surfaces mfxFrameAllocResponse mfxResponse; sts = mfxAllocator.Alloc(mfxAllocator.pthis, &EncRequest, &mfxResponse); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); mfxU16 nEncSurfNum = mfxResponse.NumFrameActual; // Allocate surface headers (mfxFrameSurface1) for decoder mfxFrameSurface1** pmfxSurfaces = new mfxFrameSurface1*[nEncSurfNum]; MSDK_CHECK_POINTER(pmfxSurfaces, MFX_ERR_MEMORY_ALLOC); for (int i = 0; i < nEncSurfNum; i++) { pmfxSurfaces[i] = new mfxFrameSurface1; memset(pmfxSurfaces[i], 0, sizeof(mfxFrameSurface1)); memcpy(&(pmfxSurfaces[i]->Info), &(mfxEncParams.mfx.FrameInfo), sizeof(mfxFrameInfo)); pmfxSurfaces[i]->Data.MemId = mfxResponse.mids[i]; // MID (memory id) represent one D3D NV12 surface #ifndef ENABLE_INPUT // In case simulating direct access to frames we initialize the allocated surfaces with default pattern // - For true benchmark comparisons to async workloads all surfaces must have the same data #ifndef DX11_D3D IDirect3DSurface9 *pSurface; D3DSURFACE_DESC desc; D3DLOCKED_RECT locked; pSurface = (IDirect3DSurface9 *)mfxResponse.mids[i]; pSurface->GetDesc(&desc); pSurface->LockRect(&locked, 0, D3DLOCK_NOSYSLOCK); memset((mfxU8 *)locked.pBits, 100, desc.Height*locked.Pitch); // Y plane memset((mfxU8 *)locked.pBits + desc.Height * locked.Pitch, 50, (desc.Height*locked.Pitch)/2); // UV plane pSurface->UnlockRect(); #else // For now, just leave D3D11 surface data uninitialized #endif #endif } // Initialize the Media SDK encoder sts = mfxENC.Init(&mfxEncParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Retrieve video parameters selected by encoder. 
// - BufferSizeInKB parameter is required to set bit stream buffer size mfxVideoParam par; memset(&par, 0, sizeof(par)); sts = mfxENC.GetVideoParam(&par); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Create task pool to improve asynchronous performance (greater GPU utilization) mfxU16 taskPoolSize = mfxEncParams.AsyncDepth; // number of tasks that can be submitted, before synchronizing is required Task* pTasks = new Task[taskPoolSize]; memset(pTasks, 0, sizeof(Task) * taskPoolSize); for(int i=0;i<taskPoolSize;i++) { // Prepare Media SDK bit stream buffer pTasks[i].mfxBS.MaxLength = par.mfx.BufferSizeInKB * 1000; pTasks[i].mfxBS.Data = new mfxU8[pTasks[i].mfxBS.MaxLength]; MSDK_CHECK_POINTER(pTasks[i].mfxBS.Data, MFX_ERR_MEMORY_ALLOC); } // =================================== // Start encoding the frames // #ifdef ENABLE_BENCHMARK LARGE_INTEGER tStart, tEnd; QueryPerformanceFrequency(&tStart); double freq = (double)tStart.QuadPart; QueryPerformanceCounter(&tStart); #endif int nEncSurfIdx = 0; int nTaskIdx = 0; int nFirstSyncTask = 0; mfxU32 nFrame = 0; // // Stage 1: Main encoding loop // while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts) { nTaskIdx = GetFreeTaskIndex(pTasks, taskPoolSize); // Find free task if(MFX_ERR_NOT_FOUND == nTaskIdx) { // No more free tasks, need to sync sts = mfxSession.SyncOperation(pTasks[nFirstSyncTask].syncp, 60000); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink); MSDK_BREAK_ON_ERROR(sts); pTasks[nFirstSyncTask].syncp = NULL; nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize; ++nFrame; #ifdef ENABLE_OUTPUT printf("Frame number: %d\r", nFrame); #endif } else { nEncSurfIdx = GetFreeSurfaceIndex(pmfxSurfaces, nEncSurfNum); // Find free frame surface if (MFX_ERR_NOT_FOUND == nEncSurfIdx) return MFX_ERR_MEMORY_ALLOC; // Surface locking required when read/write D3D surfaces sts = mfxAllocator.Lock(mfxAllocator.pthis, pmfxSurfaces[nEncSurfIdx]->Data.MemId, 
&(pmfxSurfaces[nEncSurfIdx]->Data)); MSDK_BREAK_ON_ERROR(sts); sts = LoadRawFrame(pmfxSurfaces[nEncSurfIdx], fSource); MSDK_BREAK_ON_ERROR(sts); sts = mfxAllocator.Unlock(mfxAllocator.pthis, pmfxSurfaces[nEncSurfIdx]->Data.MemId, &(pmfxSurfaces[nEncSurfIdx]->Data)); MSDK_BREAK_ON_ERROR(sts); for (;;) { // Encode a frame asychronously (returns immediately) sts = mfxENC.EncodeFrameAsync(NULL, pmfxSurfaces[nEncSurfIdx], &pTasks[nTaskIdx].mfxBS, &pTasks[nTaskIdx].syncp); if (MFX_ERR_NONE < sts && !pTasks[nTaskIdx].syncp) // Repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // Wait if device is busy, then repeat the same call } else if (MFX_ERR_NONE < sts && pTasks[nTaskIdx].syncp) { sts = MFX_ERR_NONE; // Ignore warnings if output is available break; } else if (MFX_ERR_NOT_ENOUGH_BUFFER == sts) { // Allocate more bitstream buffer memory here if needed... break; } else break; } } } // MFX_ERR_MORE_DATA means that the input file has ended, need to go to buffering loop, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 2: Retrieve the buffered encoded frames // while (MFX_ERR_NONE <= sts) { nTaskIdx = GetFreeTaskIndex(pTasks, taskPoolSize); // Find free task if(MFX_ERR_NOT_FOUND == nTaskIdx) { // No more free tasks, need to sync sts = mfxSession.SyncOperation(pTasks[nFirstSyncTask].syncp, 60000); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink); MSDK_BREAK_ON_ERROR(sts); pTasks[nFirstSyncTask].syncp = NULL; nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize; ++nFrame; #ifdef ENABLE_OUTPUT printf("Frame number: %d\r", nFrame); #endif } else { for (;;) { // Encode a frame asychronously (returns immediately) sts = mfxENC.EncodeFrameAsync(NULL, NULL, &pTasks[nTaskIdx].mfxBS, &pTasks[nTaskIdx].syncp); if (MFX_ERR_NONE < sts && !pTasks[nTaskIdx].syncp) // Repeat the call if warning and no output { if 
(MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // Wait if device is busy, then repeat the same call } else if (MFX_ERR_NONE < sts && pTasks[nTaskIdx].syncp) { sts = MFX_ERR_NONE; // Ignore warnings if output is available break; } else break; } } } // MFX_ERR_MORE_DATA indicates that there are no more buffered frames, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 3: Sync all remaining tasks in task pool // while(pTasks[nFirstSyncTask].syncp) { sts = mfxSession.SyncOperation(pTasks[nFirstSyncTask].syncp, 60000); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink); MSDK_BREAK_ON_ERROR(sts); pTasks[nFirstSyncTask].syncp = NULL; nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize; ++nFrame; #ifdef ENABLE_OUTPUT printf("Frame number: %d\r", nFrame); #endif } #ifdef ENABLE_BENCHMARK QueryPerformanceCounter(&tEnd); double duration = ((double)tEnd.QuadPart - (double)tStart.QuadPart) / freq; printf("\nExecution time: %3.2fs (%3.2ffps)\n", duration, nFrame/duration); #endif // =================================================================== // Clean up resources // - It is recommended to close Media SDK components first, before releasing allocated surfaces, since // some surfaces may still be locked by internal Media SDK resources. mfxENC.Close(); // mfxSession closed automatically on destruction for (int i = 0; i < nEncSurfNum; i++) delete pmfxSurfaces[i]; MSDK_SAFE_DELETE_ARRAY(pmfxSurfaces); for(int i=0;i<taskPoolSize;i++) MSDK_SAFE_DELETE_ARRAY(pTasks[i].mfxBS.Data); MSDK_SAFE_DELETE_ARRAY(pTasks); fclose(fSource); fclose(fSink); CleanupHWDevice(); return 0; }
/**
 * Creates an H.264 encoder instance working on system-memory NV12 surfaces.
 *
 * Initializes a Media SDK session, configures a VBR AVC encoder at 30 fps for
 * the requested picture size, allocates the input surfaces (pre-filled with a
 * constant pattern) and the output bitstream buffer, and returns the handle
 * holding all of it.
 *
 * NOTE(review): each failing step only records its code in *pErrorCode and
 * setup continues, so a later failure overwrites an earlier code and the
 * handle is returned regardless - callers must check *pErrorCode.
 *
 * @param pErrorCode  Out: 0 on success, otherwise -1 (session init), -2 (Query),
 *                    -3 (QueryIOSurf), -4 (encoder Init) or -5 (GetVideoParam).
 * @param width       Picture width in pixels (crop width).
 * @param height      Picture height in pixels (crop height).
 * @param bitRate     Target bitrate in kbps; 0 selects 128 * 8.
 * @param gop         Not used by this function.
 * @return Pointer to the malloc-allocated IntelEncoderHandle.
 */
extern "C" __declspec(dllexport) void *openEncoder(int *pErrorCode, int width, int height, int bitRate, int gop)
{
    *pErrorCode = 0;

    IntelEncoderHandle *pHandle = (IntelEncoderHandle *) malloc(sizeof(IntelEncoderHandle));

    mfxStatus sts = MFX_ERR_NONE;

    mfxIMPL impl = MFX_IMPL_AUTO_ANY;

    mfxVersion ver;
    ver.Major = 1;
    ver.Minor = 0;

    sts = MFXInit(impl, &ver, &pHandle->session);
    if (MFX_ERR_NONE != sts)
    {
        // TODO:
        *pErrorCode = -1;
    }

    MFXQueryIMPL(pHandle->session, &impl);

    mfxVersion verTemp;
    MFXQueryVersion(pHandle->session, &verTemp);

    mfxVideoParam mfxEncParams;
    memset(&mfxEncParams, 0, sizeof(mfxEncParams));
    mfxEncParams.mfx.CodecId = MFX_CODEC_AVC;
    // mfxEncParams.mfx.CodecProfile = MFX_PROFILE_AVC_CONSTRAINED_BASELINE;
    mfxEncParams.mfx.TargetUsage = MFX_TARGETUSAGE_BALANCED;
    if (0 == bitRate)
        bitRate = 128 * 8;
    mfxEncParams.mfx.TargetKbps = bitRate;
    mfxEncParams.mfx.RateControlMethod = MFX_RATECONTROL_VBR;
    mfxEncParams.mfx.FrameInfo.FrameRateExtN = 30;
    mfxEncParams.mfx.FrameInfo.FrameRateExtD = 1;
    mfxEncParams.mfx.FrameInfo.FourCC = MFX_FOURCC_NV12;
    mfxEncParams.mfx.FrameInfo.ChromaFormat = MFX_CHROMAFORMAT_YUV420;
    mfxEncParams.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;
    mfxEncParams.mfx.FrameInfo.CropX = 0;
    mfxEncParams.mfx.FrameInfo.CropY = 0;
    mfxEncParams.mfx.FrameInfo.CropW = width;
    mfxEncParams.mfx.FrameInfo.CropH = height;
    // Width must be a multiple of 16
    // Height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture
    mfxEncParams.mfx.FrameInfo.Width = MSDK_ALIGN16(width);
    // Both branches must be derived from the picture height.
    mfxEncParams.mfx.FrameInfo.Height = (MFX_PICSTRUCT_PROGRESSIVE == mfxEncParams.mfx.FrameInfo.PicStruct) ?
        MSDK_ALIGN16(height) : MSDK_ALIGN32(height);

    mfxEncParams.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY;

    sts = MFXVideoENCODE_Query(pHandle->session, &mfxEncParams, &mfxEncParams);
    if (MFX_ERR_NONE != sts)
    {
        // TODO:
        *pErrorCode = -2;
    }

    mfxFrameAllocRequest EncRequest;
    memset(&EncRequest, 0, sizeof(EncRequest));
    sts = MFXVideoENCODE_QueryIOSurf(pHandle->session, &mfxEncParams, &EncRequest);
    if (MFX_ERR_NONE != sts)
    {
        // TODO:
        *pErrorCode = -3;
    }

    pHandle->nEncSurfNum = EncRequest.NumFrameSuggested;

    // One contiguous buffer backs the pixel data of every input surface
    mfxU16 w = (mfxU16)MSDK_ALIGN32(EncRequest.Info.Width);
    mfxU16 h = (mfxU16)MSDK_ALIGN32(EncRequest.Info.Height);
    mfxU8 bitsPerPixel = 12; // NV12 format is a 12 bits per pixel format
    mfxU32 surfaceSize = w * h * bitsPerPixel / 8;
    pHandle->pSurfaceBuffers = (mfxU8 *)malloc(surfaceSize * pHandle->nEncSurfNum * sizeof(mfxU8));

    pHandle->ppEncSurfaces = (mfxFrameSurface1 **)malloc(sizeof(mfxFrameSurface1*)* pHandle->nEncSurfNum);
    for (int i = 0; i < pHandle->nEncSurfNum; i++)
    {
        pHandle->ppEncSurfaces[i] = (mfxFrameSurface1 *)malloc(sizeof(mfxFrameSurface1));
        memset(pHandle->ppEncSurfaces[i], 0, sizeof(mfxFrameSurface1));
        memcpy(&(pHandle->ppEncSurfaces[i]->Info), &(mfxEncParams.mfx.FrameInfo), sizeof(mfxFrameInfo));
        pHandle->ppEncSurfaces[i]->Data.Y = &pHandle->pSurfaceBuffers[surfaceSize * i];
        pHandle->ppEncSurfaces[i]->Data.U = pHandle->ppEncSurfaces[i]->Data.Y + w * h;
        pHandle->ppEncSurfaces[i]->Data.V = pHandle->ppEncSurfaces[i]->Data.U + 1;
        pHandle->ppEncSurfaces[i]->Data.Pitch = w;

        // In case simulating direct access to frames we initialize the allocated surfaces with default pattern
        // - For true benchmark comparisons to async workloads all surfaces must have the same data
        memset(pHandle->ppEncSurfaces[i]->Data.Y, 100, w * h);       // Y plane
        memset(pHandle->ppEncSurfaces[i]->Data.U, 50, (w * h) / 2);  // UV plane
    }

    sts = MFXVideoENCODE_Init(pHandle->session, &mfxEncParams);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION);
    if (MFX_ERR_NONE != sts)
    {
        // TODO:
        *pErrorCode = -4;
    }

    // The encoder reports the bitstream buffer size it needs
    mfxVideoParam par;
    memset(&par, 0, sizeof(par));
    sts = MFXVideoENCODE_GetVideoParam(pHandle->session, &par);
    if (MFX_ERR_NONE != sts)
    {
        // TODO:
        *pErrorCode = -5;
    }

    memset(&pHandle->mfxBS, 0, sizeof(pHandle->mfxBS));
    pHandle->mfxBS.MaxLength = par.mfx.BufferSizeInKB * 1024;
    pHandle->mfxBS.Data = (mfxU8 *)malloc(sizeof(mfxU8)* pHandle->mfxBS.MaxLength);

    return pHandle;
}
// Thread entry point that transcodes one H.264 elementary stream with Intel Media SDK
// using opaque surfaces: decode -> VPP (resize to half the crop size) -> encode.
//
// arg  Pointer to a ThreadData whose `id` field selects the output file name
//      ("bbb320x240_NN.264", NN = two-digit id) and tags the console output.
//
// Returns 0 after a complete run. The MSDK_CHECK_* macros are defined elsewhere; they
// presumably return early with the given status on failure, in which case the cleanup
// section at the end of this function is not reached - TODO confirm against the macro
// definitions.
DWORD WINAPI TranscodeThread(LPVOID arg)
{
    ThreadData *pData = (ThreadData *)arg;
    int id = pData->id;

    mfxStatus sts = MFX_ERR_NONE;

    // =====================================================================
    // Intel Media SDK transcode opaque pipeline setup
    // - Transcode H.264 to H.264, resizing the encoded stream to half the resolution using VPP
    // - Multiple streams are transcoded concurrently
    // - Same input stream is used for all concurrent transcoding threads
    //

    // Open input H.264 elementary stream (ES) file
    FILE* fSource;
    char inFile[100] = "bbb640x480.264";
    fopen_s(&fSource, inFile, "rb");
    MSDK_CHECK_POINTER(fSource, MFX_ERR_NULL_PTR);

    // Create output elementary stream (ES) H.264 file
    // - Characters 11 and 12 of the template ("xx") are replaced by the two-digit thread id
    FILE* fSink;
    char outFile[100] = "bbb320x240_xx.264";
    outFile[11] = '0' + (char)(id/10);
    outFile[12] = '0' + (char)(id%10);
    fopen_s(&fSink, outFile, "wb");
    MSDK_CHECK_POINTER(fSink, MFX_ERR_NULL_PTR);

    MFXVideoSession* pmfxSession = NULL;

    // Initialize Media SDK session
    // - MFX_IMPL_AUTO_ANY selects HW acceleration if available (on any adapter)
    // - Version 1.3 is selected since the opaque memory feature was added in this API release
    //   If more recent API features are needed, change the version accordingly
    mfxIMPL impl = MFX_IMPL_AUTO_ANY;
    mfxVersion ver = {3, 1}; // Note: API 1.3 !
    pmfxSession = new MFXVideoSession;
    MSDK_CHECK_POINTER(pmfxSession, MFX_ERR_NULL_PTR);
    sts = pmfxSession->Init(impl, &ver);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Create Media SDK decoder & encoder & VPP
    MFXVideoDECODE* pmfxDEC = new MFXVideoDECODE(*pmfxSession);
    MSDK_CHECK_POINTER(pmfxDEC, MFX_ERR_NULL_PTR);
    MFXVideoENCODE* pmfxENC = new MFXVideoENCODE(*pmfxSession);
    MSDK_CHECK_POINTER(pmfxENC, MFX_ERR_NULL_PTR);
    MFXVideoVPP* pmfxVPP = new MFXVideoVPP(*pmfxSession);
    MSDK_CHECK_POINTER(pmfxVPP, MFX_ERR_NULL_PTR);

    // Set required video parameters for decode
    mfxVideoParam mfxDecParams;
    memset(&mfxDecParams, 0, sizeof(mfxDecParams));
    mfxDecParams.mfx.CodecId = MFX_CODEC_AVC;
    mfxDecParams.IOPattern = MFX_IOPATTERN_OUT_OPAQUE_MEMORY;

    // Configure Media SDK to keep more operations in flight
    // - AsyncDepth represents the number of tasks that can be submitted, before synchronizing is required
    // - The choice of AsyncDepth = 3 is quite arbitrary but has proven to result in good performance
    mfxDecParams.AsyncDepth = 3;

    // Prepare Media SDK bit stream buffer for decoder
    // - Arbitrary buffer size for this example
    mfxBitstream mfxBS;
    memset(&mfxBS, 0, sizeof(mfxBS));
    mfxBS.MaxLength = 1024 * 1024;
    mfxBS.Data = new mfxU8[mfxBS.MaxLength];
    MSDK_CHECK_POINTER(mfxBS.Data, MFX_ERR_MEMORY_ALLOC);

    // Read a chunk of data from stream file into bit stream buffer
    // - Parse bit stream, searching for header and fill video parameters structure
    // - Abort if bit stream header is not found in the first bit stream buffer chunk
    sts = ReadBitStreamData(&mfxBS, fSource);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    sts = pmfxDEC->DecodeHeader(&mfxBS, &mfxDecParams);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Initialize VPP parameters
    mfxVideoParam VPPParams;
    memset(&VPPParams, 0, sizeof(VPPParams));
    // Input data
    VPPParams.vpp.In.FourCC = MFX_FOURCC_NV12;
    VPPParams.vpp.In.ChromaFormat = MFX_CHROMAFORMAT_YUV420;
    VPPParams.vpp.In.CropX = 0;
    VPPParams.vpp.In.CropY = 0;
    VPPParams.vpp.In.CropW = mfxDecParams.mfx.FrameInfo.CropW;
    VPPParams.vpp.In.CropH = mfxDecParams.mfx.FrameInfo.CropH;
    VPPParams.vpp.In.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;
    VPPParams.vpp.In.FrameRateExtN = 30;
    VPPParams.vpp.In.FrameRateExtD = 1;
    // width must be a multiple of 16
    // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture
    VPPParams.vpp.In.Width = MSDK_ALIGN16(VPPParams.vpp.In.CropW);
    VPPParams.vpp.In.Height = (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.In.PicStruct)?
                              MSDK_ALIGN16(VPPParams.vpp.In.CropH) : MSDK_ALIGN32(VPPParams.vpp.In.CropH);
    // Output data
    VPPParams.vpp.Out.FourCC = MFX_FOURCC_NV12;
    VPPParams.vpp.Out.ChromaFormat = MFX_CHROMAFORMAT_YUV420;
    VPPParams.vpp.Out.CropX = 0;
    VPPParams.vpp.Out.CropY = 0;
    VPPParams.vpp.Out.CropW = VPPParams.vpp.In.CropW/2; // Half the resolution of decode stream
    VPPParams.vpp.Out.CropH = VPPParams.vpp.In.CropH/2;
    VPPParams.vpp.Out.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;
    VPPParams.vpp.Out.FrameRateExtN = 30;
    VPPParams.vpp.Out.FrameRateExtD = 1;
    // width must be a multiple of 16
    // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture
    VPPParams.vpp.Out.Width = MSDK_ALIGN16(VPPParams.vpp.Out.CropW);
    VPPParams.vpp.Out.Height = (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.Out.PicStruct)?
                               MSDK_ALIGN16(VPPParams.vpp.Out.CropH) : MSDK_ALIGN32(VPPParams.vpp.Out.CropH);

    VPPParams.IOPattern = MFX_IOPATTERN_IN_OPAQUE_MEMORY | MFX_IOPATTERN_OUT_OPAQUE_MEMORY;

    // Configure Media SDK to keep more operations in flight
    // - AsyncDepth represents the number of tasks that can be submitted, before synchronizing is required
    VPPParams.AsyncDepth = mfxDecParams.AsyncDepth;

    // Initialize encoder parameters
    mfxVideoParam mfxEncParams;
    memset(&mfxEncParams, 0, sizeof(mfxEncParams));
    mfxEncParams.mfx.CodecId = MFX_CODEC_AVC;
    mfxEncParams.mfx.TargetUsage = MFX_TARGETUSAGE_BALANCED;
    mfxEncParams.mfx.TargetKbps = 500;
    mfxEncParams.mfx.RateControlMethod = MFX_RATECONTROL_VBR;
    mfxEncParams.mfx.FrameInfo.FrameRateExtN = 30;
    mfxEncParams.mfx.FrameInfo.FrameRateExtD = 1;
    mfxEncParams.mfx.FrameInfo.FourCC = MFX_FOURCC_NV12;
    mfxEncParams.mfx.FrameInfo.ChromaFormat = MFX_CHROMAFORMAT_YUV420;
    mfxEncParams.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;
    mfxEncParams.mfx.FrameInfo.CropX = 0;
    mfxEncParams.mfx.FrameInfo.CropY = 0;
    mfxEncParams.mfx.FrameInfo.CropW = VPPParams.vpp.Out.CropW; // Half the resolution of decode stream
    mfxEncParams.mfx.FrameInfo.CropH = VPPParams.vpp.Out.CropH;
    // width must be a multiple of 16
    // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture
    mfxEncParams.mfx.FrameInfo.Width = MSDK_ALIGN16(mfxEncParams.mfx.FrameInfo.CropW);
    mfxEncParams.mfx.FrameInfo.Height = (MFX_PICSTRUCT_PROGRESSIVE == mfxEncParams.mfx.FrameInfo.PicStruct)?
                                        MSDK_ALIGN16(mfxEncParams.mfx.FrameInfo.CropH) : MSDK_ALIGN32(mfxEncParams.mfx.FrameInfo.CropH);

    mfxEncParams.IOPattern = MFX_IOPATTERN_IN_OPAQUE_MEMORY;

    // Configure Media SDK to keep more operations in flight
    // - AsyncDepth represents the number of tasks that can be submitted, before synchronizing is required
    mfxEncParams.AsyncDepth = mfxDecParams.AsyncDepth;

    // Query number required surfaces for decoder
    mfxFrameAllocRequest DecRequest;
    memset(&DecRequest, 0, sizeof(DecRequest));
    sts = pmfxDEC->QueryIOSurf(&mfxDecParams, &DecRequest);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Query number required surfaces for encoder
    mfxFrameAllocRequest EncRequest;
    memset(&EncRequest, 0, sizeof(EncRequest));
    sts = pmfxENC->QueryIOSurf(&mfxEncParams, &EncRequest);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Query number of required surfaces for VPP
    mfxFrameAllocRequest VPPRequest[2];// [0] - in, [1] - out
    memset(&VPPRequest, 0, sizeof(mfxFrameAllocRequest)*2);
    sts = pmfxVPP->QueryIOSurf(&VPPParams, VPPRequest);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Determine the required number of surfaces for decoder output (VPP input) and for VPP output (encoder input)
    mfxU16 nSurfNumDecVPP = DecRequest.NumFrameSuggested + VPPRequest[0].NumFrameSuggested + VPPParams.AsyncDepth;
    mfxU16 nSurfNumVPPEnc = EncRequest.NumFrameSuggested + VPPRequest[1].NumFrameSuggested + VPPParams.AsyncDepth;

    // Initialize shared surfaces for decoder, VPP and encode
    // - Note that no buffer memory is allocated, for opaque memory this is handled by Media SDK internally
    // - Frame surface array keeps reference to all surfaces
    // - Opaque memory is configured with the mfxExtOpaqueSurfaceAlloc extended buffers
    mfxFrameSurface1** pSurfaces = new mfxFrameSurface1*[nSurfNumDecVPP];
    MSDK_CHECK_POINTER(pSurfaces, MFX_ERR_MEMORY_ALLOC);
    for (int i = 0; i < nSurfNumDecVPP; i++)
    {
        pSurfaces[i] = new mfxFrameSurface1;
        MSDK_CHECK_POINTER(pSurfaces[i], MFX_ERR_MEMORY_ALLOC);
        memset(pSurfaces[i], 0, sizeof(mfxFrameSurface1));
        memcpy(&(pSurfaces[i]->Info), &(DecRequest.Info), sizeof(mfxFrameInfo));
    }

    mfxFrameSurface1** pSurfaces2 = new mfxFrameSurface1*[nSurfNumVPPEnc];
    MSDK_CHECK_POINTER(pSurfaces2, MFX_ERR_MEMORY_ALLOC);
    for (int i = 0; i < nSurfNumVPPEnc; i++)
    {
        pSurfaces2[i] = new mfxFrameSurface1;
        MSDK_CHECK_POINTER(pSurfaces2[i], MFX_ERR_MEMORY_ALLOC);
        memset(pSurfaces2[i], 0, sizeof(mfxFrameSurface1));
        memcpy(&(pSurfaces2[i]->Info), &(EncRequest.Info), sizeof(mfxFrameInfo));
    }

    mfxExtOpaqueSurfaceAlloc extOpaqueAllocDec;
    memset(&extOpaqueAllocDec, 0, sizeof(extOpaqueAllocDec));
    extOpaqueAllocDec.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION;
    extOpaqueAllocDec.Header.BufferSz = sizeof(mfxExtOpaqueSurfaceAlloc);
    mfxExtBuffer* pExtParamsDec = (mfxExtBuffer*)&extOpaqueAllocDec;

    mfxExtOpaqueSurfaceAlloc extOpaqueAllocVPP;
    memset(&extOpaqueAllocVPP, 0, sizeof(extOpaqueAllocVPP));
    extOpaqueAllocVPP.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION;
    extOpaqueAllocVPP.Header.BufferSz = sizeof(mfxExtOpaqueSurfaceAlloc);
    mfxExtBuffer* pExtParamsVPP = (mfxExtBuffer*)&extOpaqueAllocVPP;

    mfxExtOpaqueSurfaceAlloc extOpaqueAllocEnc;
    memset(&extOpaqueAllocEnc, 0, sizeof(extOpaqueAllocEnc));
    extOpaqueAllocEnc.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION;
    extOpaqueAllocEnc.Header.BufferSz = sizeof(mfxExtOpaqueSurfaceAlloc);
    mfxExtBuffer* pExtParamsENC = (mfxExtBuffer*)&extOpaqueAllocEnc;

    // Decoder output pool is the VPP input pool; VPP output pool is the encoder input pool
    extOpaqueAllocDec.Out.Surfaces = pSurfaces;
    extOpaqueAllocDec.Out.NumSurface = nSurfNumDecVPP;
    extOpaqueAllocDec.Out.Type = DecRequest.Type;

    memcpy(&extOpaqueAllocVPP.In, &extOpaqueAllocDec.Out, sizeof(extOpaqueAllocDec.Out));
    extOpaqueAllocVPP.Out.Surfaces = pSurfaces2;
    extOpaqueAllocVPP.Out.NumSurface = nSurfNumVPPEnc;
    extOpaqueAllocVPP.Out.Type = EncRequest.Type;

    memcpy(&extOpaqueAllocEnc.In, &extOpaqueAllocVPP.Out, sizeof(extOpaqueAllocVPP.Out));

    mfxDecParams.ExtParam = &pExtParamsDec;
    mfxDecParams.NumExtParam = 1;
    VPPParams.ExtParam = &pExtParamsVPP;
    VPPParams.NumExtParam = 1;
    mfxEncParams.ExtParam = &pExtParamsENC;
    mfxEncParams.NumExtParam = 1;

    // Initialize the Media SDK decoder
    sts = pmfxDEC->Init(&mfxDecParams);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Initialize the Media SDK encoder
    sts = pmfxENC->Init(&mfxEncParams);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Initialize Media SDK VPP
    sts = pmfxVPP->Init(&VPPParams);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Retrieve video parameters selected by encoder.
    // - BufferSizeInKB parameter is required to set bit stream buffer size
    mfxVideoParam par;
    memset(&par, 0, sizeof(par));
    sts = pmfxENC->GetVideoParam(&par);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // Create task pool to improve asynchronous performance (greater GPU utilization)
    mfxU16 taskPoolSize = mfxEncParams.AsyncDepth; // number of tasks that can be submitted, before synchronizing is required
    Task* pTasks = new Task[taskPoolSize];
    memset(pTasks, 0, sizeof(Task) * taskPoolSize);
    for(int i=0;i<taskPoolSize;i++)
    {
        // Prepare Media SDK bit stream buffer
        pTasks[i].mfxBS.MaxLength = par.mfx.BufferSizeInKB * 1000;
        pTasks[i].mfxBS.Data = new mfxU8[pTasks[i].mfxBS.MaxLength];
        MSDK_CHECK_POINTER(pTasks[i].mfxBS.Data, MFX_ERR_MEMORY_ALLOC);
    }

    // ===================================
    // Start transcoding the frames
    //

#ifdef ENABLE_BENCHMARK
    LARGE_INTEGER tStart, tEnd;
    QueryPerformanceFrequency(&tStart);
    double freq = (double)tStart.QuadPart;
    QueryPerformanceCounter(&tStart);
#endif

    mfxSyncPoint syncpD, syncpV;
    mfxFrameSurface1* pmfxOutSurface = NULL;
    mfxU32 nFrame = 0;
    int nIndex = 0;         // index into pSurfaces  (decoder output / VPP input pool)
    int nIndex2 = 0;        // index into pSurfaces2 (VPP output / encoder input pool)
    int nFirstSyncTask = 0; // oldest submitted task, i.e. the next one to synchronize
    int nTaskIdx = 0;

    //
    // Stage 1: Main transcoding loop
    //
    while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts || MFX_ERR_MORE_SURFACE == sts)
    {
        nTaskIdx = GetFreeTaskIndex(pTasks, taskPoolSize); // Find free task
        if(MFX_ERR_NOT_FOUND == nTaskIdx)
        {
            // No more free tasks, need to sync
            sts = pmfxSession->SyncOperation(pTasks[nFirstSyncTask].syncp, 60000);
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

            sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink);
            MSDK_BREAK_ON_ERROR(sts);

            pTasks[nFirstSyncTask].syncp = NULL;
            pTasks[nFirstSyncTask].mfxBS.DataLength = 0;
            pTasks[nFirstSyncTask].mfxBS.DataOffset = 0;
            nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize;

            ++nFrame;
#ifdef ENABLE_OUTPUT
            if((nFrame % 100) == 0)
                printf("(%d) Frame number: %d\n", id, nFrame);
#endif
        }
        else
        {
            if (MFX_WRN_DEVICE_BUSY == sts)
                Sleep(1); // just wait and then repeat the same call to DecodeFrameAsync

            if (MFX_ERR_MORE_DATA == sts)
            {
                sts = ReadBitStreamData(&mfxBS, fSource); // Read more data to input bit stream
                MSDK_BREAK_ON_ERROR(sts);
            }

            if (MFX_ERR_MORE_SURFACE == sts || MFX_ERR_NONE == sts)
            {
                nIndex = GetFreeSurfaceIndex(pSurfaces, nSurfNumDecVPP); // Find free frame surface
                if (MFX_ERR_NOT_FOUND == nIndex)
                    return MFX_ERR_MEMORY_ALLOC;
            }

            // Decode a frame asynchronously (returns immediately)
            sts = pmfxDEC->DecodeFrameAsync(&mfxBS, pSurfaces[nIndex], &pmfxOutSurface, &syncpD);

            // Ignore warnings if output is available,
            // if no output and no action required just repeat the DecodeFrameAsync call
            if (MFX_ERR_NONE < sts && syncpD)
                sts = MFX_ERR_NONE;

            if (MFX_ERR_NONE == sts)
            {
                nIndex2 = GetFreeSurfaceIndex(pSurfaces2, nSurfNumVPPEnc); // Find free frame surface
                // Check the index that was just looked up (nIndex2); testing nIndex here would let a
                // failed lookup through and index pSurfaces2 with the negative error code.
                if (MFX_ERR_NOT_FOUND == nIndex2)
                    return MFX_ERR_MEMORY_ALLOC;

                for (;;)
                {
                    // Process a frame asynchronously (returns immediately)
                    sts = pmfxVPP->RunFrameVPPAsync(pmfxOutSurface, pSurfaces2[nIndex2], NULL, &syncpV);

                    if (MFX_ERR_NONE < sts && !syncpV) // repeat the call if warning and no output
                    {
                        if (MFX_WRN_DEVICE_BUSY == sts)
                            Sleep(1); // wait if device is busy
                    }
                    else if (MFX_ERR_NONE < sts && syncpV)
                    {
                        sts = MFX_ERR_NONE; // ignore warnings if output is available
                        break;
                    }
                    else
                        break; // not a warning
                }

                // VPP needs more data, let decoder decode another frame as input
                if (MFX_ERR_MORE_DATA == sts)
                {
                    continue;
                }
                else if (MFX_ERR_MORE_SURFACE == sts)
                {
                    // Not relevant for the illustrated workload! Therefore not handled.
                    // Relevant for cases when VPP produces more frames at output than consumes at input. E.g. framerate conversion 30 fps -> 60 fps
                    break;
                }
                else
                    MSDK_BREAK_ON_ERROR(sts);

                for (;;)
                {
                    // Encode a frame asynchronously (returns immediately)
                    sts = pmfxENC->EncodeFrameAsync(NULL, pSurfaces2[nIndex2], &pTasks[nTaskIdx].mfxBS, &pTasks[nTaskIdx].syncp);

                    if (MFX_ERR_NONE < sts && !pTasks[nTaskIdx].syncp) // repeat the call if warning and no output
                    {
                        if (MFX_WRN_DEVICE_BUSY == sts)
                            Sleep(1); // wait if device is busy
                    }
                    else if (MFX_ERR_NONE < sts && pTasks[nTaskIdx].syncp)
                    {
                        sts = MFX_ERR_NONE; // ignore warnings if output is available
                        break;
                    }
                    else if (MFX_ERR_NOT_ENOUGH_BUFFER == sts)
                    {
                        // Allocate more bitstream buffer memory here if needed...
                        break;
                    }
                    else
                        break;
                }
            }
        }
    }

    // MFX_ERR_MORE_DATA means that file has ended, need to go to buffering loop, exit in case of other errors
    MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    //
    // Stage 2: Retrieve the buffered decoded frames
    //
    while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_SURFACE == sts)
    {
        nTaskIdx = GetFreeTaskIndex(pTasks, taskPoolSize); // Find free task
        if(MFX_ERR_NOT_FOUND == nTaskIdx)
        {
            // No more free tasks, need to sync
            sts = pmfxSession->SyncOperation(pTasks[nFirstSyncTask].syncp, 60000);
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

            sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink);
            MSDK_BREAK_ON_ERROR(sts);

            pTasks[nFirstSyncTask].syncp = NULL;
            pTasks[nFirstSyncTask].mfxBS.DataLength = 0;
            pTasks[nFirstSyncTask].mfxBS.DataOffset = 0;
            nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize;

            ++nFrame;
#ifdef ENABLE_OUTPUT
            if((nFrame % 100) == 0)
                printf("(%d) Frame number: %d\n", id, nFrame);
#endif
        }
        else
        {
            if (MFX_WRN_DEVICE_BUSY == sts)
                Sleep(1);

            nIndex = GetFreeSurfaceIndex(pSurfaces, nSurfNumDecVPP); // Find free frame surface
            if (MFX_ERR_NOT_FOUND == nIndex)
                return MFX_ERR_MEMORY_ALLOC;

            // Decode a frame asynchronously (returns immediately)
            // - NULL bit stream: drain frames still buffered inside the decoder
            sts = pmfxDEC->DecodeFrameAsync(NULL, pSurfaces[nIndex], &pmfxOutSurface, &syncpD);

            // Ignore warnings if output is available,
            // if no output and no action required just repeat the DecodeFrameAsync call
            if (MFX_ERR_NONE < sts && syncpD)
                sts = MFX_ERR_NONE;

            if (MFX_ERR_NONE == sts)
            {
                nIndex2 = GetFreeSurfaceIndex(pSurfaces2, nSurfNumVPPEnc); // Find free frame surface
                // Validate nIndex2 (the value just obtained), not nIndex
                if (MFX_ERR_NOT_FOUND == nIndex2)
                    return MFX_ERR_MEMORY_ALLOC;

                for (;;)
                {
                    // Process a frame asynchronously (returns immediately)
                    sts = pmfxVPP->RunFrameVPPAsync(pmfxOutSurface, pSurfaces2[nIndex2], NULL, &syncpV);

                    if (MFX_ERR_NONE < sts && !syncpV) // repeat the call if warning and no output
                    {
                        if (MFX_WRN_DEVICE_BUSY == sts)
                            Sleep(1); // wait if device is busy
                    }
                    else if (MFX_ERR_NONE < sts && syncpV)
                    {
                        sts = MFX_ERR_NONE; // ignore warnings if output is available
                        break;
                    }
                    else
                        break; // not a warning
                }

                // VPP needs more data, let decoder decode another frame as input
                if (MFX_ERR_MORE_DATA == sts)
                {
                    continue;
                }
                else if (MFX_ERR_MORE_SURFACE == sts)
                {
                    // Not relevant for the illustrated workload! Therefore not handled.
                    // Relevant for cases when VPP produces more frames at output than consumes at input. E.g. framerate conversion 30 fps -> 60 fps
                    break;
                }
                else
                    MSDK_BREAK_ON_ERROR(sts);

                for (;;)
                {
                    // Encode a frame asynchronously (returns immediately)
                    sts = pmfxENC->EncodeFrameAsync(NULL, pSurfaces2[nIndex2], &pTasks[nTaskIdx].mfxBS, &pTasks[nTaskIdx].syncp);

                    if (MFX_ERR_NONE < sts && !pTasks[nTaskIdx].syncp) // repeat the call if warning and no output
                    {
                        if (MFX_WRN_DEVICE_BUSY == sts)
                            Sleep(1); // wait if device is busy
                    }
                    else if (MFX_ERR_NONE < sts && pTasks[nTaskIdx].syncp)
                    {
                        sts = MFX_ERR_NONE; // ignore warnings if output is available
                        break;
                    }
                    else if (MFX_ERR_NOT_ENOUGH_BUFFER == sts)
                    {
                        // Allocate more bitstream buffer memory here if needed...
                        break;
                    }
                    else
                        break;
                }
            }
        }
    }

    // MFX_ERR_MORE_DATA indicates that all decode buffers have been fetched, exit in case of other errors
    MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    //
    // Stage 3: Retrieve buffered frames from VPP
    //
    while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts || MFX_ERR_MORE_SURFACE == sts)
    {
        nTaskIdx = GetFreeTaskIndex(pTasks, taskPoolSize); // Find free task
        if(MFX_ERR_NOT_FOUND == nTaskIdx)
        {
            // No more free tasks, need to sync
            sts = pmfxSession->SyncOperation(pTasks[nFirstSyncTask].syncp, 60000);
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

            sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink);
            MSDK_BREAK_ON_ERROR(sts);

            pTasks[nFirstSyncTask].syncp = NULL;
            pTasks[nFirstSyncTask].mfxBS.DataLength = 0;
            pTasks[nFirstSyncTask].mfxBS.DataOffset = 0;
            nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize;

            ++nFrame;
#ifdef ENABLE_OUTPUT
            if((nFrame % 100) == 0)
                printf("(%d) Frame number: %d\n", id, nFrame);
#endif
        }
        else
        {
            nIndex2 = GetFreeSurfaceIndex(pSurfaces2, nSurfNumVPPEnc); // Find free frame surface
            // Validate nIndex2 (the value just obtained), not nIndex
            if (MFX_ERR_NOT_FOUND == nIndex2)
                return MFX_ERR_MEMORY_ALLOC;

            for (;;)
            {
                // Process a frame asynchronously (returns immediately)
                // - NULL input surface: drain frames still buffered inside VPP
                sts = pmfxVPP->RunFrameVPPAsync(NULL, pSurfaces2[nIndex2], NULL, &syncpV);

                if (MFX_ERR_NONE < sts && !syncpV) // repeat the call if warning and no output
                {
                    if (MFX_WRN_DEVICE_BUSY == sts)
                        Sleep(1); // wait if device is busy
                }
                else if (MFX_ERR_NONE < sts && syncpV)
                {
                    sts = MFX_ERR_NONE; // ignore warnings if output is available
                    break;
                }
                else
                    break; // not a warning
            }

            if (MFX_ERR_MORE_SURFACE == sts)
            {
                // Not relevant for the illustrated workload! Therefore not handled.
                // Relevant for cases when VPP produces more frames at output than consumes at input. E.g. framerate conversion 30 fps -> 60 fps
                break;
            }
            else
                MSDK_BREAK_ON_ERROR(sts);

            for (;;)
            {
                // Encode a frame asynchronously (returns immediately)
                sts = pmfxENC->EncodeFrameAsync(NULL, pSurfaces2[nIndex2], &pTasks[nTaskIdx].mfxBS, &pTasks[nTaskIdx].syncp);

                if (MFX_ERR_NONE < sts && !pTasks[nTaskIdx].syncp) // repeat the call if warning and no output
                {
                    if (MFX_WRN_DEVICE_BUSY == sts)
                        Sleep(1); // wait if device is busy
                }
                else if (MFX_ERR_NONE < sts && pTasks[nTaskIdx].syncp)
                {
                    sts = MFX_ERR_NONE; // ignore warnings if output is available
                    break;
                }
                else if (MFX_ERR_NOT_ENOUGH_BUFFER == sts)
                {
                    // Allocate more bitstream buffer memory here if needed...
                    break;
                }
                else
                    break;
            }
        }
    }

    // MFX_ERR_MORE_DATA indicates that all VPP buffers have been fetched, exit in case of other errors
    MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    //
    // Stage 4: Retrieve the buffered encoded frames
    //
    while (MFX_ERR_NONE <= sts)
    {
        nTaskIdx = GetFreeTaskIndex(pTasks, taskPoolSize); // Find free task
        if(MFX_ERR_NOT_FOUND == nTaskIdx)
        {
            // No more free tasks, need to sync
            sts = pmfxSession->SyncOperation(pTasks[nFirstSyncTask].syncp, 60000);
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

            sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink);
            MSDK_BREAK_ON_ERROR(sts);

            pTasks[nFirstSyncTask].syncp = NULL;
            pTasks[nFirstSyncTask].mfxBS.DataLength = 0;
            pTasks[nFirstSyncTask].mfxBS.DataOffset = 0;
            nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize;

            ++nFrame;
#ifdef ENABLE_OUTPUT
            if((nFrame % 100) == 0)
                printf("(%d) Frame number: %d\n", id, nFrame);
#endif
        }
        else
        {
            for (;;)
            {
                // Encode a frame asynchronously (returns immediately)
                // - NULL input surface: drain frames still buffered inside the encoder
                sts = pmfxENC->EncodeFrameAsync(NULL, NULL, &pTasks[nTaskIdx].mfxBS, &pTasks[nTaskIdx].syncp);

                if (MFX_ERR_NONE < sts && !pTasks[nTaskIdx].syncp) // repeat the call if warning and no output
                {
                    if (MFX_WRN_DEVICE_BUSY == sts)
                        Sleep(1); // wait if device is busy
                }
                else if (MFX_ERR_NONE < sts && pTasks[nTaskIdx].syncp)
                {
                    sts = MFX_ERR_NONE; // ignore warnings if output is available
                    break;
                }
                else
                    break;
            }
        }
    }

    // MFX_ERR_MORE_DATA indicates that there are no more buffered frames, exit in case of other errors
    MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    //
    // Stage 5: Sync all remaining tasks in task pool
    //
    while(pTasks[nFirstSyncTask].syncp)
    {
        sts = pmfxSession->SyncOperation(pTasks[nFirstSyncTask].syncp, 60000);
        MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

        sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink);
        MSDK_BREAK_ON_ERROR(sts);

        pTasks[nFirstSyncTask].syncp = NULL;
        pTasks[nFirstSyncTask].mfxBS.DataLength = 0;
        pTasks[nFirstSyncTask].mfxBS.DataOffset = 0;
        nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize;

        ++nFrame;
#ifdef ENABLE_OUTPUT
        if((nFrame % 100) == 0)
            printf("(%d) Frame number: %d\n", id, nFrame);
#endif
    }

#ifdef ENABLE_BENCHMARK
    QueryPerformanceCounter(&tEnd);
    double duration = ((double)tEnd.QuadPart - (double)tStart.QuadPart) / freq;
    printf("\n[%d] Execution time: %3.2fs (%3.2ffps)\n", pData->id, duration, nFrame/duration);
#endif

    // ===================================================================
    // Clean up resources
    // - It is recommended to close Media SDK components first, before releasing allocated surfaces, since
    //   some surfaces may still be locked by internal Media SDK resources.

    pmfxENC->Close();
    pmfxDEC->Close();
    pmfxVPP->Close();

    delete pmfxENC;
    delete pmfxDEC;
    delete pmfxVPP;

    pmfxSession->Close();
    delete pmfxSession;

    for (int i = 0; i < nSurfNumDecVPP; i++)
        delete pSurfaces[i];
    for (int i = 0; i < nSurfNumVPPEnc; i++)
        delete pSurfaces2[i];
    MSDK_SAFE_DELETE_ARRAY(pSurfaces);
    MSDK_SAFE_DELETE_ARRAY(pSurfaces2);
    MSDK_SAFE_DELETE_ARRAY(mfxBS.Data);
    for(int i=0;i<taskPoolSize;i++)
        MSDK_SAFE_DELETE_ARRAY(pTasks[i].mfxBS.Data);
    MSDK_SAFE_DELETE_ARRAY(pTasks);

    fclose(fSource);
    fclose(fSink);

    return 0;
}
int main() { mfxStatus sts = MFX_ERR_NONE; mfxU16 inputWidth = 1920; mfxU16 inputHeight = 1080; // ===================================================================== // Intel Media SDK Video Pre/Post Processing (VPP) pipeline setup // - Showcasing two VPP features // - Resize (frame width and height is halved) // - ProcAmp: Increase brightness // - Video memory surfaces are used // // Open input YV12 YUV file FILE* fSource; fopen_s(&fSource, "bbb1920x1080.yuv", "rb"); MSDK_CHECK_POINTER(fSource, MFX_ERR_NULL_PTR); // Create output YUV file FILE* fSink; fopen_s(&fSink, "bbb960x540_vpp_bright_d3d.yuv", "wb"); MSDK_CHECK_POINTER(fSink, MFX_ERR_NULL_PTR); // Initialize Media SDK session // - MFX_IMPL_AUTO_ANY selects HW accelaration if available (on any adapter) // - Version 1.0 is selected for greatest backwards compatibility. // If more recent API features are needed, change the version accordingly mfxIMPL impl = MFX_IMPL_AUTO_ANY; #ifdef DX11_D3D impl |= MFX_IMPL_VIA_D3D11; #endif mfxVersion ver = {0, 1}; MFXVideoSession mfxSession; sts = mfxSession.Init(impl, &ver); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Initialize VPP parameters mfxVideoParam VPPParams; memset(&VPPParams, 0, sizeof(VPPParams)); // Input data VPPParams.vpp.In.FourCC = MFX_FOURCC_NV12; VPPParams.vpp.In.ChromaFormat = MFX_CHROMAFORMAT_YUV420; VPPParams.vpp.In.CropX = 0; VPPParams.vpp.In.CropY = 0; VPPParams.vpp.In.CropW = inputWidth; VPPParams.vpp.In.CropH = inputHeight; VPPParams.vpp.In.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; VPPParams.vpp.In.FrameRateExtN = 30; VPPParams.vpp.In.FrameRateExtD = 1; // width must be a multiple of 16 // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture VPPParams.vpp.In.Width = MSDK_ALIGN16(inputWidth); VPPParams.vpp.In.Height = (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.In.PicStruct)? 
MSDK_ALIGN16(inputHeight) : MSDK_ALIGN32(inputHeight); // Output data VPPParams.vpp.Out.FourCC = MFX_FOURCC_NV12; VPPParams.vpp.Out.ChromaFormat = MFX_CHROMAFORMAT_YUV420; VPPParams.vpp.Out.CropX = 0; VPPParams.vpp.Out.CropY = 0; VPPParams.vpp.Out.CropW = inputWidth/2; VPPParams.vpp.Out.CropH = inputHeight/2; VPPParams.vpp.Out.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; VPPParams.vpp.Out.FrameRateExtN = 30; VPPParams.vpp.Out.FrameRateExtD = 1; // width must be a multiple of 16 // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture VPPParams.vpp.Out.Width = MSDK_ALIGN16(VPPParams.vpp.Out.CropW); VPPParams.vpp.Out.Height = (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.Out.PicStruct)? MSDK_ALIGN16(VPPParams.vpp.Out.CropH) : MSDK_ALIGN32(VPPParams.vpp.Out.CropH); VPPParams.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY; // Create Media SDK VPP component MFXVideoVPP mfxVPP(mfxSession); // Create DirectX device context mfxHDL deviceHandle; sts = CreateHWDevice(mfxSession, &deviceHandle, NULL); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Provide device manager to Media SDK sts = mfxSession.SetHandle(DEVICE_MGR_TYPE, deviceHandle); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); mfxFrameAllocator mfxAllocator; mfxAllocator.Alloc = simple_alloc; mfxAllocator.Free = simple_free; mfxAllocator.Lock = simple_lock; mfxAllocator.Unlock = simple_unlock; mfxAllocator.GetHDL = simple_gethdl; // When using video memory we must provide Media SDK with an external allocator sts = mfxSession.SetFrameAllocator(&mfxAllocator); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Query number of required surfaces for VPP mfxFrameAllocRequest VPPRequest[2];// [0] - in, [1] - out memset(&VPPRequest, 0, sizeof(mfxFrameAllocRequest)*2); sts = mfxVPP.QueryIOSurf(&VPPParams, VPPRequest); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); #ifdef DX11_D3D VPPRequest[0].Type |= WILL_WRITE; // Hint to DX11 memory handler that application will 
write data to input surfaces VPPRequest[1].Type |= WILL_READ; // Hint to DX11 memory handler that application will read data from output surfaces #endif // Allocate required surfaces mfxFrameAllocResponse mfxResponseIn; mfxFrameAllocResponse mfxResponseOut; sts = mfxAllocator.Alloc(mfxAllocator.pthis, &VPPRequest[0], &mfxResponseIn); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = mfxAllocator.Alloc(mfxAllocator.pthis, &VPPRequest[1], &mfxResponseOut); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); mfxU16 nVPPSurfNumIn = mfxResponseIn.NumFrameActual; mfxU16 nVPPSurfNumOut = mfxResponseOut.NumFrameActual; // Allocate surface headers (mfxFrameSurface1) for VPP mfxFrameSurface1** pVPPSurfacesIn = new mfxFrameSurface1*[nVPPSurfNumIn]; MSDK_CHECK_POINTER(pVPPSurfacesIn, MFX_ERR_MEMORY_ALLOC); for (int i = 0; i < nVPPSurfNumIn; i++) { pVPPSurfacesIn[i] = new mfxFrameSurface1; memset(pVPPSurfacesIn[i], 0, sizeof(mfxFrameSurface1)); memcpy(&(pVPPSurfacesIn[i]->Info), &(VPPParams.vpp.In), sizeof(mfxFrameInfo)); pVPPSurfacesIn[i]->Data.MemId = mfxResponseIn.mids[i]; // MID (memory id) represent one D3D NV12 surface #ifndef ENABLE_INPUT // In case simulating direct access to frames we initialize the allocated surfaces with default pattern // - For true benchmark comparisons to async workloads all surfaces must have the same data #ifndef DX11_D3D IDirect3DSurface9 *pSurface; D3DSURFACE_DESC desc; D3DLOCKED_RECT locked; pSurface = (IDirect3DSurface9 *)mfxResponseIn.mids[i]; pSurface->GetDesc(&desc); pSurface->LockRect(&locked, 0, D3DLOCK_NOSYSLOCK); memset((mfxU8 *)locked.pBits, 100, desc.Height*locked.Pitch); // Y plane memset((mfxU8 *)locked.pBits + desc.Height * locked.Pitch, 50, (desc.Height*locked.Pitch)/2); // UV plane pSurface->UnlockRect(); #else // For now, just leave D3D11 surface data uninitialized #endif #endif } mfxFrameSurface1** pVPPSurfacesOut = new mfxFrameSurface1*[nVPPSurfNumOut]; MSDK_CHECK_POINTER(pVPPSurfacesOut, MFX_ERR_MEMORY_ALLOC); for (int i = 0; i < 
nVPPSurfNumOut; i++) { pVPPSurfacesOut[i] = new mfxFrameSurface1; memset(pVPPSurfacesOut[i], 0, sizeof(mfxFrameSurface1)); memcpy(&(pVPPSurfacesOut[i]->Info), &(VPPParams.vpp.Out), sizeof(mfxFrameInfo)); pVPPSurfacesOut[i]->Data.MemId = mfxResponseOut.mids[i]; // MID (memory id) represent one D3D NV12 surface } // Initialize extended buffer for frame processing // - Process amplifier (ProcAmp) used to control brightness // - mfxExtVPPDoUse: Define the processing algorithm to be used // - mfxExtVPPProcAmp: ProcAmp configuration // - mfxExtBuffer: Add extended buffers to VPP parameter configuration mfxExtVPPDoUse extDoUse; mfxU32 tabDoUseAlg[1]; extDoUse.Header.BufferId = MFX_EXTBUFF_VPP_DOUSE; extDoUse.Header.BufferSz = sizeof(mfxExtVPPDoUse); extDoUse.NumAlg = 1; extDoUse.AlgList = tabDoUseAlg; tabDoUseAlg[0] = MFX_EXTBUFF_VPP_PROCAMP; mfxExtVPPProcAmp procampConfig; procampConfig.Header.BufferId = MFX_EXTBUFF_VPP_PROCAMP; procampConfig.Header.BufferSz = sizeof(mfxExtVPPProcAmp); procampConfig.Hue = 0.0f; // Default procampConfig.Saturation = 1.0f; // Default procampConfig.Contrast = 1.0; // Default procampConfig.Brightness = 40.0; // Adjust brightness mfxExtBuffer* ExtBuffer[2]; ExtBuffer[0] = (mfxExtBuffer*)&extDoUse; ExtBuffer[1] = (mfxExtBuffer*)&procampConfig; VPPParams.NumExtParam = 2; VPPParams.ExtParam = (mfxExtBuffer**)&ExtBuffer[0]; // Initialize Media SDK VPP sts = mfxVPP.Init(&VPPParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // =================================== // Start processing the frames // #ifdef ENABLE_BENCHMARK LARGE_INTEGER tStart, tEnd; QueryPerformanceFrequency(&tStart); double freq = (double)tStart.QuadPart; QueryPerformanceCounter(&tStart); #endif int nSurfIdxIn = 0, nSurfIdxOut = 0; mfxSyncPoint syncp; mfxU32 nFrame = 0; // // Stage 1: Main processing loop // while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts) { nSurfIdxIn = GetFreeSurfaceIndex(pVPPSurfacesIn, 
nVPPSurfNumIn); // Find free input frame surface if (MFX_ERR_NOT_FOUND == nSurfIdxIn) return MFX_ERR_MEMORY_ALLOC; // Surface locking required when read/write D3D surfaces sts = mfxAllocator.Lock(mfxAllocator.pthis, pVPPSurfacesIn[nSurfIdxIn]->Data.MemId, &(pVPPSurfacesIn[nSurfIdxIn]->Data)); MSDK_BREAK_ON_ERROR(sts); sts = LoadRawFrame(pVPPSurfacesIn[nSurfIdxIn], fSource); // Load frame from file into surface MSDK_BREAK_ON_ERROR(sts); sts = mfxAllocator.Unlock(mfxAllocator.pthis, pVPPSurfacesIn[nSurfIdxIn]->Data.MemId, &(pVPPSurfacesIn[nSurfIdxIn]->Data)); MSDK_BREAK_ON_ERROR(sts); nSurfIdxOut = GetFreeSurfaceIndex(pVPPSurfacesOut, nVPPSurfNumOut); // Find free output frame surface if (MFX_ERR_NOT_FOUND == nSurfIdxOut) return MFX_ERR_MEMORY_ALLOC; // Process a frame asychronously (returns immediately) sts = mfxVPP.RunFrameVPPAsync(pVPPSurfacesIn[nSurfIdxIn], pVPPSurfacesOut[nSurfIdxOut], NULL, &syncp); if (MFX_ERR_MORE_DATA == sts) continue; // MFX_ERR_MORE_SURFACE means output is ready but need more surface (example: Frame Rate Conversion 30->60) // * Not handled in this example! MSDK_BREAK_ON_ERROR(sts); sts = mfxSession.SyncOperation(syncp, 60000); // Synchronize. 
Wait until frame processing is ready MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); ++nFrame; #ifdef ENABLE_OUTPUT // Surface locking required when read/write D3D surfaces sts = mfxAllocator.Lock(mfxAllocator.pthis, pVPPSurfacesOut[nSurfIdxOut]->Data.MemId, &(pVPPSurfacesOut[nSurfIdxOut]->Data)); MSDK_BREAK_ON_ERROR(sts); sts = WriteRawFrame(pVPPSurfacesOut[nSurfIdxOut], fSink); MSDK_BREAK_ON_ERROR(sts); sts = mfxAllocator.Unlock(mfxAllocator.pthis, pVPPSurfacesOut[nSurfIdxOut]->Data.MemId, &(pVPPSurfacesOut[nSurfIdxOut]->Data)); MSDK_BREAK_ON_ERROR(sts); printf("Frame number: %d\r", nFrame); #endif } // MFX_ERR_MORE_DATA means that the input file has ended, need to go to buffering loop, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 2: Retrieve the buffered VPP frames // while (MFX_ERR_NONE <= sts) { nSurfIdxOut = GetFreeSurfaceIndex(pVPPSurfacesOut, nVPPSurfNumOut); // Find free frame surface if (MFX_ERR_NOT_FOUND == nSurfIdxOut) return MFX_ERR_MEMORY_ALLOC; // Process a frame asychronously (returns immediately) sts = mfxVPP.RunFrameVPPAsync(NULL, pVPPSurfacesOut[nSurfIdxOut], NULL, &syncp); MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_SURFACE); MSDK_BREAK_ON_ERROR(sts); sts = mfxSession.SyncOperation(syncp, 60000); // Synchronize. 
Wait until frame processing is ready MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); ++nFrame; #ifdef ENABLE_OUTPUT // Surface locking required when read/write D3D surfaces sts = mfxAllocator.Lock(mfxAllocator.pthis, pVPPSurfacesOut[nSurfIdxOut]->Data.MemId, &(pVPPSurfacesOut[nSurfIdxOut]->Data)); MSDK_BREAK_ON_ERROR(sts); sts = WriteRawFrame(pVPPSurfacesOut[nSurfIdxOut], fSink); MSDK_BREAK_ON_ERROR(sts); sts = mfxAllocator.Unlock(mfxAllocator.pthis, pVPPSurfacesOut[nSurfIdxOut]->Data.MemId, &(pVPPSurfacesOut[nSurfIdxOut]->Data)); MSDK_BREAK_ON_ERROR(sts); printf("Frame number: %d\r", nFrame); #endif } // MFX_ERR_MORE_DATA indicates that there are no more buffered frames, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); #ifdef ENABLE_BENCHMARK QueryPerformanceCounter(&tEnd); double duration = ((double)tEnd.QuadPart - (double)tStart.QuadPart) / freq; printf("\nExecution time: %3.2fs (%3.2ffps)\n", duration, nFrame/duration); #endif // =================================================================== // Clean up resources // - It is recommended to close Media SDK components first, before releasing allocated surfaces, since // some surfaces may still be locked by internal Media SDK resources. mfxVPP.Close(); //mfxSession closed automatically on destruction for (int i = 0; i < nVPPSurfNumIn; i++) delete pVPPSurfacesIn[i]; MSDK_SAFE_DELETE_ARRAY(pVPPSurfacesIn); for (int i = 0; i < nVPPSurfNumOut; i++) delete pVPPSurfacesOut[i]; MSDK_SAFE_DELETE_ARRAY(pVPPSurfacesOut); fclose(fSource); fclose(fSink); CleanupHWDevice(); return 0; }
// Re-initializes the decoder for a new set of video parameters.
//
// pVideoParams - parameters handed to Reset()/Init(); also stored in m_pVideoParams.
// nPitch       - surface pitch forwarded to InitFrameAllocator() on a full init.
// bInited      - caller's hint that the decoder is already initialized; it is
//                disregarded when no frame allocator exists.
//
// A lightweight Reset() is attempted first when possible. If that fails the
// decoder is closed, the frame allocator is released and a full init
// (InitFrameAllocator + Init) is performed instead.
// MFX_WRN_INCOMPATIBLE_VIDEO_PARAM and MFX_WRN_PARTIAL_ACCELERATION are passed
// through MSDK_IGNORE_MFX_STS before the final status is returned.
mfxStatus CQuickSyncDecoder::InternalReset(mfxVideoParam* pVideoParams, mfxU32 nPitch, bool bInited)
{
    MSDK_CHECK_POINTER(pVideoParams, MFX_ERR_NULL_PTR);
    MSDK_CHECK_POINTER(m_pmfxDEC, MFX_ERR_NOT_INITIALIZED);

    m_pVideoParams = pVideoParams;

    mfxStatus sts = MFX_ERR_NONE;

    // A reset only makes sense when the caller says we are initialized AND a
    // frame allocator is present; otherwise go straight to the full init.
    bool bNeedFullInit = !bInited || (NULL == m_pFrameAllocator);

    if (!bNeedFullInit)
    {
        sts = m_pmfxDEC->Reset(pVideoParams);

        // Reset was rejected - tear everything down and start from scratch
        if (MSDK_FAILED(sts))
        {
            m_pmfxDEC->Close();
            FreeFrameAllocator();
            bNeedFullInit = true;
        }

        // Another VC1 decoder + VPP bug workaround:
        // forcibly drop every lock on the surfaces that still exist.
        if (NULL != m_pFrameSurfaces)
        {
            for (int nSurface = 0; nSurface < m_nRequiredFramesNum; ++nSurface)
            {
                m_pFrameSurfaces[nSurface].Data.Locked = 0;
                m_LockedSurfaces[nSurface] = 0;
            }
        }
    }

    if (bNeedFullInit)
    {
        // Setup allocator - will initialize D3D if needed
        sts = InitFrameAllocator(pVideoParams, nPitch);
        MSDK_CHECK_RESULT_P_RET(sts, MFX_ERR_NONE);

        // Init MSDK decoder and report the outcome
        sts = m_pmfxDEC->Init(pVideoParams);
        if (MFX_ERR_NONE == sts)
        {
            MSDK_TRACE("QsDecoder: decoder Init is successful\n");
        }
        else if (MFX_WRN_PARTIAL_ACCELERATION == sts)
        {
            MSDK_TRACE("QsDecoder: decoder Init is successful w/o HW acceleration\n");
            m_bHwAcceleration = false;
        }
        else if (MFX_WRN_INCOMPATIBLE_VIDEO_PARAM == sts)
        {
            MSDK_TRACE("QsDecoder: decoder Init is successful - wrong video parameters\n");
        }
        else
        {
            MSDK_TRACE("QsDecoder: decoder Init has failed!\n");
        }
    }

    // These two warnings are not reported to the caller
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_INCOMPATIBLE_VIDEO_PARAM);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION);

    return sts;
}
// Runs the main decode loop: feeds the bit stream from fSource to the decoder,
// waits for each decoded frame and hands it to outMan.Render(), printing the
// running frame count. When the loop ends it prints the elapsed time and the
// achieved frame rate.
//
// Returns the last status, with MFX_ERR_MORE_DATA passed through
// MSDK_IGNORE_MFX_STS first; returns MFX_ERR_MEMORY_ALLOC when
// GetFreeSurfaceIndex() reports MFX_ERR_NOT_FOUND.
//
// NOTE(review): no stage that drains frames still buffered inside the decoder
// is visible in this function - presumably that happens elsewhere; confirm.
mfxStatus IntelDecoder::RunDecodeAndRender()
{
    mfxStatus sts = MFX_ERR_NONE;

    // ===============================================================
    // Start decoding the frames from the stream
    //
    mfxGetTime(&tStart);

    nFrame            = 0;
    nIndex            = 0;
    nIndex2           = 0;
    pmfxOutSurface    = NULL;
    pmfxOutSurface_sw = NULL;

    //
    // Stage 1: Main decoding loop
    //
    while (sts >= MFX_ERR_NONE || sts == MFX_ERR_MORE_DATA || sts == MFX_ERR_MORE_SURFACE)
    {
        // Device busy: back off briefly, then repeat the same DecodeFrameAsync call
        if (sts == MFX_WRN_DEVICE_BUSY)
        {
            MSDK_SLEEP(1);
        }

        // Decoder ran out of input: refill the bit stream buffer from the file
        if (sts == MFX_ERR_MORE_DATA)
        {
            sts = ReadBitStreamData(&mfxBS, fSource);
            MSDK_BREAK_ON_ERROR(sts);
        }

        // A new work surface is needed: pick a free one
        if (sts == MFX_ERR_NONE || sts == MFX_ERR_MORE_SURFACE)
        {
            nIndex = GetFreeSurfaceIndex(pmfxSurfaces, numSurfaces);
            MSDK_CHECK_ERROR(MFX_ERR_NOT_FOUND, nIndex, MFX_ERR_MEMORY_ALLOC);
        }

        // Decode a frame asynchronously (returns immediately)
        //  - If the input bit stream holds several frames, DecodeFrameAsync starts
        //    decoding them and removes them from the bit stream
        sts = mfxDEC->DecodeFrameAsync(&mfxBS, pmfxSurfaces[nIndex], &pmfxOutSurface, &syncp);

        // A warning that still produced output is treated as success;
        // a warning without output simply repeats the DecodeFrameAsync call
        if (syncp && sts > MFX_ERR_NONE)
        {
            sts = MFX_ERR_NONE;
        }

        if (sts != MFX_ERR_NONE)
        {
            continue;
        }

        // Synchronize. Wait until decoded frame is ready
        sts = pSession->SyncOperation(syncp, 60000);
        if (sts != MFX_ERR_NONE)
        {
            continue;
        }

        ++nFrame;

        // Surface locking required when read/write video surfaces;
        // the software implementation renders without locking
        const bool bSoftwareImpl = (impl_type == MFX_IMPL_SOFTWARE);

        if (!bSoftwareImpl)
        {
            sts = pMfxAllocator->Lock(pMfxAllocator->pthis, pmfxOutSurface->Data.MemId, &(pmfxOutSurface->Data));
            MSDK_BREAK_ON_ERROR(sts);
        }

        outMan.Render(pmfxOutSurface);

        if (!bSoftwareImpl)
        {
            sts = pMfxAllocator->Unlock(pMfxAllocator->pthis, pmfxOutSurface->Data.MemId, &(pmfxOutSurface->Data));
            MSDK_BREAK_ON_ERROR(sts);
        }

        printf("Frame number: %d\r", nFrame);
        fflush(stdout);
    }

    // MFX_ERR_MORE_DATA means that file has ended; exit in case of other errors
    MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    mfxGetTime(&tEnd);
    elapsed = TimeDiffMsec(tEnd, tStart) / 1000;
    const double fps = static_cast<double>(nFrame) / elapsed;
    printf("\nExecution time: %3.2f s (%3.2f fps)\n", elapsed, fps);

    return sts;
}
int main() { mfxStatus sts = MFX_ERR_NONE; // ===================================================================== // Intel Media SDK decode pipeline setup // - In this example we are decoding an AVC (H.264) stream // - For simplistic memory management, system memory surfaces are used to store the decoded frames // (Note that when using HW acceleration D3D surfaces are prefered, for better performance) // // - VPP used to post process (resize) the frame // // Open input H.264 elementary stream (ES) file FILE* fSource; fopen_s(&fSource, "bbb1920x1080.264", "rb"); MSDK_CHECK_POINTER(fSource, MFX_ERR_NULL_PTR); // Create output YUV file FILE* fSink; fopen_s(&fSink, "dectest_960x540.yuv", "wb"); MSDK_CHECK_POINTER(fSink, MFX_ERR_NULL_PTR); // Initialize Media SDK session // - MFX_IMPL_AUTO_ANY selects HW accelaration if available (on any adapter) // - Version 1.0 is selected for greatest backwards compatibility. // If more recent API features are needed, change the version accordingly mfxIMPL impl = MFX_IMPL_AUTO_ANY; mfxVersion ver = {0, 1}; MFXVideoSession mfxSession; sts = mfxSession.Init(impl, &ver); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Create Media SDK decoder MFXVideoDECODE mfxDEC(mfxSession); // Create Media SDK VPP component MFXVideoVPP mfxVPP(mfxSession); // Set required video parameters for decode // - In this example we are decoding an AVC (H.264) stream // - For simplistic memory management, system memory surfaces are used to store the decoded frames // (Note that when using HW acceleration D3D surfaces are prefered, for better performance) mfxVideoParam mfxVideoParams; memset(&mfxVideoParams, 0, sizeof(mfxVideoParams)); mfxVideoParams.mfx.CodecId = MFX_CODEC_AVC; mfxVideoParams.IOPattern = MFX_IOPATTERN_OUT_SYSTEM_MEMORY; // Prepare Media SDK bit stream buffer // - Arbitrary buffer size for this example mfxBitstream mfxBS; memset(&mfxBS, 0, sizeof(mfxBS)); mfxBS.MaxLength = 1024 * 1024; mfxBS.Data = new mfxU8[mfxBS.MaxLength]; 
MSDK_CHECK_POINTER(mfxBS.Data, MFX_ERR_MEMORY_ALLOC); // Read a chunk of data from stream file into bit stream buffer // - Parse bit stream, searching for header and fill video parameters structure // - Abort if bit stream header is not found in the first bit stream buffer chunk sts = ReadBitStreamData(&mfxBS, fSource); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = mfxDEC.DecodeHeader(&mfxBS, &mfxVideoParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Initialize VPP parameters // - For simplistic memory management, system memory surfaces are used to store the raw frames // (Note that when using HW acceleration D3D surfaces are prefered, for better performance) mfxVideoParam VPPParams; memset(&VPPParams, 0, sizeof(VPPParams)); // Input data VPPParams.vpp.In.FourCC = MFX_FOURCC_NV12; VPPParams.vpp.In.ChromaFormat = MFX_CHROMAFORMAT_YUV420; VPPParams.vpp.In.CropX = 0; VPPParams.vpp.In.CropY = 0; VPPParams.vpp.In.CropW = mfxVideoParams.mfx.FrameInfo.CropW; VPPParams.vpp.In.CropH = mfxVideoParams.mfx.FrameInfo.CropH; VPPParams.vpp.In.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; VPPParams.vpp.In.FrameRateExtN = 30; VPPParams.vpp.In.FrameRateExtD = 1; // width must be a multiple of 16 // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture VPPParams.vpp.In.Width = MSDK_ALIGN16(VPPParams.vpp.In.CropW); VPPParams.vpp.In.Height = (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.In.PicStruct)? 
MSDK_ALIGN16(VPPParams.vpp.In.CropH) : MSDK_ALIGN32(VPPParams.vpp.In.CropH); // Output data VPPParams.vpp.Out.FourCC = MFX_FOURCC_NV12; VPPParams.vpp.Out.ChromaFormat = MFX_CHROMAFORMAT_YUV420; VPPParams.vpp.Out.CropX = 0; VPPParams.vpp.Out.CropY = 0; VPPParams.vpp.Out.CropW = VPPParams.vpp.In.CropW/2; // Resize to half size resolution VPPParams.vpp.Out.CropH = VPPParams.vpp.In.CropH/2; VPPParams.vpp.Out.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; VPPParams.vpp.Out.FrameRateExtN = 30; VPPParams.vpp.Out.FrameRateExtD = 1; // width must be a multiple of 16 // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture VPPParams.vpp.Out.Width = MSDK_ALIGN16(VPPParams.vpp.Out.CropW); VPPParams.vpp.Out.Height = (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.Out.PicStruct)? MSDK_ALIGN16(VPPParams.vpp.Out.CropH) : MSDK_ALIGN32(VPPParams.vpp.Out.CropH); VPPParams.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY | MFX_IOPATTERN_OUT_SYSTEM_MEMORY; // Query number of required surfaces for decoder mfxFrameAllocRequest DecRequest; memset(&DecRequest, 0, sizeof(DecRequest)); sts = mfxDEC.QueryIOSurf(&mfxVideoParams, &DecRequest); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Query number of required surfaces for VPP mfxFrameAllocRequest VPPRequest[2];// [0] - in, [1] - out memset(&VPPRequest, 0, sizeof(mfxFrameAllocRequest)*2); sts = mfxVPP.QueryIOSurf(&VPPParams, VPPRequest); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Determine the required number of surfaces for decoder output (VPP input) and for VPP output mfxU16 nSurfNumDecVPP = DecRequest.NumFrameSuggested + VPPRequest[0].NumFrameSuggested; mfxU16 nSurfNumVPPOut = VPPRequest[1].NumFrameSuggested; // Allocate surfaces for decoder and VPP In // - Width and height of buffer must be aligned, a multiple of 32 // - Frame surface array keeps pointers all surface planes and general frame info mfxU16 width = 
(mfxU16)MSDK_ALIGN32(DecRequest.Info.Width); mfxU16 height = (mfxU16)MSDK_ALIGN32(DecRequest.Info.Height); mfxU8 bitsPerPixel = 12; // NV12 format is a 12 bits per pixel format mfxU32 surfaceSize = width * height * bitsPerPixel / 8; mfxU8* surfaceBuffers = (mfxU8 *)new mfxU8[surfaceSize * nSurfNumDecVPP]; mfxFrameSurface1** pmfxSurfaces = new mfxFrameSurface1*[nSurfNumDecVPP]; MSDK_CHECK_POINTER(pmfxSurfaces, MFX_ERR_MEMORY_ALLOC); for (int i = 0; i < nSurfNumDecVPP; i++) { pmfxSurfaces[i] = new mfxFrameSurface1; memset(pmfxSurfaces[i], 0, sizeof(mfxFrameSurface1)); memcpy(&(pmfxSurfaces[i]->Info), &(mfxVideoParams.mfx.FrameInfo), sizeof(mfxFrameInfo)); pmfxSurfaces[i]->Data.Y = &surfaceBuffers[surfaceSize * i]; pmfxSurfaces[i]->Data.U = pmfxSurfaces[i]->Data.Y + width * height; pmfxSurfaces[i]->Data.V = pmfxSurfaces[i]->Data.U + 1; pmfxSurfaces[i]->Data.Pitch = width; } // Allocate surfaces for VPP Out // - Width and height of buffer must be aligned, a multiple of 32 // - Frame surface array keeps pointers all surface planes and general frame info width = (mfxU16)MSDK_ALIGN32(VPPRequest[1].Info.Width); height = (mfxU16)MSDK_ALIGN32(VPPRequest[1].Info.Height); bitsPerPixel = 12; // NV12 format is a 12 bits per pixel format surfaceSize = width * height * bitsPerPixel / 8; mfxU8* surfaceBuffers2 = (mfxU8 *)new mfxU8[surfaceSize * nSurfNumVPPOut]; mfxFrameSurface1** pmfxSurfaces2 = new mfxFrameSurface1*[nSurfNumVPPOut]; MSDK_CHECK_POINTER(pmfxSurfaces2, MFX_ERR_MEMORY_ALLOC); for (int i = 0; i < nSurfNumVPPOut; i++) { pmfxSurfaces2[i] = new mfxFrameSurface1; memset(pmfxSurfaces2[i], 0, sizeof(mfxFrameSurface1)); memcpy(&(pmfxSurfaces2[i]->Info), &(VPPParams.vpp.Out), sizeof(mfxFrameInfo)); pmfxSurfaces2[i]->Data.Y = &surfaceBuffers[surfaceSize * i]; pmfxSurfaces2[i]->Data.U = pmfxSurfaces2[i]->Data.Y + width * height; pmfxSurfaces2[i]->Data.V = pmfxSurfaces2[i]->Data.U + 1; pmfxSurfaces2[i]->Data.Pitch = width; } // Initialize the Media SDK decoder sts = 
mfxDEC.Init(&mfxVideoParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Initialize Media SDK VPP sts = mfxVPP.Init(&VPPParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // =============================================================== // Start decoding the frames from the stream // #ifdef ENABLE_BENCHMARK LARGE_INTEGER tStart, tEnd; QueryPerformanceFrequency(&tStart); double freq = (double)tStart.QuadPart; QueryPerformanceCounter(&tStart); #endif mfxSyncPoint syncpD; mfxSyncPoint syncpV; mfxFrameSurface1* pmfxOutSurface = NULL; int nIndex = 0; int nIndex2 = 0; mfxU32 nFrame = 0; // // Stage 1: Main decoding loop // while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts || MFX_ERR_MORE_SURFACE == sts) { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // Wait if device is busy, then repeat the same call to DecodeFrameAsync if (MFX_ERR_MORE_DATA == sts) { sts = ReadBitStreamData(&mfxBS, fSource); // Read more data into input bit stream MSDK_BREAK_ON_ERROR(sts); } if (MFX_ERR_MORE_SURFACE == sts || MFX_ERR_NONE == sts) { nIndex = GetFreeSurfaceIndex(pmfxSurfaces, nSurfNumDecVPP); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex) return MFX_ERR_MEMORY_ALLOC; } // Decode a frame asychronously (returns immediately) sts = mfxDEC.DecodeFrameAsync(&mfxBS, pmfxSurfaces[nIndex], &pmfxOutSurface, &syncpD); // Ignore warnings if output is available, // if no output and no action required just repeat the DecodeFrameAsync call if (MFX_ERR_NONE < sts && syncpD) sts = MFX_ERR_NONE; if (MFX_ERR_NONE == sts) { nIndex2 = GetFreeSurfaceIndex(pmfxSurfaces2, nSurfNumVPPOut); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex) return MFX_ERR_MEMORY_ALLOC; for (;;) { // Process a frame asychronously (returns immediately) sts = mfxVPP.RunFrameVPPAsync(pmfxOutSurface, pmfxSurfaces2[nIndex2], NULL, &syncpV); if (MFX_ERR_NONE < sts && !syncpV) // repeat the call if 
warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && syncpV) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else break; // not a warning } // VPP needs more data, let decoder decode another frame as input if (MFX_ERR_MORE_DATA == sts) { continue; } else if (MFX_ERR_MORE_SURFACE == sts) { // Not relevant for the illustrated workload! Therefore not handled. // Relevant for cases when VPP produces more frames at output than consumes at input. E.g. framerate conversion 30 fps -> 60 fps break; } else MSDK_BREAK_ON_ERROR(sts); } if (MFX_ERR_NONE == sts) sts = mfxSession.SyncOperation(syncpV, 60000); // Synchronize. Wait until decoded frame is ready if (MFX_ERR_NONE == sts) { ++nFrame; #ifdef ENABLE_OUTPUT sts = WriteRawFrame(pmfxSurfaces2[nIndex2], fSink); MSDK_BREAK_ON_ERROR(sts); printf("Frame number: %d\r", nFrame); #endif } } // MFX_ERR_MORE_DATA means that file has ended, need to go to buffering loop, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 2: Retrieve the buffered decoded frames // while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_SURFACE == sts) { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // Wait if device is busy, then repeat the same call to DecodeFrameAsync nIndex = GetFreeSurfaceIndex(pmfxSurfaces, nSurfNumDecVPP); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex) return MFX_ERR_MEMORY_ALLOC; // Decode a frame asychronously (returns immediately) sts = mfxDEC.DecodeFrameAsync(NULL, pmfxSurfaces[nIndex], &pmfxOutSurface, &syncpD); // Ignore warnings if output is available, // if no output and no action required just repeat the DecodeFrameAsync call if (MFX_ERR_NONE < sts && syncpD) sts = MFX_ERR_NONE; if (MFX_ERR_NONE == sts) { nIndex2 = GetFreeSurfaceIndex(pmfxSurfaces2, nSurfNumVPPOut); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex) return MFX_ERR_MEMORY_ALLOC; 
for (;;) { // Process a frame asychronously (returns immediately) sts = mfxVPP.RunFrameVPPAsync(pmfxOutSurface, pmfxSurfaces2[nIndex2], NULL, &syncpV); if (MFX_ERR_NONE < sts && !syncpV) // repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && syncpV) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else break; // not a warning } // VPP needs more data, let decoder decode another frame as input if (MFX_ERR_MORE_DATA == sts) { continue; } else if (MFX_ERR_MORE_SURFACE == sts) { // Not relevant for the illustrated workload! Therefore not handled. // Relevant for cases when VPP produces more frames at output than consumes at input. E.g. framerate conversion 30 fps -> 60 fps break; } else MSDK_BREAK_ON_ERROR(sts); } if (MFX_ERR_NONE == sts) sts = mfxSession.SyncOperation(syncpV, 60000); // Synchronize. Waits until decoded frame is ready if (MFX_ERR_NONE == sts) { ++nFrame; #ifdef ENABLE_OUTPUT sts = WriteRawFrame(pmfxSurfaces2[nIndex2], fSink); MSDK_BREAK_ON_ERROR(sts); printf("Frame number: %d\r", nFrame); #endif } } // MFX_ERR_MORE_DATA means that decoder is done with buffered frames, need to go to VPP buffering loop, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 3: Retrieve the buffered VPP frames // while (MFX_ERR_NONE <= sts) { nIndex2 = GetFreeSurfaceIndex(pmfxSurfaces2, nSurfNumVPPOut); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex2) return MFX_ERR_MEMORY_ALLOC; // Process a frame asychronously (returns immediately) sts = mfxVPP.RunFrameVPPAsync(NULL, pmfxSurfaces2[nIndex2], NULL, &syncpV); MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_SURFACE); MSDK_BREAK_ON_ERROR(sts); sts = mfxSession.SyncOperation(syncpV, 60000); // Synchronize. 
Wait until frame processing is ready MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); ++nFrame; #ifdef ENABLE_OUTPUT sts = WriteRawFrame(pmfxSurfaces2[nIndex2], fSink); MSDK_BREAK_ON_ERROR(sts); printf("Frame number: %d\r", nFrame); #endif } // MFX_ERR_MORE_DATA indicates that all buffers has been fetched, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); #ifdef ENABLE_BENCHMARK QueryPerformanceCounter(&tEnd); double duration = ((double)tEnd.QuadPart - (double)tStart.QuadPart) / freq; printf("\nExecution time: %3.2fs (%3.2ffps)\n", duration, nFrame/duration); #endif // =================================================================== // Clean up resources // - It is recommended to close Media SDK components first, before releasing allocated surfaces, since // some surfaces may still be locked by internal Media SDK resources. mfxDEC.Close(); mfxVPP.Close(); // mfxSession closed automatically on destruction for (int i = 0; i < nSurfNumDecVPP; i++) delete pmfxSurfaces[i]; for (int i = 0; i < nSurfNumVPPOut; i++) delete pmfxSurfaces2[i]; MSDK_SAFE_DELETE_ARRAY(pmfxSurfaces); MSDK_SAFE_DELETE_ARRAY(pmfxSurfaces2); MSDK_SAFE_DELETE_ARRAY(surfaceBuffers); MSDK_SAFE_DELETE_ARRAY(surfaceBuffers2); MSDK_SAFE_DELETE_ARRAY(mfxBS.Data); fclose(fSource); fclose(fSink); return 0; }