// Configures this encoder for H.264 (AVC) encoding: validates the bitrate,
// creates the Media SDK session (or session + allocator), constructs the
// MFXVideoENCODE object and fills m_mfxVideoParam. The encoder itself is not
// initialised here.
//
// NOTE(review): the `#endif` right after the signature closes a preprocessor
// conditional that is opened above this definition (presumably selecting
// between alternative signatures) - confirm against the full file.
//
// pVaDpy       - VA display used to create the session / session allocator.
// bitrate      - written to mfx.TargetKbps; a value of 0 is rejected.
// nLogicIndex  - stored in m_nLogicIndex.
// pMeasuremnt  - measurement object; kept only when bUseMeasure is true.
// bUseMeasure  - whether pMeasuremnt is stored in m_pMeasuremnt.
// Returns true on success, false when bitrate is 0 or session creation fails.
bool MSDKEncode::SetEncodeParam(VADisplay* pVaDpy, unsigned short bitrate, int nLogicIndex, Measurement* pMeasuremnt, bool bUseMeasure)
#endif
{
    // A zero bitrate cannot be used as a rate-control target.
    if (!bitrate) {
        H264E_TRACE_ERROR("Invalid input encode parameters, set parameters failed\n");
        return false;
    }

    if (bUseMeasure)
        m_pMeasuremnt = pMeasuremnt;
    m_nLogicIndex = nLogicIndex;

#ifdef CONFIG_USE_MFXALLOCATOR
    // Session and frame allocator are created together.
    if (!CreateSessionAllocator(pVaDpy))
        return false;
#else
    // Plain session without an external allocator.
    MFXVideoSession* pSession = CreateSession(*pVaDpy);
    if (!pSession) {
        H264E_TRACE_ERROR("[MSDKVpp]-----Create session failed\n");
        return false;
    }
    m_pSession = pSession;
#endif

    // NOTE(review): any encoder object previously held in m_pEncode is
    // overwritten here without being released - confirm this is called once.
    m_pEncode = new MFXVideoENCODE(*m_pSession);

    m_mfxVideoParam.mfx.CodecId = MFX_CODEC_AVC;
    m_mfxVideoParam.mfx.CodecProfile = MFX_PROFILE_AVC_BASELINE;
    m_mfxVideoParam.mfx.CodecLevel = MFX_LEVEL_UNKNOWN; //SDK functions will determine the correct level
    m_mfxVideoParam.mfx.TargetUsage = MFX_TARGETUSAGE_BALANCED;
    m_mfxVideoParam.mfx.GopPicSize = GROUP_OF_PICTURE;
    m_mfxVideoParam.mfx.GopRefDist = 1; //distance between I- or P- key frames(1 means no B-frames)
    m_mfxVideoParam.mfx.RateControlMethod = MFX_RATECONTROL_VBR;
    m_mfxVideoParam.mfx.TargetKbps = bitrate;
    m_mfxVideoParam.mfx.NumSlice = 0;
    m_mfxVideoParam.mfx.NumRefFrame = 1;
    m_mfxVideoParam.mfx.EncodedOrder = 0; //specify the EncodedOrder as display order

#ifdef CONFIG_READ_RAW_BUFFER
    // NOTE(review): `width` and `height` are not parameters of the signature
    // visible here; presumably they come from the alternative signature
    // selected by the conditional closed above - confirm.
    m_mfxVideoParam.mfx.FrameInfo.FrameRateExtN = 30;
    m_mfxVideoParam.mfx.FrameInfo.FrameRateExtD = 1;
    m_mfxVideoParam.mfx.FrameInfo.FourCC = MFX_FOURCC_NV12;
    m_mfxVideoParam.mfx.FrameInfo.ChromaFormat = MFX_CHROMAFORMAT_YUV420;
    m_mfxVideoParam.mfx.FrameInfo.CropX = 0;
    m_mfxVideoParam.mfx.FrameInfo.CropY = 0;
    m_mfxVideoParam.mfx.FrameInfo.CropW = width;
    m_mfxVideoParam.mfx.FrameInfo.CropH = height;
    m_mfxVideoParam.mfx.FrameInfo.Width = MSDK_ALIGN16(width);
    m_mfxVideoParam.mfx.FrameInfo.Height = MSDK_ALIGN16(height);
    m_mfxVideoParam.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;
#endif

    m_mfxVideoParam.AsyncDepth = 1;
    return true;
}
// Asks the decoder how many working surfaces it needs for the current
// mfxVideoParams and allocates them in system memory as NV12 frames.
//
// Side effects: sets numSurfaces and pmfxSurfaces. All surfaces share one
// contiguous pixel buffer; pmfxSurfaces[0]->Data.Y points at its start.
// NOTE(review): the pixel buffer pointer is not stored anywhere else in this
// function - whoever frees the surfaces must release it through
// pmfxSurfaces[0]->Data.Y; confirm against the cleanup code.
//
// Returns the QueryIOSurf status (MFX_WRN_PARTIAL_ACCELERATION is treated as
// success); on a query error nothing is allocated.
mfxStatus IntelDecoder::QueryAndAllocRequiredSurfacesForSW()
{
    mfxStatus sts = MFX_ERR_NONE;

    // Query number of required surfaces for decoder
    mfxFrameAllocRequest DecRequest;
    memset(&DecRequest, 0, sizeof(DecRequest));
    sts = mfxDEC->QueryIOSurf(&mfxVideoParams, &DecRequest);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    numSurfaces = DecRequest.NumFrameSuggested;

    //VPPRequest[0].Type |= WILL_WRITE; // This line is only required for Windows DirectX11 to ensure that surfaces can be written to by the application
    //DecRequest.Type |= WILL_READ; // This line is only required for Windows DirectX11 to ensure that surfaces can be retrieved by the application

    // Allocate surfaces for decoder
    // - Width and height of buffer must be aligned
    // - Frame surface array keeps pointers all surface planes and general frame info
    // NOTE(review): the original comment asked for a multiple of 32, but the
    // width uses MSDK_ALIGN and the height MSDK_ALIGN16 - confirm which
    // alignment is actually intended.
    mfxU16 width = (mfxU16)MSDK_ALIGN(DecRequest.Info.Width);
    mfxU16 height = (mfxU16)MSDK_ALIGN16(DecRequest.Info.Height);
    mfxU8 bitsPerPixel = 12; // NV12 format is a 12 bits per pixel format

    // Sizes are computed in size_t: multiplying the mfxU16 operands directly
    // would promote them to (signed) int and could overflow for large frames.
    const size_t lumaSize = static_cast<size_t>(width) * height;
    const size_t surfaceSize = lumaSize * bitsPerPixel / 8;
    mfxU8* surfaceBuffers = new mfxU8[surfaceSize * numSurfaces];

    // Allocate surface headers (mfxFrameSurface1) for decoder
    pmfxSurfaces = new mfxFrameSurface1 *[numSurfaces];
    MSDK_CHECK_POINTER(pmfxSurfaces, MFX_ERR_MEMORY_ALLOC);
    for (int i = 0; i < numSurfaces; i++) {
        pmfxSurfaces[i] = new mfxFrameSurface1;
        memset(pmfxSurfaces[i], 0, sizeof(mfxFrameSurface1));
        memcpy(&(pmfxSurfaces[i]->Info), &(mfxVideoParams.mfx.FrameInfo), sizeof(mfxFrameInfo));

        // NV12 layout: full-resolution Y plane followed by interleaved UV.
        pmfxSurfaces[i]->Data.Y = &surfaceBuffers[surfaceSize * i];
        pmfxSurfaces[i]->Data.U = pmfxSurfaces[i]->Data.Y + lumaSize;
        pmfxSurfaces[i]->Data.V = pmfxSurfaces[i]->Data.U + 1;
        pmfxSurfaces[i]->Data.Pitch = width;
    }

    return sts;
}
// Fills m_mfxEncParams (and, for look-ahead rate control, m_co2) from the
// caller-supplied settings. Nothing is sent to the SDK here.
//
// pParams - encoder settings (resolution, frame rate, rate control, ...).
// Returns true when the parameters were filled in, false if pParams is null.
bool QSV_Encoder_Internal::InitParams(qsv_param_t * pParams)
{
    if (!pParams)
        return false;

    memset(&m_mfxEncParams, 0, sizeof(m_mfxEncParams));

    m_mfxEncParams.mfx.CodecId = MFX_CODEC_AVC;
    m_mfxEncParams.mfx.GopOptFlag = MFX_GOP_CLOSED;
    m_mfxEncParams.mfx.NumSlice = 1;
    m_mfxEncParams.mfx.TargetUsage = pParams->nTargetUsage;
    m_mfxEncParams.mfx.CodecProfile = pParams->nCodecProfile;
    m_mfxEncParams.mfx.FrameInfo.FrameRateExtN = pParams->nFpsNum;
    m_mfxEncParams.mfx.FrameInfo.FrameRateExtD = pParams->nFpsDen;
    m_mfxEncParams.mfx.FrameInfo.FourCC = MFX_FOURCC_NV12;
    m_mfxEncParams.mfx.FrameInfo.ChromaFormat = MFX_CHROMAFORMAT_YUV420;
    m_mfxEncParams.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;
    m_mfxEncParams.mfx.FrameInfo.CropX = 0;
    m_mfxEncParams.mfx.FrameInfo.CropY = 0;
    m_mfxEncParams.mfx.FrameInfo.CropW = pParams->nWidth;
    m_mfxEncParams.mfx.FrameInfo.CropH = pParams->nHeight;

    m_mfxEncParams.mfx.RateControlMethod = pParams->nRateControl;

    // Each rate-control mode reads a different subset of the settings.
    switch (pParams->nRateControl) {
    case MFX_RATECONTROL_CBR:
        m_mfxEncParams.mfx.TargetKbps = pParams->nTargetBitRate;
        break;
    case MFX_RATECONTROL_VBR:
    case MFX_RATECONTROL_VCM:
        m_mfxEncParams.mfx.TargetKbps = pParams->nTargetBitRate;
        m_mfxEncParams.mfx.MaxKbps = pParams->nMaxBitRate;
        break;
    case MFX_RATECONTROL_CQP:
        m_mfxEncParams.mfx.QPI = pParams->nQPI;
        m_mfxEncParams.mfx.QPB = pParams->nQPB;
        m_mfxEncParams.mfx.QPP = pParams->nQPP;
        break;
    case MFX_RATECONTROL_AVBR:
        m_mfxEncParams.mfx.TargetKbps = pParams->nTargetBitRate;
        m_mfxEncParams.mfx.Accuracy = pParams->nAccuracy;
        m_mfxEncParams.mfx.Convergence = pParams->nConvergence;
        break;
    case MFX_RATECONTROL_ICQ:
        m_mfxEncParams.mfx.ICQQuality = pParams->nICQQuality;
        break;
    case MFX_RATECONTROL_LA:
        m_mfxEncParams.mfx.TargetKbps = pParams->nTargetBitRate;
        break;
    case MFX_RATECONTROL_LA_ICQ:
        m_mfxEncParams.mfx.ICQQuality = pParams->nICQQuality;
        break;
    default:
        break;
    }

    // GOP length = key-frame interval in seconds times the frame rate.
    // NOTE(review): nFpsDen == 0 is not guarded here - confirm callers
    // always pass a valid frame rate.
    m_mfxEncParams.mfx.GopPicSize = (mfxU16) (pParams->nKeyIntSec *
            pParams->nFpsNum / (float) pParams->nFpsDen);
    m_mfxEncParams.mfx.GopRefDist = pParams->nbFrames + 1;

    if (pParams->nRateControl == MFX_RATECONTROL_LA_ICQ ||
        pParams->nRateControl == MFX_RATECONTROL_LA) {
        memset(&m_co2, 0, sizeof(mfxExtCodingOption2));
        // m_co2 is an mfxExtCodingOption2, so it must carry the matching
        // buffer id; with MFX_EXTBUFF_CODING_OPTION the SDK would interpret
        // it as a different structure and not pick up LookAheadDepth.
        m_co2.Header.BufferId = MFX_EXTBUFF_CODING_OPTION2;
        m_co2.Header.BufferSz = sizeof(m_co2);
        m_co2.LookAheadDepth = pParams->nLADEPTH;

        // NOTE(review): this array is function-static, i.e. shared by every
        // QSV_Encoder_Internal instance; with several encoders alive the
        // last caller's m_co2 wins - confirm only one instance uses
        // look-ahead at a time.
        static mfxExtBuffer* extendedBuffers[1];
        extendedBuffers[0] = (mfxExtBuffer*)& m_co2;
        m_mfxEncParams.ExtParam = extendedBuffers;
        m_mfxEncParams.NumExtParam = 1;
    }

    // Width must be a multiple of 16
    // Height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture
    m_mfxEncParams.mfx.FrameInfo.Width = MSDK_ALIGN16(pParams->nWidth);
    m_mfxEncParams.mfx.FrameInfo.Height = MSDK_ALIGN16(pParams->nHeight);

    // m_mfxEncParams.AsyncDepth = 4;
    m_mfxEncParams.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY;

    return true;
}
mfxStatus MSDKVpp::InitVpp(mfxFrameSurface1* pFrameSurface) { if (m_pMeasuremnt) { m_pMeasuremnt->GetLock(); m_pMeasuremnt->TimeStpStart(VPP_INIT_TIME_STAMP, this); m_pMeasuremnt->RelLock(); } //Input data m_mfxVideoParam.vpp.In.FrameRateExtN = pFrameSurface->Info.FrameRateExtN; m_mfxVideoParam.vpp.In.FrameRateExtD = pFrameSurface->Info.FrameRateExtD; m_mfxVideoParam.vpp.In.FourCC = pFrameSurface->Info.FourCC; m_mfxVideoParam.vpp.In.ChromaFormat = pFrameSurface->Info.ChromaFormat; m_mfxVideoParam.vpp.In.PicStruct = pFrameSurface->Info.PicStruct; m_mfxVideoParam.vpp.In.CropX = pFrameSurface->Info.CropX; m_mfxVideoParam.vpp.In.CropY = pFrameSurface->Info.CropY; m_mfxVideoParam.vpp.In.CropW = pFrameSurface->Info.CropW; m_mfxVideoParam.vpp.In.CropH = pFrameSurface->Info.CropH; VPP_TRACE_INFO("[MSDKVpp]-----Init VPP, in dst %d/%d\n", m_mfxVideoParam.vpp.In.CropW, m_mfxVideoParam.vpp.In.CropH); m_mfxVideoParam.vpp.In.Width = MSDK_ALIGN16(m_mfxVideoParam.vpp.In.CropW); m_mfxVideoParam.vpp.In.Height = MSDK_ALIGN16(m_mfxVideoParam.vpp.In.CropH); //Output data MSDKDecode* pDecode = NULL; for (std::map<MSDKDecodeVpp*, MediaBuf>::iterator it = m_mapMediaBuf.begin(); it != m_mapMediaBuf.end(); it++) { pDecode = dynamic_cast<MSDKDecode*>(it->first); //select the maximum frame rate among the multiplex stream to set the composite frame rate if (pDecode->GetFrameRateExtN()/pDecode->GetFrameRateExtD() > m_frameRate) { m_mfxVideoParam.vpp.Out.FrameRateExtN = pDecode->GetFrameRateExtN(); m_mfxVideoParam.vpp.Out.FrameRateExtD = pDecode->GetFrameRateExtD(); m_frameRate = m_mfxVideoParam.vpp.Out.FrameRateExtN/m_mfxVideoParam.vpp.Out.FrameRateExtD; } } for (std::map<MSDKDecodeVpp*, MediaBuf>::iterator it = m_mapMediaBuf.begin(); it != m_mapMediaBuf.end(); it++) dynamic_cast<MSDKDecode*>(it->first)->SetCompFrameRate(m_frameRate); m_nInterFrameSpace = 1000*1000/m_frameRate; m_nSleepInterval = m_nInterFrameSpace; //sleep full of the inter-frame space //calculate the arg `growth potential` in 
logistic equation m_argMasterGrowthPotential = log(2*m_nInterFrameSpace-1)/(LOGISTIC_INTERVAL_UPPER/2); VPP_TRACE_INFO("[MSDKVpp]-----Vpp output frame rate: %d\n", m_frameRate); m_mfxVideoParam.vpp.Out.FourCC = m_mfxVideoParam.vpp.In.FourCC; m_mfxVideoParam.vpp.Out.ChromaFormat = m_mfxVideoParam.vpp.In.ChromaFormat; m_mfxVideoParam.vpp.Out.PicStruct = m_mfxVideoParam.vpp.In.PicStruct; m_mfxVideoParam.vpp.Out.CropX = 0; m_mfxVideoParam.vpp.Out.CropY = 0; m_mfxVideoParam.vpp.Out.CropW = m_vppCfg.out_width; m_mfxVideoParam.vpp.Out.CropH = m_vppCfg.out_height; m_mfxVideoParam.vpp.Out.Width = MSDK_ALIGN16(m_mfxVideoParam.vpp.Out.CropW); m_mfxVideoParam.vpp.Out.Height = MSDK_ALIGN16(m_mfxVideoParam.vpp.Out.CropH); #ifdef CONFIG_USE_MFXALLOCATOR m_mfxVideoParam.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY; VPP_TRACE_INFO("----------------vpp using video memory\n"); #else m_mfxVideoParam.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY | MFX_IOPATTERN_OUT_SYSTEM_MEMORY; VPP_TRACE_INFO("----------------vpp using system memory\n"); #endif mfxStatus sts = MFX_ERR_NONE; mfxFrameAllocRequest vppRequest[2]; //[0]-in, [1]-out memset(&vppRequest, 0, sizeof(mfxFrameAllocRequest)*2); sts = m_pVpp->QueryIOSurf(&m_mfxVideoParam, vppRequest); if (sts < MFX_ERR_NONE) return sts; m_nSurfaces = vppRequest[1].NumFrameSuggested+15; VPP_TRACE_INFO("[MSDKVpp]-----VPP suggest number of surfaces is in/out %d/%d\n", vppRequest[0].NumFrameSuggested, m_nSurfaces); VPP_TRACE_INFO("[MSDKVpp]-----Creating VPP surface pool, surface num %d\n", m_nSurfaces); sts = AllocFrames(&vppRequest[1]); mfxExtVPPComposite vppComp; if (VPP_COMP == m_mode) { SetVppCompParam(&vppComp); m_mfxVideoParam.ExtParam[0] = (mfxExtBuffer*)&vppComp; m_mfxVideoParam.NumExtParam = 1; } sts = m_pVpp->Init(&m_mfxVideoParam); if (MFX_WRN_FILTER_SKIPPED == sts) { VPP_TRACE_INFO("[MSDKVpp]-----Got MFX_WRN_FILTER_SKIPPED\n"); sts = MFX_ERR_NONE; } if (VPP_COMP == m_mode) delete[] vppComp.InputStream; if 
(m_pMeasuremnt) { m_pMeasuremnt->GetLock(); m_pMeasuremnt->TimeStpFinish(VPP_INIT_TIME_STAMP, this); m_pMeasuremnt->RelLock(); } return sts; }
// Makes the NV12 texture identified by `mid` readable by the CPU: copies it
// into its staging texture, maps the staging texture for reading and fills
// ptr->Pitch / Y / U / V from the mapped memory.
//
// mid - memory id resolved through GetResourceFromMid().
// ptr - frame data structure receiving the plane pointers and pitch.
// Returns MFX_ERR_NONE on success, MFX_ERR_LOCK_MEMORY when the texture is
// missing, is not NV12, or Map() fails.
mfxStatus D3D11FrameAllocator::LockFrame(mfxMemId mid, mfxFrameData *ptr)
{
    HRESULT hRes = S_OK;

    D3D11_TEXTURE2D_DESC desc = {0};
    D3D11_MAPPED_SUBRESOURCE lockedRect = {0};

    //check that texture exists
    TextureSubResource sr = GetResourceFromMid(mid);
    if (!sr.GetTexture())
        return MFX_ERR_LOCK_MEMORY;

    D3D11_MAP mapType = D3D11_MAP_READ;
    UINT mapFlags = D3D11_MAP_FLAG_DO_NOT_WAIT;
    {
        ASSERT(NULL != sr.GetStaging());

        sr.GetTexture()->GetDesc(&desc);

        // Only NV12 is handled; the plane math at the end relies on it.
        if (DXGI_FORMAT_NV12 != desc.Format)
        {
            return MFX_ERR_LOCK_MEMORY;
        }

#ifdef D3D11_PARALLEL_COPY
        // copy original frame to staging frame - CPU can't access original frame
        // parallel copy is a little faster
        D3D11_BOX box;
        MSDK_ZERO_VAR(box);
        // This inner `desc` shadows the function-scope `desc` declared above.
        D3D11_TEXTURE2D_DESC desc;
        sr.GetTexture()->GetDesc(&desc);
        box.right = desc.Width;
        box.bottom = desc.Height;
        int count = 2;
        ID3D11DeviceContext* pDeviceContext = m_pDeviceContext;
        // The frame is split into horizontal bands copied by separate tasks;
        // the last index (i == count) covers the band up to box.bottom.
        // NOTE(review): all tasks issue commands on the same device context -
        // confirm that this context may be used from several threads at once.
        Concurrency::parallel_for(0, count+1, [pDeviceContext, &sr, &box, count](int i)
        {
            int block = MSDK_ALIGN16(box.bottom / count);
            D3D11_BOX tmp_box = box;
            tmp_box.top = i * block;
            tmp_box.bottom = (i == count) ? box.bottom : tmp_box.top + block;
            pDeviceContext->CopySubresourceRegion(sr.GetStaging(), 0, tmp_box.left, tmp_box.top, 0, sr.GetTexture(), sr.GetSubResource(), &tmp_box);
        });
#else
        // Single threaded copy
        m_pDeviceContext->CopySubresourceRegion(sr.GetStaging(), 0, 0, 0, 0, sr.GetTexture(), sr.GetSubResource(), NULL);
#endif

        // Map is requested with DO_NOT_WAIT, so retry for as long as it
        // reports DXGI_ERROR_WAS_STILL_DRAWING (the loop spins without sleeping).
        do
        {
            hRes = m_pDeviceContext->Map(sr.GetStaging(), 0, mapType, mapFlags, &lockedRect);
            if (S_OK != hRes && DXGI_ERROR_WAS_STILL_DRAWING != hRes)
            {
                MSDK_TRACE("ERROR: m_pDeviceContext->Map = 0x%lX\n", hRes);
            }
        }
        while (DXGI_ERROR_WAS_STILL_DRAWING == hRes);
    }

    if (FAILED(hRes))
        return MFX_ERR_LOCK_MEMORY;

    // The format was already verified before the copy, so this second check
    // is not expected to fire.
    MSDK_CHECK_NOT_EQUAL(desc.Format, DXGI_FORMAT_NV12, MFX_ERR_LOCK_MEMORY);

    // NV12: Y plane first, interleaved UV plane starts desc.Height rows later.
    // NOTE(review): RowPitch is narrowed to mfxU16 here - confirm pitches
    // above 65535 cannot occur.
    ptr->Pitch = (mfxU16)lockedRect.RowPitch;
    ptr->Y = (mfxU8 *)lockedRect.pData;
    ptr->U = (mfxU8 *)lockedRect.pData + desc.Height * lockedRect.RowPitch;
    ptr->V = ptr->U + 1;

    return MFX_ERR_NONE;
}
int main() { mfxStatus sts = MFX_ERR_NONE; mfxU16 inputWidth = 1920; mfxU16 inputHeight = 1080; // ===================================================================== // Intel Media SDK encode pipeline setup // - In this example we are encoding an AVC (H.264) stream // - Video memory surfaces are used // - Asynchronous operation by executing more than one encode operation simultaneously // // Open input YV12 YUV file FILE* fSource; fopen_s(&fSource, "bbb1920x1080.yuv", "rb"); MSDK_CHECK_POINTER(fSource, MFX_ERR_NULL_PTR); // Create output elementary stream (ES) H.264 file FILE* fSink; fopen_s(&fSink, "test_d3d_async.264", "wb"); MSDK_CHECK_POINTER(fSink, MFX_ERR_NULL_PTR); // Initialize Media SDK session // - MFX_IMPL_AUTO_ANY selects HW accelaration if available (on any adapter) // - Version 1.0 is selected for greatest backwards compatibility. // If more recent API features are needed, change the version accordingly mfxIMPL impl = MFX_IMPL_AUTO_ANY; #ifdef DX11_D3D impl |= MFX_IMPL_VIA_D3D11; #endif mfxVersion ver = {0, 1}; MFXVideoSession mfxSession; sts = mfxSession.Init(impl, &ver); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Create DirectX device context mfxHDL deviceHandle; sts = CreateHWDevice(mfxSession, &deviceHandle, NULL); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Provide device manager to Media SDK sts = mfxSession.SetHandle(DEVICE_MGR_TYPE, deviceHandle); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); mfxFrameAllocator mfxAllocator; mfxAllocator.Alloc = simple_alloc; mfxAllocator.Free = simple_free; mfxAllocator.Lock = simple_lock; mfxAllocator.Unlock = simple_unlock; mfxAllocator.GetHDL = simple_gethdl; // When using video memory we must provide Media SDK with an external allocator sts = mfxSession.SetFrameAllocator(&mfxAllocator); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Initialize encoder parameters mfxVideoParam mfxEncParams; memset(&mfxEncParams, 0, sizeof(mfxEncParams)); mfxEncParams.mfx.CodecId = MFX_CODEC_AVC; 
mfxEncParams.mfx.TargetUsage = MFX_TARGETUSAGE_BALANCED; mfxEncParams.mfx.TargetKbps = 2000; mfxEncParams.mfx.RateControlMethod = MFX_RATECONTROL_VBR; mfxEncParams.mfx.FrameInfo.FrameRateExtN = 30; mfxEncParams.mfx.FrameInfo.FrameRateExtD = 1; mfxEncParams.mfx.FrameInfo.FourCC = MFX_FOURCC_NV12; mfxEncParams.mfx.FrameInfo.ChromaFormat = MFX_CHROMAFORMAT_YUV420; mfxEncParams.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; mfxEncParams.mfx.FrameInfo.CropX = 0; mfxEncParams.mfx.FrameInfo.CropY = 0; mfxEncParams.mfx.FrameInfo.CropW = inputWidth; mfxEncParams.mfx.FrameInfo.CropH = inputHeight; // Width must be a multiple of 16 // Height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture mfxEncParams.mfx.FrameInfo.Width = MSDK_ALIGN16(inputWidth); mfxEncParams.mfx.FrameInfo.Height = (MFX_PICSTRUCT_PROGRESSIVE == mfxEncParams.mfx.FrameInfo.PicStruct)? MSDK_ALIGN16(inputHeight) : MSDK_ALIGN32(inputHeight); mfxEncParams.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY; // Configure Media SDK to keep more operations in flight // - AsyncDepth represents the number of tasks that can be submitted, before synchronizing is required // - The choice of AsyncDepth = 4 is quite arbitrary but has proven to result in good performance mfxEncParams.AsyncDepth = 4; // Create Media SDK encoder MFXVideoENCODE mfxENC(mfxSession); // Validate video encode parameters (optional) // - In this example the validation result is written to same structure // - MFX_WRN_INCOMPATIBLE_VIDEO_PARAM is returned if some of the video parameters are not supported, // instead the encoder will select suitable parameters closest matching the requested configuration sts = mfxENC.Query(&mfxEncParams, &mfxEncParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_INCOMPATIBLE_VIDEO_PARAM); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Query number of required surfaces for encoder mfxFrameAllocRequest EncRequest; memset(&EncRequest, 0, sizeof(EncRequest)); sts = 
mfxENC.QueryIOSurf(&mfxEncParams, &EncRequest); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); #ifdef DX11_D3D EncRequest.Type |= WILL_WRITE; // Hint to DX11 memory handler that application will write data to input surfaces #endif // Allocate required surfaces mfxFrameAllocResponse mfxResponse; sts = mfxAllocator.Alloc(mfxAllocator.pthis, &EncRequest, &mfxResponse); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); mfxU16 nEncSurfNum = mfxResponse.NumFrameActual; // Allocate surface headers (mfxFrameSurface1) for decoder mfxFrameSurface1** pmfxSurfaces = new mfxFrameSurface1*[nEncSurfNum]; MSDK_CHECK_POINTER(pmfxSurfaces, MFX_ERR_MEMORY_ALLOC); for (int i = 0; i < nEncSurfNum; i++) { pmfxSurfaces[i] = new mfxFrameSurface1; memset(pmfxSurfaces[i], 0, sizeof(mfxFrameSurface1)); memcpy(&(pmfxSurfaces[i]->Info), &(mfxEncParams.mfx.FrameInfo), sizeof(mfxFrameInfo)); pmfxSurfaces[i]->Data.MemId = mfxResponse.mids[i]; // MID (memory id) represent one D3D NV12 surface #ifndef ENABLE_INPUT // In case simulating direct access to frames we initialize the allocated surfaces with default pattern // - For true benchmark comparisons to async workloads all surfaces must have the same data #ifndef DX11_D3D IDirect3DSurface9 *pSurface; D3DSURFACE_DESC desc; D3DLOCKED_RECT locked; pSurface = (IDirect3DSurface9 *)mfxResponse.mids[i]; pSurface->GetDesc(&desc); pSurface->LockRect(&locked, 0, D3DLOCK_NOSYSLOCK); memset((mfxU8 *)locked.pBits, 100, desc.Height*locked.Pitch); // Y plane memset((mfxU8 *)locked.pBits + desc.Height * locked.Pitch, 50, (desc.Height*locked.Pitch)/2); // UV plane pSurface->UnlockRect(); #else // For now, just leave D3D11 surface data uninitialized #endif #endif } // Initialize the Media SDK encoder sts = mfxENC.Init(&mfxEncParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Retrieve video parameters selected by encoder. 
// - BufferSizeInKB parameter is required to set bit stream buffer size mfxVideoParam par; memset(&par, 0, sizeof(par)); sts = mfxENC.GetVideoParam(&par); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Create task pool to improve asynchronous performance (greater GPU utilization) mfxU16 taskPoolSize = mfxEncParams.AsyncDepth; // number of tasks that can be submitted, before synchronizing is required Task* pTasks = new Task[taskPoolSize]; memset(pTasks, 0, sizeof(Task) * taskPoolSize); for(int i=0;i<taskPoolSize;i++) { // Prepare Media SDK bit stream buffer pTasks[i].mfxBS.MaxLength = par.mfx.BufferSizeInKB * 1000; pTasks[i].mfxBS.Data = new mfxU8[pTasks[i].mfxBS.MaxLength]; MSDK_CHECK_POINTER(pTasks[i].mfxBS.Data, MFX_ERR_MEMORY_ALLOC); } // =================================== // Start encoding the frames // #ifdef ENABLE_BENCHMARK LARGE_INTEGER tStart, tEnd; QueryPerformanceFrequency(&tStart); double freq = (double)tStart.QuadPart; QueryPerformanceCounter(&tStart); #endif int nEncSurfIdx = 0; int nTaskIdx = 0; int nFirstSyncTask = 0; mfxU32 nFrame = 0; // // Stage 1: Main encoding loop // while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts) { nTaskIdx = GetFreeTaskIndex(pTasks, taskPoolSize); // Find free task if(MFX_ERR_NOT_FOUND == nTaskIdx) { // No more free tasks, need to sync sts = mfxSession.SyncOperation(pTasks[nFirstSyncTask].syncp, 60000); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink); MSDK_BREAK_ON_ERROR(sts); pTasks[nFirstSyncTask].syncp = NULL; nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize; ++nFrame; #ifdef ENABLE_OUTPUT printf("Frame number: %d\r", nFrame); #endif } else { nEncSurfIdx = GetFreeSurfaceIndex(pmfxSurfaces, nEncSurfNum); // Find free frame surface if (MFX_ERR_NOT_FOUND == nEncSurfIdx) return MFX_ERR_MEMORY_ALLOC; // Surface locking required when read/write D3D surfaces sts = mfxAllocator.Lock(mfxAllocator.pthis, pmfxSurfaces[nEncSurfIdx]->Data.MemId, 
&(pmfxSurfaces[nEncSurfIdx]->Data)); MSDK_BREAK_ON_ERROR(sts); sts = LoadRawFrame(pmfxSurfaces[nEncSurfIdx], fSource); MSDK_BREAK_ON_ERROR(sts); sts = mfxAllocator.Unlock(mfxAllocator.pthis, pmfxSurfaces[nEncSurfIdx]->Data.MemId, &(pmfxSurfaces[nEncSurfIdx]->Data)); MSDK_BREAK_ON_ERROR(sts); for (;;) { // Encode a frame asychronously (returns immediately) sts = mfxENC.EncodeFrameAsync(NULL, pmfxSurfaces[nEncSurfIdx], &pTasks[nTaskIdx].mfxBS, &pTasks[nTaskIdx].syncp); if (MFX_ERR_NONE < sts && !pTasks[nTaskIdx].syncp) // Repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // Wait if device is busy, then repeat the same call } else if (MFX_ERR_NONE < sts && pTasks[nTaskIdx].syncp) { sts = MFX_ERR_NONE; // Ignore warnings if output is available break; } else if (MFX_ERR_NOT_ENOUGH_BUFFER == sts) { // Allocate more bitstream buffer memory here if needed... break; } else break; } } } // MFX_ERR_MORE_DATA means that the input file has ended, need to go to buffering loop, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 2: Retrieve the buffered encoded frames // while (MFX_ERR_NONE <= sts) { nTaskIdx = GetFreeTaskIndex(pTasks, taskPoolSize); // Find free task if(MFX_ERR_NOT_FOUND == nTaskIdx) { // No more free tasks, need to sync sts = mfxSession.SyncOperation(pTasks[nFirstSyncTask].syncp, 60000); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink); MSDK_BREAK_ON_ERROR(sts); pTasks[nFirstSyncTask].syncp = NULL; nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize; ++nFrame; #ifdef ENABLE_OUTPUT printf("Frame number: %d\r", nFrame); #endif } else { for (;;) { // Encode a frame asychronously (returns immediately) sts = mfxENC.EncodeFrameAsync(NULL, NULL, &pTasks[nTaskIdx].mfxBS, &pTasks[nTaskIdx].syncp); if (MFX_ERR_NONE < sts && !pTasks[nTaskIdx].syncp) // Repeat the call if warning and no output { if 
(MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // Wait if device is busy, then repeat the same call } else if (MFX_ERR_NONE < sts && pTasks[nTaskIdx].syncp) { sts = MFX_ERR_NONE; // Ignore warnings if output is available break; } else break; } } } // MFX_ERR_MORE_DATA indicates that there are no more buffered frames, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 3: Sync all remaining tasks in task pool // while(pTasks[nFirstSyncTask].syncp) { sts = mfxSession.SyncOperation(pTasks[nFirstSyncTask].syncp, 60000); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink); MSDK_BREAK_ON_ERROR(sts); pTasks[nFirstSyncTask].syncp = NULL; nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize; ++nFrame; #ifdef ENABLE_OUTPUT printf("Frame number: %d\r", nFrame); #endif } #ifdef ENABLE_BENCHMARK QueryPerformanceCounter(&tEnd); double duration = ((double)tEnd.QuadPart - (double)tStart.QuadPart) / freq; printf("\nExecution time: %3.2fs (%3.2ffps)\n", duration, nFrame/duration); #endif // =================================================================== // Clean up resources // - It is recommended to close Media SDK components first, before releasing allocated surfaces, since // some surfaces may still be locked by internal Media SDK resources. mfxENC.Close(); // mfxSession closed automatically on destruction for (int i = 0; i < nEncSurfNum; i++) delete pmfxSurfaces[i]; MSDK_SAFE_DELETE_ARRAY(pmfxSurfaces); for(int i=0;i<taskPoolSize;i++) MSDK_SAFE_DELETE_ARRAY(pTasks[i].mfxBS.Data); MSDK_SAFE_DELETE_ARRAY(pTasks); fclose(fSource); fclose(fSink); CleanupHWDevice(); return 0; }
// Creates an H.264 encoder handle working on system-memory NV12 surfaces.
//
// pErrorCode - receives 0 on success, otherwise the code of the last failing
//              step: -1 MFXInit, -2 Query, -3 QueryIOSurf, -4 Init,
//              -5 GetVideoParam. Later failures overwrite earlier codes.
// width      - frame width in pixels.
// height     - frame height in pixels.
// bitRate    - target bitrate (written to TargetKbps); 0 selects 128 * 8.
// gop        - not used by this function.
// Returns the newly allocated handle. A handle is returned even when a step
// failed, so callers must check *pErrorCode.
// NOTE(review): the malloc() results are not checked for NULL here.
extern "C" __declspec(dllexport) void *openEncoder(int *pErrorCode, int width, int height, int bitRate, int gop)
{
    *pErrorCode = 0;

    IntelEncoderHandle *pHandle = (IntelEncoderHandle *) malloc(sizeof(IntelEncoderHandle));

    mfxStatus sts = MFX_ERR_NONE;

    mfxIMPL impl = MFX_IMPL_AUTO_ANY;

    mfxVersion ver;
    ver.Major = 1;
    ver.Minor = 0;

    sts = MFXInit(impl, &ver, &pHandle->session);
    if (MFX_ERR_NONE != sts) {
        // TODO:
        *pErrorCode = -1;
    }

    // The queried implementation and version are not used further.
    MFXQueryIMPL(pHandle->session, &impl);

    mfxVersion verTemp;
    MFXQueryVersion(pHandle->session, &verTemp);

    mfxVideoParam mfxEncParams;
    memset(&mfxEncParams, 0, sizeof(mfxEncParams));
    mfxEncParams.mfx.CodecId = MFX_CODEC_AVC;
    // mfxEncParams.mfx.CodecProfile = MFX_PROFILE_AVC_CONSTRAINED_BASELINE;
    mfxEncParams.mfx.TargetUsage = MFX_TARGETUSAGE_BALANCED;
    if (0 == bitRate) bitRate = 128 * 8;
    mfxEncParams.mfx.TargetKbps = bitRate;
    mfxEncParams.mfx.RateControlMethod = MFX_RATECONTROL_VBR;
    mfxEncParams.mfx.FrameInfo.FrameRateExtN = 30;
    mfxEncParams.mfx.FrameInfo.FrameRateExtD = 1;
    mfxEncParams.mfx.FrameInfo.FourCC = MFX_FOURCC_NV12;
    mfxEncParams.mfx.FrameInfo.ChromaFormat = MFX_CHROMAFORMAT_YUV420;
    mfxEncParams.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;
    mfxEncParams.mfx.FrameInfo.CropX = 0;
    mfxEncParams.mfx.FrameInfo.CropY = 0;
    mfxEncParams.mfx.FrameInfo.CropW = width;
    mfxEncParams.mfx.FrameInfo.CropH = height;

    // Width must be a multiple of 16
    // Height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture
    mfxEncParams.mfx.FrameInfo.Width = MSDK_ALIGN16(width);
    // The progressive branch previously aligned `width` here, which produced
    // a wrong surface height for every non-square frame.
    mfxEncParams.mfx.FrameInfo.Height = (MFX_PICSTRUCT_PROGRESSIVE == mfxEncParams.mfx.FrameInfo.PicStruct) ?
        MSDK_ALIGN16(height) : MSDK_ALIGN32(height);

    mfxEncParams.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY;

    sts = MFXVideoENCODE_Query(pHandle->session, &mfxEncParams, &mfxEncParams);
    if (MFX_ERR_NONE != sts) {
        // TODO:
        *pErrorCode = -2;
    }

    mfxFrameAllocRequest EncRequest;
    memset(&EncRequest, 0, sizeof(EncRequest));
    sts = MFXVideoENCODE_QueryIOSurf(pHandle->session, &mfxEncParams, &EncRequest);
    if (MFX_ERR_NONE != sts) {
        // TODO:
        *pErrorCode = -3;
    }

    pHandle->nEncSurfNum = EncRequest.NumFrameSuggested;

    // Buffer dimensions are padded to a multiple of 32.
    mfxU16 w = (mfxU16)MSDK_ALIGN32(EncRequest.Info.Width);
    mfxU16 h = (mfxU16)MSDK_ALIGN32(EncRequest.Info.Height);
    mfxU8 bitsPerPixel = 12; // NV12 format is a 12 bits per pixel format
    mfxU32 surfaceSize = w * h * bitsPerPixel / 8;

    // One contiguous pixel buffer shared by all surfaces.
    pHandle->pSurfaceBuffers = (mfxU8 *)malloc(surfaceSize * pHandle->nEncSurfNum * sizeof(mfxU8));

    pHandle->ppEncSurfaces = (mfxFrameSurface1 **)malloc(sizeof(mfxFrameSurface1*)* pHandle->nEncSurfNum);
    for (int i = 0; i < pHandle->nEncSurfNum; i++)
    {
        pHandle->ppEncSurfaces[i] = (mfxFrameSurface1 *)malloc(sizeof(mfxFrameSurface1));
        memset(pHandle->ppEncSurfaces[i], 0, sizeof(mfxFrameSurface1));
        memcpy(&(pHandle->ppEncSurfaces[i]->Info), &(mfxEncParams.mfx.FrameInfo), sizeof(mfxFrameInfo));
        // NV12 layout: Y plane followed by the interleaved UV plane.
        pHandle->ppEncSurfaces[i]->Data.Y = &pHandle->pSurfaceBuffers[surfaceSize * i];
        pHandle->ppEncSurfaces[i]->Data.U = pHandle->ppEncSurfaces[i]->Data.Y + w * h;
        pHandle->ppEncSurfaces[i]->Data.V = pHandle->ppEncSurfaces[i]->Data.U + 1;
        pHandle->ppEncSurfaces[i]->Data.Pitch = w;

        // In case simulating direct access to frames we initialize the allocated surfaces with default pattern
        // - For true benchmark comparisons to async workloads all surfaces must have the same data
        memset(pHandle->ppEncSurfaces[i]->Data.Y, 100, w * h); // Y plane
        memset(pHandle->ppEncSurfaces[i]->Data.U, 50, (w * h) / 2); // UV plane
    }

    sts = MFXVideoENCODE_Init(pHandle->session, &mfxEncParams);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION);
    if (MFX_ERR_NONE != sts) {
        // TODO:
        *pErrorCode = -4;
    }

    // Read back the parameters chosen by the encoder; BufferSizeInKB sizes
    // the output bitstream buffer.
    mfxVideoParam par;
    memset(&par, 0, sizeof(par));
    sts = MFXVideoENCODE_GetVideoParam(pHandle->session, &par);
    if (MFX_ERR_NONE != sts) {
        // TODO:
        *pErrorCode = -5;
    }

    memset(&pHandle->mfxBS, 0, sizeof(pHandle->mfxBS));
    pHandle->mfxBS.MaxLength = par.mfx.BufferSizeInKB * 1024;
    pHandle->mfxBS.Data = (mfxU8 *)malloc(sizeof(mfxU8)* pHandle->mfxBS.MaxLength);

    return pHandle;
}
DWORD WINAPI TranscodeThread(LPVOID arg) { ThreadData *pData = (ThreadData *)arg; int id = pData->id; mfxStatus sts = MFX_ERR_NONE; // ===================================================================== // Intel Media SDK transcode opaque pipeline setup // - Transcode H.264 to H.264, resizing the encoded stream to half the resolution using VPP // - Multiple streams are transcoded concurrently // - Same input stream is used for all concurrent threadcoding threads // // Open input H.264 elementary stream (ES) file FILE* fSource; char inFile[100] = "bbb640x480.264"; fopen_s(&fSource, inFile, "rb"); MSDK_CHECK_POINTER(fSource, MFX_ERR_NULL_PTR); // Create output elementary stream (ES) H.264 file FILE* fSink; char outFile[100] = "bbb320x240_xx.264"; outFile[11] = '0' + (char)(id/10); outFile[12] = '0' + (char)(id%10); fopen_s(&fSink, outFile, "wb"); MSDK_CHECK_POINTER(fSink, MFX_ERR_NULL_PTR); MFXVideoSession* pmfxSession = NULL; // Initialize Media SDK session // - MFX_IMPL_AUTO_ANY selects HW accelaration if available (on any adapter) // - Version 1.3 is selected since the opaque memory feature was added in this API release // If more recent API features are needed, change the version accordingly mfxIMPL impl = MFX_IMPL_AUTO_ANY; mfxVersion ver = {3, 1}; // Note: API 1.3 ! 
pmfxSession = new MFXVideoSession; MSDK_CHECK_POINTER(pmfxSession, MFX_ERR_NULL_PTR); sts = pmfxSession->Init(impl, &ver); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Create Media SDK decoder & encoder & VPP MFXVideoDECODE* pmfxDEC = new MFXVideoDECODE(*pmfxSession); MSDK_CHECK_POINTER(pmfxDEC, MFX_ERR_NULL_PTR); MFXVideoENCODE* pmfxENC = new MFXVideoENCODE(*pmfxSession); MSDK_CHECK_POINTER(pmfxENC, MFX_ERR_NULL_PTR); MFXVideoVPP* pmfxVPP = new MFXVideoVPP(*pmfxSession); MSDK_CHECK_POINTER(pmfxVPP, MFX_ERR_NULL_PTR); // Set required video parameters for decode mfxVideoParam mfxDecParams; memset(&mfxDecParams, 0, sizeof(mfxDecParams)); mfxDecParams.mfx.CodecId = MFX_CODEC_AVC; mfxDecParams.IOPattern = MFX_IOPATTERN_OUT_OPAQUE_MEMORY; // Configure Media SDK to keep more operations in flight // - AsyncDepth represents the number of tasks that can be submitted, before synchronizing is required // - The choice of AsyncDepth = 3 is quite arbitrary but has proven to result in good performance mfxDecParams.AsyncDepth = 3; // Prepare Media SDK bit stream buffer for decoder // - Arbitrary buffer size for this example mfxBitstream mfxBS; memset(&mfxBS, 0, sizeof(mfxBS)); mfxBS.MaxLength = 1024 * 1024; mfxBS.Data = new mfxU8[mfxBS.MaxLength]; MSDK_CHECK_POINTER(mfxBS.Data, MFX_ERR_MEMORY_ALLOC); // Read a chunk of data from stream file into bit stream buffer // - Parse bit stream, searching for header and fill video parameters structure // - Abort if bit stream header is not found in the first bit stream buffer chunk sts = ReadBitStreamData(&mfxBS, fSource); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = pmfxDEC->DecodeHeader(&mfxBS, &mfxDecParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Initialize VPP parameters mfxVideoParam VPPParams; memset(&VPPParams, 0, sizeof(VPPParams)); // Input data VPPParams.vpp.In.FourCC = MFX_FOURCC_NV12; VPPParams.vpp.In.ChromaFormat = MFX_CHROMAFORMAT_YUV420; 
VPPParams.vpp.In.CropX = 0; VPPParams.vpp.In.CropY = 0; VPPParams.vpp.In.CropW = mfxDecParams.mfx.FrameInfo.CropW; VPPParams.vpp.In.CropH = mfxDecParams.mfx.FrameInfo.CropH; VPPParams.vpp.In.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; VPPParams.vpp.In.FrameRateExtN = 30; VPPParams.vpp.In.FrameRateExtD = 1; // width must be a multiple of 16 // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture VPPParams.vpp.In.Width = MSDK_ALIGN16(VPPParams.vpp.In.CropW); VPPParams.vpp.In.Height = (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.In.PicStruct)? MSDK_ALIGN16(VPPParams.vpp.In.CropH) : MSDK_ALIGN32(VPPParams.vpp.In.CropH); // Output data VPPParams.vpp.Out.FourCC = MFX_FOURCC_NV12; VPPParams.vpp.Out.ChromaFormat = MFX_CHROMAFORMAT_YUV420; VPPParams.vpp.Out.CropX = 0; VPPParams.vpp.Out.CropY = 0; VPPParams.vpp.Out.CropW = VPPParams.vpp.In.CropW/2; // Half the resolution of decode stream VPPParams.vpp.Out.CropH = VPPParams.vpp.In.CropH/2; VPPParams.vpp.Out.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; VPPParams.vpp.Out.FrameRateExtN = 30; VPPParams.vpp.Out.FrameRateExtD = 1; // width must be a multiple of 16 // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture VPPParams.vpp.Out.Width = MSDK_ALIGN16(VPPParams.vpp.Out.CropW); VPPParams.vpp.Out.Height = (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.Out.PicStruct)? 
MSDK_ALIGN16(VPPParams.vpp.Out.CropH) : MSDK_ALIGN32(VPPParams.vpp.Out.CropH); VPPParams.IOPattern = MFX_IOPATTERN_IN_OPAQUE_MEMORY | MFX_IOPATTERN_OUT_OPAQUE_MEMORY; // Configure Media SDK to keep more operations in flight // - AsyncDepth represents the number of tasks that can be submitted, before synchronizing is required VPPParams.AsyncDepth = mfxDecParams.AsyncDepth; // Initialize encoder parameters mfxVideoParam mfxEncParams; memset(&mfxEncParams, 0, sizeof(mfxEncParams)); mfxEncParams.mfx.CodecId = MFX_CODEC_AVC; mfxEncParams.mfx.TargetUsage = MFX_TARGETUSAGE_BALANCED; mfxEncParams.mfx.TargetKbps = 500; mfxEncParams.mfx.RateControlMethod = MFX_RATECONTROL_VBR; mfxEncParams.mfx.FrameInfo.FrameRateExtN = 30; mfxEncParams.mfx.FrameInfo.FrameRateExtD = 1; mfxEncParams.mfx.FrameInfo.FourCC = MFX_FOURCC_NV12; mfxEncParams.mfx.FrameInfo.ChromaFormat = MFX_CHROMAFORMAT_YUV420; mfxEncParams.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; mfxEncParams.mfx.FrameInfo.CropX = 0; mfxEncParams.mfx.FrameInfo.CropY = 0; mfxEncParams.mfx.FrameInfo.CropW = VPPParams.vpp.Out.CropW; // Half the resolution of decode stream mfxEncParams.mfx.FrameInfo.CropH = VPPParams.vpp.Out.CropH; // width must be a multiple of 16 // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture mfxEncParams.mfx.FrameInfo.Width = MSDK_ALIGN16(mfxEncParams.mfx.FrameInfo.CropW); mfxEncParams.mfx.FrameInfo.Height = (MFX_PICSTRUCT_PROGRESSIVE == mfxEncParams.mfx.FrameInfo.PicStruct)? 
MSDK_ALIGN16(mfxEncParams.mfx.FrameInfo.CropH) : MSDK_ALIGN32(mfxEncParams.mfx.FrameInfo.CropH); mfxEncParams.IOPattern = MFX_IOPATTERN_IN_OPAQUE_MEMORY; // Configure Media SDK to keep more operations in flight // - AsyncDepth represents the number of tasks that can be submitted, before synchronizing is required mfxEncParams.AsyncDepth = mfxDecParams.AsyncDepth; // Query number required surfaces for decoder mfxFrameAllocRequest DecRequest; memset(&DecRequest, 0, sizeof(DecRequest)); sts = pmfxDEC->QueryIOSurf(&mfxDecParams, &DecRequest); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Query number required surfaces for encoder mfxFrameAllocRequest EncRequest; memset(&EncRequest, 0, sizeof(EncRequest)); sts = pmfxENC->QueryIOSurf(&mfxEncParams, &EncRequest); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Query number of required surfaces for VPP mfxFrameAllocRequest VPPRequest[2];// [0] - in, [1] - out memset(&VPPRequest, 0, sizeof(mfxFrameAllocRequest)*2); sts = pmfxVPP->QueryIOSurf(&VPPParams, VPPRequest); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Determine the required number of surfaces for decoder output (VPP input) and for VPP output (encoder input) mfxU16 nSurfNumDecVPP = DecRequest.NumFrameSuggested + VPPRequest[0].NumFrameSuggested + VPPParams.AsyncDepth; mfxU16 nSurfNumVPPEnc = EncRequest.NumFrameSuggested + VPPRequest[1].NumFrameSuggested + VPPParams.AsyncDepth; // Initialize shared surfaces for decoder, VPP and encode // - Note that no buffer memory is allocated, for opaque memory this is handled by Media SDK internally // - Frame surface array keeps reference to all surfaces // - Opaque memory is configured with the mfxExtOpaqueSurfaceAlloc extended buffers mfxFrameSurface1** pSurfaces = new mfxFrameSurface1*[nSurfNumDecVPP]; MSDK_CHECK_POINTER(pSurfaces, MFX_ERR_MEMORY_ALLOC); for (int i = 0; i < nSurfNumDecVPP; i++) { pSurfaces[i] = new mfxFrameSurface1; MSDK_CHECK_POINTER(pSurfaces[i], 
MFX_ERR_MEMORY_ALLOC); memset(pSurfaces[i], 0, sizeof(mfxFrameSurface1)); memcpy(&(pSurfaces[i]->Info), &(DecRequest.Info), sizeof(mfxFrameInfo)); } mfxFrameSurface1** pSurfaces2 = new mfxFrameSurface1*[nSurfNumVPPEnc]; MSDK_CHECK_POINTER(pSurfaces2, MFX_ERR_MEMORY_ALLOC); for (int i = 0; i < nSurfNumVPPEnc; i++) { pSurfaces2[i] = new mfxFrameSurface1; MSDK_CHECK_POINTER(pSurfaces2[i], MFX_ERR_MEMORY_ALLOC); memset(pSurfaces2[i], 0, sizeof(mfxFrameSurface1)); memcpy(&(pSurfaces2[i]->Info), &(EncRequest.Info), sizeof(mfxFrameInfo)); } mfxExtOpaqueSurfaceAlloc extOpaqueAllocDec; memset(&extOpaqueAllocDec, 0, sizeof(extOpaqueAllocDec)); extOpaqueAllocDec.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION; extOpaqueAllocDec.Header.BufferSz = sizeof(mfxExtOpaqueSurfaceAlloc); mfxExtBuffer* pExtParamsDec = (mfxExtBuffer*)&extOpaqueAllocDec; mfxExtOpaqueSurfaceAlloc extOpaqueAllocVPP; memset(&extOpaqueAllocVPP, 0, sizeof(extOpaqueAllocVPP)); extOpaqueAllocVPP.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION; extOpaqueAllocVPP.Header.BufferSz = sizeof(mfxExtOpaqueSurfaceAlloc); mfxExtBuffer* pExtParamsVPP = (mfxExtBuffer*)&extOpaqueAllocVPP; mfxExtOpaqueSurfaceAlloc extOpaqueAllocEnc; memset(&extOpaqueAllocEnc, 0, sizeof(extOpaqueAllocEnc)); extOpaqueAllocEnc.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION; extOpaqueAllocEnc.Header.BufferSz = sizeof(mfxExtOpaqueSurfaceAlloc); mfxExtBuffer* pExtParamsENC = (mfxExtBuffer*)&extOpaqueAllocEnc; extOpaqueAllocDec.Out.Surfaces = pSurfaces; extOpaqueAllocDec.Out.NumSurface = nSurfNumDecVPP; extOpaqueAllocDec.Out.Type = DecRequest.Type; memcpy(&extOpaqueAllocVPP.In, &extOpaqueAllocDec.Out, sizeof(extOpaqueAllocDec.Out)); extOpaqueAllocVPP.Out.Surfaces = pSurfaces2; extOpaqueAllocVPP.Out.NumSurface = nSurfNumVPPEnc; extOpaqueAllocVPP.Out.Type = EncRequest.Type; memcpy(&extOpaqueAllocEnc.In, &extOpaqueAllocVPP.Out, sizeof(extOpaqueAllocVPP.Out)); mfxDecParams.ExtParam = &pExtParamsDec; mfxDecParams.NumExtParam = 
1; VPPParams.ExtParam = &pExtParamsVPP; VPPParams.NumExtParam = 1; mfxEncParams.ExtParam = &pExtParamsENC; mfxEncParams.NumExtParam = 1; // Initialize the Media SDK decoder sts = pmfxDEC->Init(&mfxDecParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Initialize the Media SDK encoder sts = pmfxENC->Init(&mfxEncParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Initialize Media SDK VPP sts = pmfxVPP->Init(&VPPParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Retrieve video parameters selected by encoder. // - BufferSizeInKB parameter is required to set bit stream buffer size mfxVideoParam par; memset(&par, 0, sizeof(par)); sts = pmfxENC->GetVideoParam(&par); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Create task pool to improve asynchronous performance (greater GPU utilization) mfxU16 taskPoolSize = mfxEncParams.AsyncDepth; // number of tasks that can be submitted, before synchronizing is required Task* pTasks = new Task[taskPoolSize]; memset(pTasks, 0, sizeof(Task) * taskPoolSize); for(int i=0;i<taskPoolSize;i++) { // Prepare Media SDK bit stream buffer pTasks[i].mfxBS.MaxLength = par.mfx.BufferSizeInKB * 1000; pTasks[i].mfxBS.Data = new mfxU8[pTasks[i].mfxBS.MaxLength]; MSDK_CHECK_POINTER(pTasks[i].mfxBS.Data, MFX_ERR_MEMORY_ALLOC); } // =================================== // Start transcoding the frames // #ifdef ENABLE_BENCHMARK LARGE_INTEGER tStart, tEnd; QueryPerformanceFrequency(&tStart); double freq = (double)tStart.QuadPart; QueryPerformanceCounter(&tStart); #endif mfxSyncPoint syncpD, syncpV; mfxFrameSurface1* pmfxOutSurface = NULL; mfxU32 nFrame = 0; int nIndex = 0; int nIndex2 = 0; int nFirstSyncTask = 0; int nTaskIdx = 0; // // Stage 1: Main transcoding loop // while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts || MFX_ERR_MORE_SURFACE == sts) { nTaskIdx = 
GetFreeTaskIndex(pTasks, taskPoolSize); // Find free task if(MFX_ERR_NOT_FOUND == nTaskIdx) { // No more free tasks, need to sync sts = pmfxSession->SyncOperation(pTasks[nFirstSyncTask].syncp, 60000); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink); MSDK_BREAK_ON_ERROR(sts); pTasks[nFirstSyncTask].syncp = NULL; pTasks[nFirstSyncTask].mfxBS.DataLength = 0; pTasks[nFirstSyncTask].mfxBS.DataOffset = 0; nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize; ++nFrame; #ifdef ENABLE_OUTPUT if((nFrame % 100) == 0) printf("(%d) Frame number: %d\n", id, nFrame); #endif } else { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // just wait and then repeat the same call to DecodeFrameAsync if (MFX_ERR_MORE_DATA == sts) { sts = ReadBitStreamData(&mfxBS, fSource); // Read more data to input bit stream MSDK_BREAK_ON_ERROR(sts); } if (MFX_ERR_MORE_SURFACE == sts || MFX_ERR_NONE == sts) { nIndex = GetFreeSurfaceIndex(pSurfaces, nSurfNumDecVPP); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex) return MFX_ERR_MEMORY_ALLOC; } // Decode a frame asychronously (returns immediately) sts = pmfxDEC->DecodeFrameAsync(&mfxBS, pSurfaces[nIndex], &pmfxOutSurface, &syncpD); // Ignore warnings if output is available, // if no output and no action required just repeat the DecodeFrameAsync call if (MFX_ERR_NONE < sts && syncpD) sts = MFX_ERR_NONE; if (MFX_ERR_NONE == sts) { nIndex2 = GetFreeSurfaceIndex(pSurfaces2, nSurfNumVPPEnc); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex) return MFX_ERR_MEMORY_ALLOC; for (;;) { // Process a frame asychronously (returns immediately) sts = pmfxVPP->RunFrameVPPAsync(pmfxOutSurface, pSurfaces2[nIndex2], NULL, &syncpV); if (MFX_ERR_NONE < sts && !syncpV) // repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && syncpV) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else break; 
// not a warning } // VPP needs more data, let decoder decode another frame as input if (MFX_ERR_MORE_DATA == sts) { continue; } else if (MFX_ERR_MORE_SURFACE == sts) { // Not relevant for the illustrated workload! Therefore not handled. // Relevant for cases when VPP produces more frames at output than consumes at input. E.g. framerate conversion 30 fps -> 60 fps break; } else MSDK_BREAK_ON_ERROR(sts); for (;;) { // Encode a frame asychronously (returns immediately) sts = pmfxENC->EncodeFrameAsync(NULL, pSurfaces2[nIndex2], &pTasks[nTaskIdx].mfxBS, &pTasks[nTaskIdx].syncp); if (MFX_ERR_NONE < sts && !pTasks[nTaskIdx].syncp) // repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && pTasks[nTaskIdx].syncp) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else if (MFX_ERR_NOT_ENOUGH_BUFFER == sts) { // Allocate more bitstream buffer memory here if needed... break; } else break; } } } } // MFX_ERR_MORE_DATA means that file has ended, need to go to buffering loop, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 2: Retrieve the buffered decoded frames // while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_SURFACE == sts) { nTaskIdx = GetFreeTaskIndex(pTasks, taskPoolSize); // Find free task if(MFX_ERR_NOT_FOUND == nTaskIdx) { // No more free tasks, need to sync sts = pmfxSession->SyncOperation(pTasks[nFirstSyncTask].syncp, 60000); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink); MSDK_BREAK_ON_ERROR(sts); pTasks[nFirstSyncTask].syncp = NULL; pTasks[nFirstSyncTask].mfxBS.DataLength = 0; pTasks[nFirstSyncTask].mfxBS.DataOffset = 0; nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize; ++nFrame; #ifdef ENABLE_OUTPUT if((nFrame % 100) == 0) printf("(%d) Frame number: %d\n", id, nFrame); #endif } else { if (MFX_WRN_DEVICE_BUSY == sts) 
Sleep(1); nIndex = GetFreeSurfaceIndex(pSurfaces, nSurfNumDecVPP); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex) return MFX_ERR_MEMORY_ALLOC; // Decode a frame asychronously (returns immediately) sts = pmfxDEC->DecodeFrameAsync(NULL, pSurfaces[nIndex], &pmfxOutSurface, &syncpD); // Ignore warnings if output is available, // if no output and no action required just repeat the DecodeFrameAsync call if (MFX_ERR_NONE < sts && syncpD) sts = MFX_ERR_NONE; if (MFX_ERR_NONE == sts) { nIndex2 = GetFreeSurfaceIndex(pSurfaces2, nSurfNumVPPEnc); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex) return MFX_ERR_MEMORY_ALLOC; for (;;) { // Process a frame asychronously (returns immediately) sts = pmfxVPP->RunFrameVPPAsync(pmfxOutSurface, pSurfaces2[nIndex2], NULL, &syncpV); if (MFX_ERR_NONE < sts && !syncpV) // repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && syncpV) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else break; // not a warning } // VPP needs more data, let decoder decode another frame as input if (MFX_ERR_MORE_DATA == sts) { continue; } else if (MFX_ERR_MORE_SURFACE == sts) { // Not relevant for the illustrated workload! Therefore not handled. // Relevant for cases when VPP produces more frames at output than consumes at input. E.g. 
framerate conversion 30 fps -> 60 fps break; } else MSDK_BREAK_ON_ERROR(sts); for (;;) { // Encode a frame asychronously (returns immediately) sts = pmfxENC->EncodeFrameAsync(NULL, pSurfaces2[nIndex2], &pTasks[nTaskIdx].mfxBS, &pTasks[nTaskIdx].syncp); if (MFX_ERR_NONE < sts && !pTasks[nTaskIdx].syncp) // repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && pTasks[nTaskIdx].syncp) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else if (MFX_ERR_NOT_ENOUGH_BUFFER == sts) { // Allocate more bitstream buffer memory here if needed... break; } else break; } } } } // MFX_ERR_MORE_DATA indicates that all decode buffers has been fetched, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 3: Retrieve buffered frames from VPP // while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts || MFX_ERR_MORE_SURFACE == sts) { nTaskIdx = GetFreeTaskIndex(pTasks, taskPoolSize); // Find free task if(MFX_ERR_NOT_FOUND == nTaskIdx) { // No more free tasks, need to sync sts = pmfxSession->SyncOperation(pTasks[nFirstSyncTask].syncp, 60000); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink); MSDK_BREAK_ON_ERROR(sts); pTasks[nFirstSyncTask].syncp = NULL; pTasks[nFirstSyncTask].mfxBS.DataLength = 0; pTasks[nFirstSyncTask].mfxBS.DataOffset = 0; nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize; ++nFrame; #ifdef ENABLE_OUTPUT if((nFrame % 100) == 0) printf("(%d) Frame number: %d\n", id, nFrame); #endif } else { nIndex2 = GetFreeSurfaceIndex(pSurfaces2, nSurfNumVPPEnc); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex) return MFX_ERR_MEMORY_ALLOC; for (;;) { // Process a frame asychronously (returns immediately) sts = pmfxVPP->RunFrameVPPAsync(NULL, pSurfaces2[nIndex2], NULL, &syncpV); if (MFX_ERR_NONE < sts && !syncpV) // repeat the call 
if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && syncpV) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else break; // not a warning } if (MFX_ERR_MORE_SURFACE == sts) { // Not relevant for the illustrated workload! Therefore not handled. // Relevant for cases when VPP produces more frames at output than consumes at input. E.g. framerate conversion 30 fps -> 60 fps break; } else MSDK_BREAK_ON_ERROR(sts); for (;;) { // Encode a frame asychronously (returns immediately) sts = pmfxENC->EncodeFrameAsync(NULL, pSurfaces2[nIndex2], &pTasks[nTaskIdx].mfxBS, &pTasks[nTaskIdx].syncp); if (MFX_ERR_NONE < sts && !pTasks[nTaskIdx].syncp) // repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && pTasks[nTaskIdx].syncp) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else if (MFX_ERR_NOT_ENOUGH_BUFFER == sts) { // Allocate more bitstream buffer memory here if needed... 
break; } else break; } } } // MFX_ERR_MORE_DATA indicates that all VPP buffers has been fetched, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 4: Retrieve the buffered encoded frames // while (MFX_ERR_NONE <= sts) { nTaskIdx = GetFreeTaskIndex(pTasks, taskPoolSize); // Find free task if(MFX_ERR_NOT_FOUND == nTaskIdx) { // No more free tasks, need to sync sts = pmfxSession->SyncOperation(pTasks[nFirstSyncTask].syncp, 60000); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink); MSDK_BREAK_ON_ERROR(sts); pTasks[nFirstSyncTask].syncp = NULL; pTasks[nFirstSyncTask].mfxBS.DataLength = 0; pTasks[nFirstSyncTask].mfxBS.DataOffset = 0; nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize; ++nFrame; #ifdef ENABLE_OUTPUT if((nFrame % 100) == 0) printf("(%d) Frame number: %d\n", id, nFrame); #endif } else { for (;;) { // Encode a frame asychronously (returns immediately) sts = pmfxENC->EncodeFrameAsync(NULL, NULL, &pTasks[nTaskIdx].mfxBS, &pTasks[nTaskIdx].syncp); if (MFX_ERR_NONE < sts && !pTasks[nTaskIdx].syncp) // repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && pTasks[nTaskIdx].syncp) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else break; } } } // MFX_ERR_MORE_DATA indicates that there are no more buffered frames, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 5: Sync all remaining tasks in task pool // while(pTasks[nFirstSyncTask].syncp) { sts = pmfxSession->SyncOperation(pTasks[nFirstSyncTask].syncp, 60000); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = WriteBitStreamFrame(&pTasks[nFirstSyncTask].mfxBS, fSink); MSDK_BREAK_ON_ERROR(sts); pTasks[nFirstSyncTask].syncp = NULL; pTasks[nFirstSyncTask].mfxBS.DataLength = 0; 
pTasks[nFirstSyncTask].mfxBS.DataOffset = 0; nFirstSyncTask = (nFirstSyncTask + 1) % taskPoolSize; ++nFrame; #ifdef ENABLE_OUTPUT if((nFrame % 100) == 0) printf("(%d) Frame number: %d\n", id, nFrame); #endif } #ifdef ENABLE_BENCHMARK QueryPerformanceCounter(&tEnd); double duration = ((double)tEnd.QuadPart - (double)tStart.QuadPart) / freq; printf("\n[%d] Execution time: %3.2fs (%3.2ffps)\n", pData->id, duration, nFrame/duration); #endif // =================================================================== // Clean up resources // - It is recommended to close Media SDK components first, before releasing allocated surfaces, since // some surfaces may still be locked by internal Media SDK resources. pmfxENC->Close(); pmfxDEC->Close(); pmfxVPP->Close(); delete pmfxENC; delete pmfxDEC; delete pmfxVPP; pmfxSession->Close(); delete pmfxSession; for (int i = 0; i < nSurfNumDecVPP; i++) delete pSurfaces[i]; for (int i = 0; i < nSurfNumVPPEnc; i++) delete pSurfaces2[i]; MSDK_SAFE_DELETE_ARRAY(pSurfaces); MSDK_SAFE_DELETE_ARRAY(pSurfaces2); MSDK_SAFE_DELETE_ARRAY(mfxBS.Data); for(int i=0;i<taskPoolSize;i++) MSDK_SAFE_DELETE_ARRAY(pTasks[i].mfxBS.Data); MSDK_SAFE_DELETE_ARRAY(pTasks); fclose(fSource); fclose(fSink); return 0; }
int main() { mfxStatus sts = MFX_ERR_NONE; mfxU16 inputWidth = 1920; mfxU16 inputHeight = 1080; // ===================================================================== // Intel Media SDK Video Pre/Post Processing (VPP) pipeline setup // - Showcasing two VPP features // - Resize (frame width and height is halved) // - ProcAmp: Increase brightness // - Video memory surfaces are used // // Open input YV12 YUV file FILE* fSource; fopen_s(&fSource, "bbb1920x1080.yuv", "rb"); MSDK_CHECK_POINTER(fSource, MFX_ERR_NULL_PTR); // Create output YUV file FILE* fSink; fopen_s(&fSink, "bbb960x540_vpp_bright_d3d.yuv", "wb"); MSDK_CHECK_POINTER(fSink, MFX_ERR_NULL_PTR); // Initialize Media SDK session // - MFX_IMPL_AUTO_ANY selects HW accelaration if available (on any adapter) // - Version 1.0 is selected for greatest backwards compatibility. // If more recent API features are needed, change the version accordingly mfxIMPL impl = MFX_IMPL_AUTO_ANY; #ifdef DX11_D3D impl |= MFX_IMPL_VIA_D3D11; #endif mfxVersion ver = {0, 1}; MFXVideoSession mfxSession; sts = mfxSession.Init(impl, &ver); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Initialize VPP parameters mfxVideoParam VPPParams; memset(&VPPParams, 0, sizeof(VPPParams)); // Input data VPPParams.vpp.In.FourCC = MFX_FOURCC_NV12; VPPParams.vpp.In.ChromaFormat = MFX_CHROMAFORMAT_YUV420; VPPParams.vpp.In.CropX = 0; VPPParams.vpp.In.CropY = 0; VPPParams.vpp.In.CropW = inputWidth; VPPParams.vpp.In.CropH = inputHeight; VPPParams.vpp.In.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; VPPParams.vpp.In.FrameRateExtN = 30; VPPParams.vpp.In.FrameRateExtD = 1; // width must be a multiple of 16 // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture VPPParams.vpp.In.Width = MSDK_ALIGN16(inputWidth); VPPParams.vpp.In.Height = (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.In.PicStruct)? 
MSDK_ALIGN16(inputHeight) : MSDK_ALIGN32(inputHeight); // Output data VPPParams.vpp.Out.FourCC = MFX_FOURCC_NV12; VPPParams.vpp.Out.ChromaFormat = MFX_CHROMAFORMAT_YUV420; VPPParams.vpp.Out.CropX = 0; VPPParams.vpp.Out.CropY = 0; VPPParams.vpp.Out.CropW = inputWidth/2; VPPParams.vpp.Out.CropH = inputHeight/2; VPPParams.vpp.Out.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; VPPParams.vpp.Out.FrameRateExtN = 30; VPPParams.vpp.Out.FrameRateExtD = 1; // width must be a multiple of 16 // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture VPPParams.vpp.Out.Width = MSDK_ALIGN16(VPPParams.vpp.Out.CropW); VPPParams.vpp.Out.Height = (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.Out.PicStruct)? MSDK_ALIGN16(VPPParams.vpp.Out.CropH) : MSDK_ALIGN32(VPPParams.vpp.Out.CropH); VPPParams.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY; // Create Media SDK VPP component MFXVideoVPP mfxVPP(mfxSession); // Create DirectX device context mfxHDL deviceHandle; sts = CreateHWDevice(mfxSession, &deviceHandle, NULL); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Provide device manager to Media SDK sts = mfxSession.SetHandle(DEVICE_MGR_TYPE, deviceHandle); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); mfxFrameAllocator mfxAllocator; mfxAllocator.Alloc = simple_alloc; mfxAllocator.Free = simple_free; mfxAllocator.Lock = simple_lock; mfxAllocator.Unlock = simple_unlock; mfxAllocator.GetHDL = simple_gethdl; // When using video memory we must provide Media SDK with an external allocator sts = mfxSession.SetFrameAllocator(&mfxAllocator); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Query number of required surfaces for VPP mfxFrameAllocRequest VPPRequest[2];// [0] - in, [1] - out memset(&VPPRequest, 0, sizeof(mfxFrameAllocRequest)*2); sts = mfxVPP.QueryIOSurf(&VPPParams, VPPRequest); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); #ifdef DX11_D3D VPPRequest[0].Type |= WILL_WRITE; // Hint to DX11 memory handler that application will 
write data to input surfaces VPPRequest[1].Type |= WILL_READ; // Hint to DX11 memory handler that application will read data from output surfaces #endif // Allocate required surfaces mfxFrameAllocResponse mfxResponseIn; mfxFrameAllocResponse mfxResponseOut; sts = mfxAllocator.Alloc(mfxAllocator.pthis, &VPPRequest[0], &mfxResponseIn); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = mfxAllocator.Alloc(mfxAllocator.pthis, &VPPRequest[1], &mfxResponseOut); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); mfxU16 nVPPSurfNumIn = mfxResponseIn.NumFrameActual; mfxU16 nVPPSurfNumOut = mfxResponseOut.NumFrameActual; // Allocate surface headers (mfxFrameSurface1) for VPP mfxFrameSurface1** pVPPSurfacesIn = new mfxFrameSurface1*[nVPPSurfNumIn]; MSDK_CHECK_POINTER(pVPPSurfacesIn, MFX_ERR_MEMORY_ALLOC); for (int i = 0; i < nVPPSurfNumIn; i++) { pVPPSurfacesIn[i] = new mfxFrameSurface1; memset(pVPPSurfacesIn[i], 0, sizeof(mfxFrameSurface1)); memcpy(&(pVPPSurfacesIn[i]->Info), &(VPPParams.vpp.In), sizeof(mfxFrameInfo)); pVPPSurfacesIn[i]->Data.MemId = mfxResponseIn.mids[i]; // MID (memory id) represent one D3D NV12 surface #ifndef ENABLE_INPUT // In case simulating direct access to frames we initialize the allocated surfaces with default pattern // - For true benchmark comparisons to async workloads all surfaces must have the same data #ifndef DX11_D3D IDirect3DSurface9 *pSurface; D3DSURFACE_DESC desc; D3DLOCKED_RECT locked; pSurface = (IDirect3DSurface9 *)mfxResponseIn.mids[i]; pSurface->GetDesc(&desc); pSurface->LockRect(&locked, 0, D3DLOCK_NOSYSLOCK); memset((mfxU8 *)locked.pBits, 100, desc.Height*locked.Pitch); // Y plane memset((mfxU8 *)locked.pBits + desc.Height * locked.Pitch, 50, (desc.Height*locked.Pitch)/2); // UV plane pSurface->UnlockRect(); #else // For now, just leave D3D11 surface data uninitialized #endif #endif } mfxFrameSurface1** pVPPSurfacesOut = new mfxFrameSurface1*[nVPPSurfNumOut]; MSDK_CHECK_POINTER(pVPPSurfacesOut, MFX_ERR_MEMORY_ALLOC); for (int i = 0; i < 
nVPPSurfNumOut; i++) { pVPPSurfacesOut[i] = new mfxFrameSurface1; memset(pVPPSurfacesOut[i], 0, sizeof(mfxFrameSurface1)); memcpy(&(pVPPSurfacesOut[i]->Info), &(VPPParams.vpp.Out), sizeof(mfxFrameInfo)); pVPPSurfacesOut[i]->Data.MemId = mfxResponseOut.mids[i]; // MID (memory id) represent one D3D NV12 surface } // Initialize extended buffer for frame processing // - Process amplifier (ProcAmp) used to control brightness // - mfxExtVPPDoUse: Define the processing algorithm to be used // - mfxExtVPPProcAmp: ProcAmp configuration // - mfxExtBuffer: Add extended buffers to VPP parameter configuration mfxExtVPPDoUse extDoUse; mfxU32 tabDoUseAlg[1]; extDoUse.Header.BufferId = MFX_EXTBUFF_VPP_DOUSE; extDoUse.Header.BufferSz = sizeof(mfxExtVPPDoUse); extDoUse.NumAlg = 1; extDoUse.AlgList = tabDoUseAlg; tabDoUseAlg[0] = MFX_EXTBUFF_VPP_PROCAMP; mfxExtVPPProcAmp procampConfig; procampConfig.Header.BufferId = MFX_EXTBUFF_VPP_PROCAMP; procampConfig.Header.BufferSz = sizeof(mfxExtVPPProcAmp); procampConfig.Hue = 0.0f; // Default procampConfig.Saturation = 1.0f; // Default procampConfig.Contrast = 1.0; // Default procampConfig.Brightness = 40.0; // Adjust brightness mfxExtBuffer* ExtBuffer[2]; ExtBuffer[0] = (mfxExtBuffer*)&extDoUse; ExtBuffer[1] = (mfxExtBuffer*)&procampConfig; VPPParams.NumExtParam = 2; VPPParams.ExtParam = (mfxExtBuffer**)&ExtBuffer[0]; // Initialize Media SDK VPP sts = mfxVPP.Init(&VPPParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // =================================== // Start processing the frames // #ifdef ENABLE_BENCHMARK LARGE_INTEGER tStart, tEnd; QueryPerformanceFrequency(&tStart); double freq = (double)tStart.QuadPart; QueryPerformanceCounter(&tStart); #endif int nSurfIdxIn = 0, nSurfIdxOut = 0; mfxSyncPoint syncp; mfxU32 nFrame = 0; // // Stage 1: Main processing loop // while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts) { nSurfIdxIn = GetFreeSurfaceIndex(pVPPSurfacesIn, 
nVPPSurfNumIn); // Find free input frame surface if (MFX_ERR_NOT_FOUND == nSurfIdxIn) return MFX_ERR_MEMORY_ALLOC; // Surface locking required when read/write D3D surfaces sts = mfxAllocator.Lock(mfxAllocator.pthis, pVPPSurfacesIn[nSurfIdxIn]->Data.MemId, &(pVPPSurfacesIn[nSurfIdxIn]->Data)); MSDK_BREAK_ON_ERROR(sts); sts = LoadRawFrame(pVPPSurfacesIn[nSurfIdxIn], fSource); // Load frame from file into surface MSDK_BREAK_ON_ERROR(sts); sts = mfxAllocator.Unlock(mfxAllocator.pthis, pVPPSurfacesIn[nSurfIdxIn]->Data.MemId, &(pVPPSurfacesIn[nSurfIdxIn]->Data)); MSDK_BREAK_ON_ERROR(sts); nSurfIdxOut = GetFreeSurfaceIndex(pVPPSurfacesOut, nVPPSurfNumOut); // Find free output frame surface if (MFX_ERR_NOT_FOUND == nSurfIdxOut) return MFX_ERR_MEMORY_ALLOC; // Process a frame asychronously (returns immediately) sts = mfxVPP.RunFrameVPPAsync(pVPPSurfacesIn[nSurfIdxIn], pVPPSurfacesOut[nSurfIdxOut], NULL, &syncp); if (MFX_ERR_MORE_DATA == sts) continue; // MFX_ERR_MORE_SURFACE means output is ready but need more surface (example: Frame Rate Conversion 30->60) // * Not handled in this example! MSDK_BREAK_ON_ERROR(sts); sts = mfxSession.SyncOperation(syncp, 60000); // Synchronize. 
Wait until frame processing is ready MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); ++nFrame; #ifdef ENABLE_OUTPUT // Surface locking required when read/write D3D surfaces sts = mfxAllocator.Lock(mfxAllocator.pthis, pVPPSurfacesOut[nSurfIdxOut]->Data.MemId, &(pVPPSurfacesOut[nSurfIdxOut]->Data)); MSDK_BREAK_ON_ERROR(sts); sts = WriteRawFrame(pVPPSurfacesOut[nSurfIdxOut], fSink); MSDK_BREAK_ON_ERROR(sts); sts = mfxAllocator.Unlock(mfxAllocator.pthis, pVPPSurfacesOut[nSurfIdxOut]->Data.MemId, &(pVPPSurfacesOut[nSurfIdxOut]->Data)); MSDK_BREAK_ON_ERROR(sts); printf("Frame number: %d\r", nFrame); #endif } // MFX_ERR_MORE_DATA means that the input file has ended, need to go to buffering loop, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 2: Retrieve the buffered VPP frames // while (MFX_ERR_NONE <= sts) { nSurfIdxOut = GetFreeSurfaceIndex(pVPPSurfacesOut, nVPPSurfNumOut); // Find free frame surface if (MFX_ERR_NOT_FOUND == nSurfIdxOut) return MFX_ERR_MEMORY_ALLOC; // Process a frame asychronously (returns immediately) sts = mfxVPP.RunFrameVPPAsync(NULL, pVPPSurfacesOut[nSurfIdxOut], NULL, &syncp); MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_SURFACE); MSDK_BREAK_ON_ERROR(sts); sts = mfxSession.SyncOperation(syncp, 60000); // Synchronize. 
Wait until frame processing is ready MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); ++nFrame; #ifdef ENABLE_OUTPUT // Surface locking required when read/write D3D surfaces sts = mfxAllocator.Lock(mfxAllocator.pthis, pVPPSurfacesOut[nSurfIdxOut]->Data.MemId, &(pVPPSurfacesOut[nSurfIdxOut]->Data)); MSDK_BREAK_ON_ERROR(sts); sts = WriteRawFrame(pVPPSurfacesOut[nSurfIdxOut], fSink); MSDK_BREAK_ON_ERROR(sts); sts = mfxAllocator.Unlock(mfxAllocator.pthis, pVPPSurfacesOut[nSurfIdxOut]->Data.MemId, &(pVPPSurfacesOut[nSurfIdxOut]->Data)); MSDK_BREAK_ON_ERROR(sts); printf("Frame number: %d\r", nFrame); #endif } // MFX_ERR_MORE_DATA indicates that there are no more buffered frames, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); #ifdef ENABLE_BENCHMARK QueryPerformanceCounter(&tEnd); double duration = ((double)tEnd.QuadPart - (double)tStart.QuadPart) / freq; printf("\nExecution time: %3.2fs (%3.2ffps)\n", duration, nFrame/duration); #endif // =================================================================== // Clean up resources // - It is recommended to close Media SDK components first, before releasing allocated surfaces, since // some surfaces may still be locked by internal Media SDK resources. mfxVPP.Close(); //mfxSession closed automatically on destruction for (int i = 0; i < nVPPSurfNumIn; i++) delete pVPPSurfacesIn[i]; MSDK_SAFE_DELETE_ARRAY(pVPPSurfacesIn); for (int i = 0; i < nVPPSurfNumOut; i++) delete pVPPSurfacesOut[i]; MSDK_SAFE_DELETE_ARRAY(pVPPSurfacesOut); fclose(fSource); fclose(fSink); CleanupHWDevice(); return 0; }
int main() { mfxStatus sts = MFX_ERR_NONE; // ===================================================================== // Intel Media SDK decode pipeline setup // - In this example we are decoding an AVC (H.264) stream // - For simplistic memory management, system memory surfaces are used to store the decoded frames // (Note that when using HW acceleration D3D surfaces are prefered, for better performance) // // - VPP used to post process (resize) the frame // // Open input H.264 elementary stream (ES) file FILE* fSource; fopen_s(&fSource, "bbb1920x1080.264", "rb"); MSDK_CHECK_POINTER(fSource, MFX_ERR_NULL_PTR); // Create output YUV file FILE* fSink; fopen_s(&fSink, "dectest_960x540.yuv", "wb"); MSDK_CHECK_POINTER(fSink, MFX_ERR_NULL_PTR); // Initialize Media SDK session // - MFX_IMPL_AUTO_ANY selects HW accelaration if available (on any adapter) // - Version 1.0 is selected for greatest backwards compatibility. // If more recent API features are needed, change the version accordingly mfxIMPL impl = MFX_IMPL_AUTO_ANY; mfxVersion ver = {0, 1}; MFXVideoSession mfxSession; sts = mfxSession.Init(impl, &ver); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Create Media SDK decoder MFXVideoDECODE mfxDEC(mfxSession); // Create Media SDK VPP component MFXVideoVPP mfxVPP(mfxSession); // Set required video parameters for decode // - In this example we are decoding an AVC (H.264) stream // - For simplistic memory management, system memory surfaces are used to store the decoded frames // (Note that when using HW acceleration D3D surfaces are prefered, for better performance) mfxVideoParam mfxVideoParams; memset(&mfxVideoParams, 0, sizeof(mfxVideoParams)); mfxVideoParams.mfx.CodecId = MFX_CODEC_AVC; mfxVideoParams.IOPattern = MFX_IOPATTERN_OUT_SYSTEM_MEMORY; // Prepare Media SDK bit stream buffer // - Arbitrary buffer size for this example mfxBitstream mfxBS; memset(&mfxBS, 0, sizeof(mfxBS)); mfxBS.MaxLength = 1024 * 1024; mfxBS.Data = new mfxU8[mfxBS.MaxLength]; 
MSDK_CHECK_POINTER(mfxBS.Data, MFX_ERR_MEMORY_ALLOC); // Read a chunk of data from stream file into bit stream buffer // - Parse bit stream, searching for header and fill video parameters structure // - Abort if bit stream header is not found in the first bit stream buffer chunk sts = ReadBitStreamData(&mfxBS, fSource); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); sts = mfxDEC.DecodeHeader(&mfxBS, &mfxVideoParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Initialize VPP parameters // - For simplistic memory management, system memory surfaces are used to store the raw frames // (Note that when using HW acceleration D3D surfaces are prefered, for better performance) mfxVideoParam VPPParams; memset(&VPPParams, 0, sizeof(VPPParams)); // Input data VPPParams.vpp.In.FourCC = MFX_FOURCC_NV12; VPPParams.vpp.In.ChromaFormat = MFX_CHROMAFORMAT_YUV420; VPPParams.vpp.In.CropX = 0; VPPParams.vpp.In.CropY = 0; VPPParams.vpp.In.CropW = mfxVideoParams.mfx.FrameInfo.CropW; VPPParams.vpp.In.CropH = mfxVideoParams.mfx.FrameInfo.CropH; VPPParams.vpp.In.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; VPPParams.vpp.In.FrameRateExtN = 30; VPPParams.vpp.In.FrameRateExtD = 1; // width must be a multiple of 16 // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture VPPParams.vpp.In.Width = MSDK_ALIGN16(VPPParams.vpp.In.CropW); VPPParams.vpp.In.Height = (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.In.PicStruct)? 
MSDK_ALIGN16(VPPParams.vpp.In.CropH) : MSDK_ALIGN32(VPPParams.vpp.In.CropH); // Output data VPPParams.vpp.Out.FourCC = MFX_FOURCC_NV12; VPPParams.vpp.Out.ChromaFormat = MFX_CHROMAFORMAT_YUV420; VPPParams.vpp.Out.CropX = 0; VPPParams.vpp.Out.CropY = 0; VPPParams.vpp.Out.CropW = VPPParams.vpp.In.CropW/2; // Resize to half size resolution VPPParams.vpp.Out.CropH = VPPParams.vpp.In.CropH/2; VPPParams.vpp.Out.PicStruct = MFX_PICSTRUCT_PROGRESSIVE; VPPParams.vpp.Out.FrameRateExtN = 30; VPPParams.vpp.Out.FrameRateExtD = 1; // width must be a multiple of 16 // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture VPPParams.vpp.Out.Width = MSDK_ALIGN16(VPPParams.vpp.Out.CropW); VPPParams.vpp.Out.Height = (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.Out.PicStruct)? MSDK_ALIGN16(VPPParams.vpp.Out.CropH) : MSDK_ALIGN32(VPPParams.vpp.Out.CropH); VPPParams.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY | MFX_IOPATTERN_OUT_SYSTEM_MEMORY; // Query number of required surfaces for decoder mfxFrameAllocRequest DecRequest; memset(&DecRequest, 0, sizeof(DecRequest)); sts = mfxDEC.QueryIOSurf(&mfxVideoParams, &DecRequest); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Query number of required surfaces for VPP mfxFrameAllocRequest VPPRequest[2];// [0] - in, [1] - out memset(&VPPRequest, 0, sizeof(mfxFrameAllocRequest)*2); sts = mfxVPP.QueryIOSurf(&VPPParams, VPPRequest); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Determine the required number of surfaces for decoder output (VPP input) and for VPP output mfxU16 nSurfNumDecVPP = DecRequest.NumFrameSuggested + VPPRequest[0].NumFrameSuggested; mfxU16 nSurfNumVPPOut = VPPRequest[1].NumFrameSuggested; // Allocate surfaces for decoder and VPP In // - Width and height of buffer must be aligned, a multiple of 32 // - Frame surface array keeps pointers all surface planes and general frame info mfxU16 width = 
(mfxU16)MSDK_ALIGN32(DecRequest.Info.Width); mfxU16 height = (mfxU16)MSDK_ALIGN32(DecRequest.Info.Height); mfxU8 bitsPerPixel = 12; // NV12 format is a 12 bits per pixel format mfxU32 surfaceSize = width * height * bitsPerPixel / 8; mfxU8* surfaceBuffers = (mfxU8 *)new mfxU8[surfaceSize * nSurfNumDecVPP]; mfxFrameSurface1** pmfxSurfaces = new mfxFrameSurface1*[nSurfNumDecVPP]; MSDK_CHECK_POINTER(pmfxSurfaces, MFX_ERR_MEMORY_ALLOC); for (int i = 0; i < nSurfNumDecVPP; i++) { pmfxSurfaces[i] = new mfxFrameSurface1; memset(pmfxSurfaces[i], 0, sizeof(mfxFrameSurface1)); memcpy(&(pmfxSurfaces[i]->Info), &(mfxVideoParams.mfx.FrameInfo), sizeof(mfxFrameInfo)); pmfxSurfaces[i]->Data.Y = &surfaceBuffers[surfaceSize * i]; pmfxSurfaces[i]->Data.U = pmfxSurfaces[i]->Data.Y + width * height; pmfxSurfaces[i]->Data.V = pmfxSurfaces[i]->Data.U + 1; pmfxSurfaces[i]->Data.Pitch = width; } // Allocate surfaces for VPP Out // - Width and height of buffer must be aligned, a multiple of 32 // - Frame surface array keeps pointers all surface planes and general frame info width = (mfxU16)MSDK_ALIGN32(VPPRequest[1].Info.Width); height = (mfxU16)MSDK_ALIGN32(VPPRequest[1].Info.Height); bitsPerPixel = 12; // NV12 format is a 12 bits per pixel format surfaceSize = width * height * bitsPerPixel / 8; mfxU8* surfaceBuffers2 = (mfxU8 *)new mfxU8[surfaceSize * nSurfNumVPPOut]; mfxFrameSurface1** pmfxSurfaces2 = new mfxFrameSurface1*[nSurfNumVPPOut]; MSDK_CHECK_POINTER(pmfxSurfaces2, MFX_ERR_MEMORY_ALLOC); for (int i = 0; i < nSurfNumVPPOut; i++) { pmfxSurfaces2[i] = new mfxFrameSurface1; memset(pmfxSurfaces2[i], 0, sizeof(mfxFrameSurface1)); memcpy(&(pmfxSurfaces2[i]->Info), &(VPPParams.vpp.Out), sizeof(mfxFrameInfo)); pmfxSurfaces2[i]->Data.Y = &surfaceBuffers[surfaceSize * i]; pmfxSurfaces2[i]->Data.U = pmfxSurfaces2[i]->Data.Y + width * height; pmfxSurfaces2[i]->Data.V = pmfxSurfaces2[i]->Data.U + 1; pmfxSurfaces2[i]->Data.Pitch = width; } // Initialize the Media SDK decoder sts = 
mfxDEC.Init(&mfxVideoParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // Initialize Media SDK VPP sts = mfxVPP.Init(&VPPParams); MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // =============================================================== // Start decoding the frames from the stream // #ifdef ENABLE_BENCHMARK LARGE_INTEGER tStart, tEnd; QueryPerformanceFrequency(&tStart); double freq = (double)tStart.QuadPart; QueryPerformanceCounter(&tStart); #endif mfxSyncPoint syncpD; mfxSyncPoint syncpV; mfxFrameSurface1* pmfxOutSurface = NULL; int nIndex = 0; int nIndex2 = 0; mfxU32 nFrame = 0; // // Stage 1: Main decoding loop // while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts || MFX_ERR_MORE_SURFACE == sts) { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // Wait if device is busy, then repeat the same call to DecodeFrameAsync if (MFX_ERR_MORE_DATA == sts) { sts = ReadBitStreamData(&mfxBS, fSource); // Read more data into input bit stream MSDK_BREAK_ON_ERROR(sts); } if (MFX_ERR_MORE_SURFACE == sts || MFX_ERR_NONE == sts) { nIndex = GetFreeSurfaceIndex(pmfxSurfaces, nSurfNumDecVPP); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex) return MFX_ERR_MEMORY_ALLOC; } // Decode a frame asychronously (returns immediately) sts = mfxDEC.DecodeFrameAsync(&mfxBS, pmfxSurfaces[nIndex], &pmfxOutSurface, &syncpD); // Ignore warnings if output is available, // if no output and no action required just repeat the DecodeFrameAsync call if (MFX_ERR_NONE < sts && syncpD) sts = MFX_ERR_NONE; if (MFX_ERR_NONE == sts) { nIndex2 = GetFreeSurfaceIndex(pmfxSurfaces2, nSurfNumVPPOut); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex) return MFX_ERR_MEMORY_ALLOC; for (;;) { // Process a frame asychronously (returns immediately) sts = mfxVPP.RunFrameVPPAsync(pmfxOutSurface, pmfxSurfaces2[nIndex2], NULL, &syncpV); if (MFX_ERR_NONE < sts && !syncpV) // repeat the call if 
warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && syncpV) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else break; // not a warning } // VPP needs more data, let decoder decode another frame as input if (MFX_ERR_MORE_DATA == sts) { continue; } else if (MFX_ERR_MORE_SURFACE == sts) { // Not relevant for the illustrated workload! Therefore not handled. // Relevant for cases when VPP produces more frames at output than consumes at input. E.g. framerate conversion 30 fps -> 60 fps break; } else MSDK_BREAK_ON_ERROR(sts); } if (MFX_ERR_NONE == sts) sts = mfxSession.SyncOperation(syncpV, 60000); // Synchronize. Wait until decoded frame is ready if (MFX_ERR_NONE == sts) { ++nFrame; #ifdef ENABLE_OUTPUT sts = WriteRawFrame(pmfxSurfaces2[nIndex2], fSink); MSDK_BREAK_ON_ERROR(sts); printf("Frame number: %d\r", nFrame); #endif } } // MFX_ERR_MORE_DATA means that file has ended, need to go to buffering loop, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 2: Retrieve the buffered decoded frames // while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_SURFACE == sts) { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // Wait if device is busy, then repeat the same call to DecodeFrameAsync nIndex = GetFreeSurfaceIndex(pmfxSurfaces, nSurfNumDecVPP); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex) return MFX_ERR_MEMORY_ALLOC; // Decode a frame asychronously (returns immediately) sts = mfxDEC.DecodeFrameAsync(NULL, pmfxSurfaces[nIndex], &pmfxOutSurface, &syncpD); // Ignore warnings if output is available, // if no output and no action required just repeat the DecodeFrameAsync call if (MFX_ERR_NONE < sts && syncpD) sts = MFX_ERR_NONE; if (MFX_ERR_NONE == sts) { nIndex2 = GetFreeSurfaceIndex(pmfxSurfaces2, nSurfNumVPPOut); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex) return MFX_ERR_MEMORY_ALLOC; 
for (;;) { // Process a frame asychronously (returns immediately) sts = mfxVPP.RunFrameVPPAsync(pmfxOutSurface, pmfxSurfaces2[nIndex2], NULL, &syncpV); if (MFX_ERR_NONE < sts && !syncpV) // repeat the call if warning and no output { if (MFX_WRN_DEVICE_BUSY == sts) Sleep(1); // wait if device is busy } else if (MFX_ERR_NONE < sts && syncpV) { sts = MFX_ERR_NONE; // ignore warnings if output is available break; } else break; // not a warning } // VPP needs more data, let decoder decode another frame as input if (MFX_ERR_MORE_DATA == sts) { continue; } else if (MFX_ERR_MORE_SURFACE == sts) { // Not relevant for the illustrated workload! Therefore not handled. // Relevant for cases when VPP produces more frames at output than consumes at input. E.g. framerate conversion 30 fps -> 60 fps break; } else MSDK_BREAK_ON_ERROR(sts); } if (MFX_ERR_NONE == sts) sts = mfxSession.SyncOperation(syncpV, 60000); // Synchronize. Waits until decoded frame is ready if (MFX_ERR_NONE == sts) { ++nFrame; #ifdef ENABLE_OUTPUT sts = WriteRawFrame(pmfxSurfaces2[nIndex2], fSink); MSDK_BREAK_ON_ERROR(sts); printf("Frame number: %d\r", nFrame); #endif } } // MFX_ERR_MORE_DATA means that decoder is done with buffered frames, need to go to VPP buffering loop, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); // // Stage 3: Retrieve the buffered VPP frames // while (MFX_ERR_NONE <= sts) { nIndex2 = GetFreeSurfaceIndex(pmfxSurfaces2, nSurfNumVPPOut); // Find free frame surface if (MFX_ERR_NOT_FOUND == nIndex2) return MFX_ERR_MEMORY_ALLOC; // Process a frame asychronously (returns immediately) sts = mfxVPP.RunFrameVPPAsync(NULL, pmfxSurfaces2[nIndex2], NULL, &syncpV); MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_SURFACE); MSDK_BREAK_ON_ERROR(sts); sts = mfxSession.SyncOperation(syncpV, 60000); // Synchronize. 
Wait until frame processing is ready MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); ++nFrame; #ifdef ENABLE_OUTPUT sts = WriteRawFrame(pmfxSurfaces2[nIndex2], fSink); MSDK_BREAK_ON_ERROR(sts); printf("Frame number: %d\r", nFrame); #endif } // MFX_ERR_MORE_DATA indicates that all buffers has been fetched, exit in case of other errors MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA); MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts); #ifdef ENABLE_BENCHMARK QueryPerformanceCounter(&tEnd); double duration = ((double)tEnd.QuadPart - (double)tStart.QuadPart) / freq; printf("\nExecution time: %3.2fs (%3.2ffps)\n", duration, nFrame/duration); #endif // =================================================================== // Clean up resources // - It is recommended to close Media SDK components first, before releasing allocated surfaces, since // some surfaces may still be locked by internal Media SDK resources. mfxDEC.Close(); mfxVPP.Close(); // mfxSession closed automatically on destruction for (int i = 0; i < nSurfNumDecVPP; i++) delete pmfxSurfaces[i]; for (int i = 0; i < nSurfNumVPPOut; i++) delete pmfxSurfaces2[i]; MSDK_SAFE_DELETE_ARRAY(pmfxSurfaces); MSDK_SAFE_DELETE_ARRAY(pmfxSurfaces2); MSDK_SAFE_DELETE_ARRAY(surfaceBuffers); MSDK_SAFE_DELETE_ARRAY(surfaceBuffers2); MSDK_SAFE_DELETE_ARRAY(mfxBS.Data); fclose(fSource); fclose(fSink); return 0; }