/***********************************************************************
 *        MoDeleteMediaType    (MSDMO.@)
 *
 * Destroy a media type structure: its members are released through
 * MoFreeMediaType and the structure itself is then returned to the COM
 * task allocator.
 *
 * PARAMS
 *  pmedia [I] Media type structure to destroy.
 *
 * RETURNS
 *  E_POINTER if pmedia is NULL, S_OK otherwise.
 */
HRESULT WINAPI MoDeleteMediaType(DMO_MEDIA_TYPE* pmedia)
{
    HRESULT hr = E_POINTER;

    TRACE("%p\n", pmedia);

    if (pmedia)
    {
        /* Members first, then the structure that holds them. */
        MoFreeMediaType(pmedia);
        CoTaskMemFree(pmedia);
        hr = S_OK;
    }

    return hr;
}
void KinectAudioSource::Start() { DMO_MEDIA_TYPE mt = {0}; ULONG cbProduced = 0; memset( &outputBufferStruct_, 0, sizeof(outputBufferStruct_) ); outputBufferStruct_.pBuffer = &mediaBuffer_; // Set DMO output format CHECKHR( MoInitMediaType(&mt, sizeof(WAVEFORMATEX)) ); mt.majortype = MEDIATYPE_Audio; mt.subtype = MEDIASUBTYPE_PCM; mt.lSampleSize = 0; mt.bFixedSizeSamples = TRUE; mt.bTemporalCompression = FALSE; mt.formattype = FORMAT_WaveFormatEx; memcpy(mt.pbFormat, &GetWaveFormat(), sizeof(WAVEFORMATEX)); CHECKHR( mediaObject_->SetOutputType(0, &mt, 0) ); MoFreeMediaType(&mt); // Allocate streaming resources. This step is optional. If it is not called here, it // will be called when first time ProcessInput() is called. However, if you want to // get the actual frame size being used, it should be called explicitly here. CHECKHR( mediaObject_->AllocateStreamingResources() ); // Get actually frame size being used in the DMO. (optional, do as you need) int iFrameSize; PROPVARIANT pvFrameSize; PropVariantInit(&pvFrameSize); CHECKHR(propertyStore_->GetValue(MFPKEY_WMAAECMA_FEATR_FRAME_SIZE, &pvFrameSize)); iFrameSize = pvFrameSize.lVal; PropVariantClear(&pvFrameSize); // allocate output buffer mediaBuffer_.SetBufferLength( GetWaveFormat().nSamplesPerSec * GetWaveFormat().nBlockAlign ); }
//////////////////////////////////////////////////////////////////////////////
//
// CGargle::Process
//
// Refreshes the active parameter values for time rtStart, converts the byte
// count ulQuanta into a sample count using the block alignment of the current
// input type on stream 0, and processes pcbData in place through FBRProcess.
//
// Returns the failure code of GetInputCurrentType, E_UNEXPECTED when the
// current input type carries no usable WAVEFORMATEX (wrong format type, no
// format block, or a zero block alignment), or the result of FBRProcess.
//
HRESULT CGargle::Process(ULONG ulQuanta, LPBYTE pcbData, REFERENCE_TIME rtStart, DWORD dwFlags)
{
    // Update parameter values from any curves that may be in effect.
    // We pick up the current values stored in the CParamsManager helper for time rtStart.

    // Note that we are using IMediaParams in a less than
    // perfect way. We update at the beginning of every time slice instead of smoothly over the curve.
    // This is okay for an effect like gargle as long as the time slice is consistently small (which
    // it conveniently is when hosted in DSound.)
    // However, in the future we will update this sample to use a more appropriate and accurate
    // mechanism.
    // Here are some suggestions of how it can be done, with increasing degree of accuracy. Different
    // types of effects and effect parameters require different levels of accuracy, so no solution is the best
    // solution for all (especially if you are concerned about CPU cost.)
    // 1) Break the time slice up into mini pieces of some number of milliseconds
    // each and run through all the steps in Process for each sub slice. This guarantees the
    // stair stepping is small enough not to be noticeable. This approach will work well for parameters
    // that don't create an audible stair stepping (or "zipper") noise when controlled in this way.
    // Control over volume, for example, does not work well.
    // 2) Use the above mechanism, but pass the start and end values for each parameter to the
    // processing engine. It, in turn, applies linear interpolation to each parameter. This results
    // in a smooth approximation of the parameter curve and removes all but the most subtle aliasing noise.
    // 3) Pass the curves directly to the processing engine, which accurately calculates each sample
    // mathematically. This is obviously the best, but most complex and CPU intensive.

    this->UpdateActiveParams(rtStart, *this);

    DMO_MEDIA_TYPE mt;
    HRESULT hr = GetInputCurrentType(0, &mt);
    if( FAILED( hr ) )
        return hr;

    // convert bytes to samples for the FBRProcess call
    assert(mt.formattype == FORMAT_WaveFormatEx);

    // The assert disappears in release builds, so read the block alignment
    // defensively: a missing format block or a zero alignment must not turn
    // into a NULL dereference or a division by zero.
    WORD nBlockAlign = 0;
    if( mt.formattype == FORMAT_WaveFormatEx && mt.pbFormat != NULL )
        nBlockAlign = LPWAVEFORMATEX(mt.pbFormat)->nBlockAlign;

    MoFreeMediaType( &mt );

    if( nBlockAlign == 0 )
        return E_UNEXPECTED;

    ulQuanta /= nBlockAlign;

    return FBRProcess(ulQuanta, pcbData, pcbData);
}
/***********************************************************************
 *        MoDuplicateMediaType    (MSDMO.@)
 *
 * Duplicates a media type structure into a newly allocated one.
 *
 * PARAMS
 *  ppdst [O] Receives the duplicate, allocated with CoTaskMemAlloc.
 *            Set to NULL when the copy fails.
 *  psrc  [I] Media type to duplicate.
 *
 * RETURNS
 *  E_POINTER if either argument is NULL, E_OUTOFMEMORY if the structure
 *  cannot be allocated, otherwise the result of MoCopyMediaType.
 */
HRESULT WINAPI MoDuplicateMediaType(DMO_MEDIA_TYPE** ppdst,
                                    const DMO_MEDIA_TYPE* psrc)
{
    HRESULT r;

    TRACE("%p %p\n", ppdst, psrc);

    if (!ppdst || !psrc)
        return E_POINTER;

    *ppdst = CoTaskMemAlloc(sizeof(DMO_MEDIA_TYPE));
    if (!*ppdst)
        return E_OUTOFMEMORY;

    r = MoCopyMediaType(*ppdst, psrc);
    if (FAILED(r))
    {
        /* MoFreeMediaType only releases what the structure owns; the
         * structure itself was allocated above and must be freed here,
         * otherwise it leaks once the caller's pointer is reset. */
        MoFreeMediaType(*ppdst);
        CoTaskMemFree(*ppdst);
        *ppdst = NULL;
    }

    return r;
}
///////////////////// // // IMediaObjectInPlace::Clone // // The Clone method creates a copy of the DMO in its current state. // // Parameters // // ppMediaObject // [out] Address of a pointer to receive the new DMO's // IMediaObjectInPlace interface. // // Return Value // Returns S_OK if successful. Otherwise, returns an HRESULT value // indicating the cause of the error. // // If the method succeeds, the IMediaObjectInPlace interface that it returns // has an outstanding reference count. Be sure to release the interface when // you are finished using it. // HRESULT CHXAudioDeviceHookBase::Clone(IMediaObjectInPlace **ppMediaObject) { // Check the input pointer if (!ppMediaObject) { return E_POINTER; } // This will be cleaned up when client releases the newly created object // or if there's some error along the way CHXAudioDeviceHookBase * pNewHXAudioDeviceHook = new CComObject<CHXAudioDeviceHookBase>; if( !pNewHXAudioDeviceHook ) { return E_OUTOFMEMORY; } HRESULT hr = S_OK; hr = pNewHXAudioDeviceHook->UpdateStatesInternal(); IMediaObject * pCloned = NULL; if( SUCCEEDED( hr ) ) { IUnknown *pUnk; hr = pNewHXAudioDeviceHook->QueryInterface( IID_IUnknown, (void **) &pUnk ); if( SUCCEEDED( hr ) ) { hr = pUnk->QueryInterface( IID_IMediaObject, (void **) &pCloned ); HX_RELEASE(pUnk); } } // Copy the input and output types if (SUCCEEDED(hr)) { DMO_MEDIA_TYPE mt; DWORD cInputStreams = 0; DWORD cOutputStreams = 0; GetStreamCount(&cInputStreams, &cOutputStreams); for (DWORD i = 0; i < cInputStreams && SUCCEEDED(hr); ++i) { hr = GetInputCurrentType(i, &mt); if (hr == DMO_E_TYPE_NOT_SET) { hr = S_OK; // great, don't need to set the cloned DMO } else if (SUCCEEDED(hr)) { hr = pCloned->SetInputType(i, &mt, 0); MoFreeMediaType( &mt ); } } for (DWORD i = 0; i < cOutputStreams && SUCCEEDED(hr); ++i) { hr = GetOutputCurrentType(i, &mt); if (hr == DMO_E_TYPE_NOT_SET) { hr = S_OK; // great, don't need to set the cloned DMO } else if (SUCCEEDED(hr)) { hr = pCloned->SetOutputType(i, 
&mt, 0); MoFreeMediaType( &mt ); } } if (SUCCEEDED(hr)) { hr = pCloned->QueryInterface(IID_IMediaObjectInPlace, (void**)ppMediaObject); } // Release the object's original ref. If clone succeeded (made it through QI) then returned pointer // has one ref. If we failed, refs drop to zero, freeing the object. HX_RELEASE(pCloned); } // Something went wrong, clean up for client if (FAILED(hr)) { delete pNewHXAudioDeviceHook; } return hr; }
////////////////////////////////////////////////////////////////////////////// // // CGargle::Clone // HRESULT CGargle::Clone(IMediaObjectInPlace **ppCloned) { if (!ppCloned) return E_POINTER; HRESULT hr = S_OK; CGargle * pNewGargle = new CComObject<CGargle>; if( !pNewGargle ) hr = E_OUTOFMEMORY; hr = pNewGargle->Init(); IMediaObject * pCloned = NULL; if( SUCCEEDED( hr ) ) { IUnknown *pUnk; hr = pNewGargle->QueryInterface( IID_IUnknown, (void **) &pUnk ); if( SUCCEEDED( hr ) ) { hr = pUnk->QueryInterface( IID_IMediaObject, (void **) &pCloned ); pUnk->Release(); } } else { return hr; } // // Copy parameter control information // if (SUCCEEDED(hr)) hr = pNewGargle->CopyParamsFromSource((CParamsManager *) this); // Copy current parameter values GargleFX params; if (SUCCEEDED(hr)) hr = GetAllParameters(¶ms); if (SUCCEEDED(hr)) hr = pNewGargle->SetAllParameters(¶ms); if (SUCCEEDED(hr)) { // Copy the input and output types DMO_MEDIA_TYPE mt; DWORD cInputStreams = 0; DWORD cOutputStreams = 0; GetStreamCount(&cInputStreams, &cOutputStreams); for (DWORD i = 0; i < cInputStreams && SUCCEEDED(hr); ++i) { hr = GetInputCurrentType(i, &mt); if (hr == DMO_E_TYPE_NOT_SET) { hr = S_OK; // great, don't need to set the cloned DMO } else if (SUCCEEDED(hr)) { hr = pCloned->SetInputType(i, &mt, 0); MoFreeMediaType( &mt ); } } for (i = 0; i < cOutputStreams && SUCCEEDED(hr); ++i) { hr = GetOutputCurrentType(i, &mt); if (hr == DMO_E_TYPE_NOT_SET) { hr = S_OK; // great, don't need to set the cloned DMO } else if (SUCCEEDED(hr)) { hr = pCloned->SetOutputType(i, &mt, 0); MoFreeMediaType( &mt ); } } if (SUCCEEDED(hr)) hr = pCloned->QueryInterface(IID_IMediaObjectInPlace, (void**)ppCloned); // Release the object's original ref. If clone succeeded (made it through QI) then returned pointer // has one ref. If we failed, refs drop to zero, freeing the object. pCloned->Release(); } return hr; }
int CPropertyForm::AnalyzeDMO(IUnknown *obj) { CComPtr<IMediaObject> dmo; HRESULT hr; hr = obj->QueryInterface(IID_IMediaObject, (void**)&dmo); if (FAILED(hr)) return -1; // fine - it's a DMO object. now check what kind of it is. int i = 0; int dmo_type = -1; // unknown DMO_MEDIA_TYPE dmt; memset(&dmt, 0, sizeof(dmt)); // first check if it is connected hr = dmo->GetInputCurrentType(0, &dmt); if (FAILED(hr)) { // or try to enumerate through the types while (dmo_type == -1) { hr = dmo->GetInputType(0, i++, &dmt); if (hr == NOERROR) { // currently we're only interested in WMA Decoder if (dmt.majortype == MEDIATYPE_Audio && ( dmt.subtype == GraphStudio::MEDIASUBTYPE_WMA9_00 || dmt.subtype == GraphStudio::MEDIASUBTYPE_WMA9_01 || dmt.subtype == GraphStudio::MEDIASUBTYPE_WMA9_02 || dmt.subtype == GraphStudio::MEDIASUBTYPE_WMA9_03 ) ){ dmo_type = 0; // WMA } } MoFreeMediaType(&dmt); } } else { // currently we're only interested in WMA Decoder if (dmt.majortype == MEDIATYPE_Audio && ( dmt.subtype == GraphStudio::MEDIASUBTYPE_WMA9_00 || dmt.subtype == GraphStudio::MEDIASUBTYPE_WMA9_01 || dmt.subtype == GraphStudio::MEDIASUBTYPE_WMA9_02 || dmt.subtype == GraphStudio::MEDIASUBTYPE_WMA9_03 ) ){ dmo_type = 0; // WMA } MoFreeMediaType(&dmt); } CComPtr<IPropertyPage> page; switch (dmo_type) { case 0: { CWMADecPage *wma_page; wma_page = new CWMADecPage(NULL, &hr, _T("WMA Decoder")); if (wma_page) { wma_page->AddRef(); hr = wma_page->QueryInterface(IID_IPropertyPage, (void**)&page); if (SUCCEEDED(hr)) { hr = page->SetObjects(1, &obj); if (SUCCEEDED(hr)) { container->AddPage(page); } } page = NULL; wma_page->Release(); } } break; } return 0; }
void MainDialog::OnSelchangeListOutputStreams() { HRESULT hr; TCHAR sz[64]; UINT nSel; DWORD dwFlags=0; m_ListInputStreams.GetCurrentSelection(&nSel); // // Display relevant information about output stream // if (!m_pDMO) { return; } // Read output stream information flags hr = m_pDMO->GetOutputStreamInfo(nSel, &dwFlags); if (FAILED(hr)) { MessageBeep(0); return; } // Set stream info checkboxes m_CheckOutWholeSamples.SetCheck(dwFlags & DMO_OUTPUT_STREAMF_WHOLE_SAMPLES); m_CheckOutOneSample.SetCheck(dwFlags & DMO_OUTPUT_STREAMF_SINGLE_SAMPLE_PER_BUFFER); m_CheckOutFixedSize.SetCheck(dwFlags & DMO_OUTPUT_STREAMF_FIXED_SAMPLE_SIZE); m_CheckOutDiscardable.SetCheck(dwFlags & DMO_OUTPUT_STREAMF_DISCARDABLE); m_CheckOutOptional.SetCheck(dwFlags & DMO_OUTPUT_STREAMF_OPTIONAL); // Read preferred output type information DMO_MEDIA_TYPE dmt={0}; hr = m_pDMO->GetOutputType(nSel, 0, &dmt); if (SUCCEEDED(hr)) { hr = GetFormatString(sz, NUMELMS(sz), &dmt); } if (SUCCEEDED(hr)) { SetDlgItemText(IDC_STATIC_OUT_FORMAT, sz); } if (SUCCEEDED(hr)) { hr = GetGUIDString(sz, NUMELMS(sz), &dmt.majortype); } if (SUCCEEDED(hr)) { SetDlgItemText(IDC_STATIC_OUT_TYPE, sz); } if (SUCCEEDED(hr)) { hr = GetGUIDString(sz, NUMELMS(sz), &dmt.subtype); } if (SUCCEEDED(hr)) { SetDlgItemText(IDC_STATIC_OUT_SUBYTPE, sz); } MoFreeMediaType(&dmt); // Does this DMO support quality control? IDMOQualityControl *pQC=0; hr = m_pDMO->QueryInterface(IID_IDMOQualityControl, (void **) &pQC); if (SUCCEEDED(hr)) { m_CheckOutQC.SetCheck(TRUE); pQC->Release(); } }
void MainDialog::OnSelchangeListInputStreams() { HRESULT hr; TCHAR sz[64]; UINT nSel; DWORD dwFlags=0; m_ListInputStreams.GetCurrentSelection(&nSel); // // Display relevant information about input stream // if (!m_pDMO) { return; } // Read input stream information flags hr = m_pDMO->GetInputStreamInfo(nSel, &dwFlags); if (FAILED(hr)) { MessageBeep(0); return; } // Set stream info checkboxes m_CheckInWholeSamples.SetCheck(dwFlags & DMO_INPUT_STREAMF_WHOLE_SAMPLES); m_CheckInOneSample.SetCheck(dwFlags & DMO_INPUT_STREAMF_SINGLE_SAMPLE_PER_BUFFER); m_CheckInFixedSize.SetCheck(dwFlags & DMO_INPUT_STREAMF_FIXED_SAMPLE_SIZE); m_CheckInHoldsBuffers.SetCheck(dwFlags & DMO_INPUT_STREAMF_HOLDS_BUFFERS); // Read preferred input type information. The media type/subtypes // are arranged in order of preference, starting from zero. DMO_MEDIA_TYPE dmt={0}; hr = m_pDMO->GetInputType(nSel, 0, &dmt); if (SUCCEEDED(hr)) { hr = GetFormatString(sz, NUMELMS(sz), &dmt); } if (SUCCEEDED(hr)) { SetDlgItemText(IDC_STATIC_IN_FORMAT, sz); } if (SUCCEEDED(hr)) { hr = GetGUIDString(sz, NUMELMS(sz), &dmt.majortype); } if (SUCCEEDED(hr)) { SetDlgItemText(IDC_STATIC_IN_TYPE, sz); } if (SUCCEEDED(hr)) { hr = GetGUIDString(sz, NUMELMS(sz), &dmt.subtype); } if (SUCCEEDED(hr)) { SetDlgItemText(IDC_STATIC_IN_SUBTYPE, sz); } MoFreeMediaType(&dmt); // Does this DMO support quality control? IDMOQualityControl *pQC=0; hr = m_pDMO->QueryInterface(IID_IDMOQualityControl, (void **) &pQC); if (SUCCEEDED(hr)) { m_CheckInQC.SetCheck(TRUE); pQC->Release(); } }
HRESULT KinectImpl::initAudio(){ HRESULT hr = S_OK; // Set high priority to avoid getting preempted while capturing sound #if 1 DWORD m_TaskIndex=0; h_TaskIndex = AvSetMmThreadCharacteristicsW(L"Audio", &m_TaskIndex); if (h_TaskIndex == NULL) { hr = E_FAIL; puts("Failed to set thread priority\n"); return hr; } #endif // DMO initialization INuiAudioBeam* pAudio; hr = m_pNuiSensor->NuiGetAudioSource(&pAudio); if (FAILED(hr)) { puts("Failed to NuiGetAudioSource\n"); return hr; } hr = pAudio->QueryInterface(IID_IMediaObject, (void**)&pMediaObject); if (FAILED(hr)) { puts("Fail to call pAudio->QueryInterface(pMediaObject)\n"); return hr; } hr = pAudio->QueryInterface(IID_IPropertyStore, (void**)&pPropertyStore); if (FAILED(hr)) { puts("Fail to call pAudio->QueryInterface(pPropertyStore)\n"); return hr; } pAudio->Release(); // Set AEC-MicArray DMO system mode. // This must be set for the DMO to work properly PROPVARIANT pvSysMode; PropVariantInit(&pvSysMode); pvSysMode.vt = VT_I4; pvSysMode.lVal = (LONG)(4); hr = pPropertyStore->SetValue(MFPKEY_WMAAECMA_SYSTEM_MODE, pvSysMode); if (FAILED(hr)) { puts("Fail to call pPS->SetValue\n"); return hr; } PropVariantClear(&pvSysMode); // NOTE: Need to wait 4 seconds for device to be ready right after initialization DWORD dwWait = 4; while (dwWait > 0){ _tprintf(_T("Device will be ready for recording in %d second(s).\r"), dwWait--); Sleep(1000); } _tprintf(_T("Device is ready. 
\n")); /*** set output buffer ***/ memset(&OutputBufferStruct,0, sizeof(OutputBufferStruct)); OutputBufferStruct.pBuffer = &outputBuffer; // Set DMO output format WAVEFORMATEX wfxOut = {WAVE_FORMAT_PCM, 1, 16000, 32000, 2, 16, 0}; DMO_MEDIA_TYPE mt = {0}; hr = MoInitMediaType(&mt, sizeof(WAVEFORMATEX)); if (FAILED(hr)) { puts("MoInitMediaType failed\n"); return hr; } mt.majortype = MEDIATYPE_Audio; mt.subtype = MEDIASUBTYPE_PCM; mt.lSampleSize = 0; mt.bFixedSizeSamples = TRUE; mt.bTemporalCompression = FALSE; mt.formattype = FORMAT_WaveFormatEx; memcpy(mt.pbFormat, &wfxOut, sizeof(WAVEFORMATEX)); hr = pMediaObject->SetOutputType(0, &mt, 0); if (FAILED(hr)) { puts("SetOutputType failed\n"); return hr; } MoFreeMediaType(&mt); hr = pMediaObject->AllocateStreamingResources(); if (FAILED(hr)) { puts("AllocateStreamingResources failed\n"); return hr; } int iFrameSize; PROPVARIANT pvFrameSize; PropVariantInit(&pvFrameSize); hr = pPropertyStore->GetValue(MFPKEY_WMAAECMA_FEATR_FRAME_SIZE, &pvFrameSize); if (FAILED(hr)) { puts("Fail to call pPS->GetValue\n"); return hr; } iFrameSize = pvFrameSize.lVal; PropVariantClear(&pvFrameSize); // allocate output buffer DWORD cOutputBufLen = wfxOut.nSamplesPerSec * wfxOut.nBlockAlign; pbOutputBuffer = new BYTE[cOutputBufLen]; if (pbOutputBuffer == NULL) { puts("Fail to allocate output buffer.\n"); return hr; } // hr = pMediaObject->QueryInterface(IID_INuiAudioBeam, (void**)&pAudioBeam); if (FAILED(hr)) { puts("QueryInterface for IID_INuiAudioBeam failed\n"); return hr; } outputBuffer.Init((byte*)pbOutputBuffer, cOutputBufLen, 0); return S_OK; }
/// <summary> /// Initialize Kinect audio stream object. /// </summary> /// <returns> /// <para>S_OK on success, otherwise failure code.</para> /// </returns> HRESULT KinectReader::InitializeAudioStream() { INuiAudioBeam* pNuiAudioSource = NULL; IMediaObject* pDMO = NULL; IPropertyStore* pPropertyStore = NULL; IStream* pStream = NULL; // Get the audio source HRESULT hr = m_pNuiSensor->NuiGetAudioSource(&pNuiAudioSource); if (SUCCEEDED(hr)) { hr = pNuiAudioSource->QueryInterface(IID_IMediaObject, (void**)&pDMO); if (SUCCEEDED(hr)) { hr = pNuiAudioSource->QueryInterface(IID_IPropertyStore, (void**)&pPropertyStore); // Set AEC-MicArray DMO system mode. This must be set for the DMO to work properly. // Possible values are: // SINGLE_CHANNEL_AEC = 0 // OPTIBEAM_ARRAY_ONLY = 2 // OPTIBEAM_ARRAY_AND_AEC = 4 // SINGLE_CHANNEL_NSAGC = 5 PROPVARIANT pvSysMode; PropVariantInit(&pvSysMode); pvSysMode.vt = VT_I4; pvSysMode.lVal = (LONG)(2); // Use OPTIBEAM_ARRAY_ONLY setting. Set OPTIBEAM_ARRAY_AND_AEC instead if you expect to have sound playing from speakers. 
pPropertyStore->SetValue(MFPKEY_WMAAECMA_SYSTEM_MODE, pvSysMode); PropVariantClear(&pvSysMode); // Set DMO output format WAVEFORMATEX wfxOut = {AudioFormat, AudioChannels, AudioSamplesPerSecond, AudioAverageBytesPerSecond, AudioBlockAlign, AudioBitsPerSample, 0}; DMO_MEDIA_TYPE mt = {0}; MoInitMediaType(&mt, sizeof(WAVEFORMATEX)); mt.majortype = MEDIATYPE_Audio; mt.subtype = MEDIASUBTYPE_PCM; mt.lSampleSize = 0; mt.bFixedSizeSamples = TRUE; mt.bTemporalCompression = FALSE; mt.formattype = FORMAT_WaveFormatEx; memcpy(mt.pbFormat, &wfxOut, sizeof(WAVEFORMATEX)); hr = pDMO->SetOutputType(0, &mt, 0); if (SUCCEEDED(hr)) { m_pKinectAudioStream = new KinectAudioStream(pDMO); hr = m_pKinectAudioStream->QueryInterface(IID_IStream, (void**)&pStream); if (SUCCEEDED(hr)) { hr = CoCreateInstance(CLSID_SpStream, NULL, CLSCTX_INPROC_SERVER, __uuidof(ISpStream), (void**)&m_pSpeechStream); if (SUCCEEDED(hr)) { hr = m_pSpeechStream->SetBaseStream(pStream, SPDFID_WaveFormatEx, &wfxOut); } } } MoFreeMediaType(&mt); } } SafeRelease(pStream); SafeRelease(pPropertyStore); SafeRelease(pDMO); SafeRelease(pNuiAudioSource); return hr; }
/// <summary> /// Start processing stream /// </summary> /// <returns>Indicates success or failure</returns> HRESULT NuiAudioStream::StartStream() { // Get audio source interface HRESULT hr = m_pNuiSensor->NuiGetAudioSource(&m_pNuiAudioSource); if (FAILED(hr)) { return hr; } // Query dmo interface hr = m_pNuiAudioSource->QueryInterface(IID_IMediaObject, (void**)&m_pDMO); if (FAILED(hr)) { return hr; } // Query property store interface hr = m_pNuiAudioSource->QueryInterface(IID_IPropertyStore, (void**)&m_pPropertyStore); if (FAILED(hr)) { return hr; } // Set AEC-MicArray DMO system mode. This must be set for the DMO to work properly. // Possible values are: // SINGLE_CHANNEL_AEC = 0 // OPTIBEAM_ARRAY_ONLY = 2 // OPTIBEAM_ARRAY_AND_AEC = 4 // SINGLE_CHANNEL_NSAGC = 5 PROPVARIANT pvSysMode; // Initialize the variable PropVariantInit(&pvSysMode); // Assign properties pvSysMode.vt = VT_I4; pvSysMode.lVal = (LONG)(2); // Use OPTIBEAM_ARRAY_ONLY setting. Set OPTIBEAM_ARRAY_AND_AEC instead if you expect to have sound playing from speakers. // Set properties m_pPropertyStore->SetValue(MFPKEY_WMAAECMA_SYSTEM_MODE, pvSysMode); // Release the variable PropVariantClear(&pvSysMode); // Set DMO output format WAVEFORMATEX wfxOut = {AudioFormat, AudioChannels, AudioSamplesPerSecond, AudioAverageBytesPerSecond, AudioBlockAlign, AudioBitsPerSample, 0}; memcpy_s(&m_wfxOut,sizeof(WAVEFORMATEX),&wfxOut,sizeof(WAVEFORMATEX)); DMO_MEDIA_TYPE mt = {0}; // Initialize variable MoInitMediaType(&mt, sizeof(WAVEFORMATEX)); // Assign format mt.majortype = MEDIATYPE_Audio; mt.subtype = MEDIASUBTYPE_PCM; mt.lSampleSize = 0; mt.bFixedSizeSamples = TRUE; mt.bTemporalCompression = FALSE; mt.formattype = FORMAT_WaveFormatEx; memcpy_s(mt.pbFormat, sizeof(WAVEFORMATEX), &m_wfxOut, sizeof(WAVEFORMATEX)); // Set format hr = m_pDMO->SetOutputType(0, &mt, 0); // Release variable MoFreeMediaType(&mt); return hr; }
/**
    @brief  Open stream

    Obtains the DMO, property store and audio beam interfaces from the sensor,
    applies the requested system mode, selects the PCM output format returned
    by GetWaveFormat(), allocates the streaming resources and prepares the
    output buffer. The function returns silently at the first failing step.

    Does nothing when USES_KINECT_AUDIOSTREAM is not defined, or when mode is
    ADAPTIVE_ARRAY_ONLY, ADAPTIVE_ARRAY_AND_AEC or MODE_NOT_SET.

    @param  mode    AEC_SYSTEM_MODE
*/
void AudioStream::Open(AEC_SYSTEM_MODE mode /*= OPTIBEAM_ARRAY_ONLY */)
{
#ifndef USES_KINECT_AUDIOSTREAM
    return;
#else
    if((mode == ADAPTIVE_ARRAY_ONLY) || (mode == ADAPTIVE_ARRAY_AND_AEC) || (mode == MODE_NOT_SET)){
        return;
    }

    CoInitialize(NULL);
    isOpen = true;

    HRESULT ret = sensor_->NuiGetAudioSource(&audioBeam_);
    if(FAILED(ret)){
        return;
    }

    ret = audioBeam_->QueryInterface(IID_IMediaObject, (void**)&mediaObject_);
    if(FAILED(ret)){
        return;
    }

    ret = audioBeam_->QueryInterface(IID_IPropertyStore, (void**)&propertyStore_);
    if(FAILED(ret)){
        return;
    }

    PROPVARIANT pvSysMode;
    PropVariantInit(&pvSysMode);
    pvSysMode.vt = VT_I4;
    pvSysMode.lVal = (LONG)mode;
    ret = propertyStore_->SetValue(MFPKEY_WMAAECMA_SYSTEM_MODE, pvSysMode);
    PropVariantClear(&pvSysMode);
    if(FAILED(ret)){
        return;
    }

    // Set DMO output format
    DMO_MEDIA_TYPE mt = {0};
    ret = MoInitMediaType(&mt, sizeof(WAVEFORMATEX));
    if(FAILED(ret)){
        return;
    }

    mt.majortype = MEDIATYPE_Audio;
    mt.subtype = MEDIASUBTYPE_PCM;
    mt.lSampleSize = 0;
    mt.bFixedSizeSamples = TRUE;
    mt.bTemporalCompression = FALSE;
    mt.formattype = FORMAT_WaveFormatEx;
    memcpy(mt.pbFormat, &GetWaveFormat(), sizeof(WAVEFORMATEX));

    ret = mediaObject_->SetOutputType(0, &mt, 0);
    // Free the format block before checking the result so a failure does not
    // leak it.
    MoFreeMediaType(&mt);
    if(FAILED(ret)){
        return;
    }

    // Allocate streaming resources. This step is optional. If it is not called here, it
    // will be called when first time ProcessInput() is called. However, if you want to
    // get the actual frame size being used, it should be called explicitly here.
    ret = mediaObject_->AllocateStreamingResources();
    if(FAILED(ret)){
        return;
    }

    // Get actually frame size being used in the DMO. (optional, do as you need)
    // The result must be stored in ret: checking a stale ret would never
    // detect a failure of this call.
    PROPVARIANT pvFrameSize;
    PropVariantInit(&pvFrameSize);
    ret = propertyStore_->GetValue(MFPKEY_WMAAECMA_FEATR_FRAME_SIZE, &pvFrameSize);
    PropVariantClear(&pvFrameSize);
    if(FAILED(ret)){
        return;
    }

    // Query into a temporary first: audioBeam_ already holds the reference
    // returned by NuiGetAudioSource, and overwriting it directly would leak
    // that reference.
    INuiAudioBeam* beamFromDmo = NULL;
    ret = mediaObject_->QueryInterface(IID_INuiAudioBeam, (void**)&beamFromDmo);
    if(FAILED(ret)){
        return;
    }
    audioBeam_->Release();
    audioBeam_ = beamFromDmo;

    memset(&outputBufferStruct_, 0, sizeof(outputBufferStruct_));
    outputBufferStruct_.pBuffer = &outputMediaBuffer_;

    outputMediaBuffer_.SetBufferLength(GetWaveFormat().nSamplesPerSec * GetWaveFormat().nBlockAlign);
#endif
}
int _tmain( int argc, _TCHAR* argv[] ) { cv::setUseOptimized( true ); // Kinectのインスタンス生成、初期化 INuiSensor* pSensor; HRESULT hResult = S_OK; hResult = NuiCreateSensorByIndex( 0, &pSensor ); if( FAILED( hResult ) ){ std::cerr << "Error : NuiCreateSensorByIndex" << std::endl; return -1; } hResult = pSensor->NuiInitialize( NUI_INITIALIZE_FLAG_USES_AUDIO ); if( FAILED( hResult ) ){ std::cerr << "Error : NuiInitialize" << std::endl; return -1; } // Audioストリームの初期化(InitializeAudioStream) std::cout << "InitializeAudioStream" << std::endl; INuiAudioBeam* pNuiAudioSource; hResult = pSensor->NuiGetAudioSource( &pNuiAudioSource ); if( FAILED( hResult ) ){ std::cerr << "Error : NuiGetAudioSource" << std::endl; return -1; } IMediaObject* pMediaObject = nullptr; IPropertyStore* pPropertyStore = nullptr; pNuiAudioSource->QueryInterface( IID_IMediaObject, reinterpret_cast<void**>( &pMediaObject ) ); pNuiAudioSource->QueryInterface( IID_IPropertyStore, reinterpret_cast<void**>( &pPropertyStore ) ); PROPVARIANT propvariant; PropVariantInit( &propvariant ); propvariant.vt = VT_I4; propvariant.lVal = static_cast<LONG>( 4 ); pPropertyStore->SetValue( MFPKEY_WMAAECMA_SYSTEM_MODE, propvariant ); PropVariantClear( &propvariant ); WAVEFORMATEX waveFormat = { AudioFormat, AudioChannels, AudioSamplesPerSecond, AudioAverageBytesPerSecond, AudioBlockAlign, AudioBitsPerSample, 0 }; DMO_MEDIA_TYPE mediaType = { 0 }; MoInitMediaType( &mediaType, sizeof( WAVEFORMATEX ) ); mediaType.majortype = MEDIATYPE_Audio; mediaType.subtype = MEDIASUBTYPE_PCM; mediaType.lSampleSize = 0; mediaType.bFixedSizeSamples = true; mediaType.bTemporalCompression = false; mediaType.formattype = FORMAT_WaveFormatEx; memcpy( mediaType.pbFormat, &waveFormat, sizeof( WAVEFORMATEX ) ); pMediaObject->SetOutputType( 0, &mediaType, 0 ); KinectAudioStream* audioStream = new KinectAudioStream( pMediaObject ); IStream* pStream = nullptr; audioStream->QueryInterface( IID_IStream, reinterpret_cast<void**>( &pStream ) ); CoInitialize( 
nullptr ); ISpStream* pSpeechStream = nullptr; CoCreateInstance( CLSID_SpStream, NULL, CLSCTX_INPROC_SERVER, __uuidof(ISpStream), reinterpret_cast<void**>( &pSpeechStream ) ); pSpeechStream->SetBaseStream( pStream, SPDFID_WaveFormatEx, &waveFormat ); MoFreeMediaType( &mediaType ); pStream->Release(); pPropertyStore->Release(); pMediaObject->Release(); pNuiAudioSource->Release(); // 音声認識器を作成(CreateSpeechRecognizer) std::cout << "CreateSpeechRecognizer" << std::endl; ISpRecognizer* pSpeechRecognizer; CoCreateInstance( CLSID_SpInprocRecognizer, nullptr, CLSCTX_INPROC_SERVER, __uuidof(ISpRecognizer), reinterpret_cast<void**>( &pSpeechRecognizer ) ); pSpeechRecognizer->SetInput( pSpeechStream, false ); /* // If can use ATL, easier to using SpFindBestToken(sphelper.h). When using Professional or more. ISpObjectToken* pEngineToken = nullptr; SpFindBestToken( SPCAT_RECOGNIZERS, L"Language=411;Kinect=True", NULL, &pEngineToken ); // Japanese "Language=411;Kinect=True" English "Language=409;Kinect=True" */ ///* // If can't use ATL, alternative to using SpFIndBestToken(sphelper.h). When using Express. 
const wchar_t* pVendorPreferred = L"VendorPreferred"; const unsigned long lengthVendorPreferred = static_cast<unsigned long>( wcslen( pVendorPreferred ) ); unsigned long length; ULongAdd( lengthVendorPreferred, 1, &length ); wchar_t* pAttribsVendorPreferred = new wchar_t[ length ]; StringCchCopyW( pAttribsVendorPreferred, length, pVendorPreferred ); ISpObjectTokenCategory* pTokenCategory = nullptr; CoCreateInstance( CLSID_SpObjectTokenCategory, nullptr, CLSCTX_ALL, __uuidof(ISpObjectTokenCategory), reinterpret_cast<void**>( &pTokenCategory ) ); pTokenCategory->SetId( SPCAT_RECOGNIZERS, false ); IEnumSpObjectTokens* pEnumTokens = nullptr; CoCreateInstance( CLSID_SpMMAudioEnum, nullptr, CLSCTX_ALL, __uuidof(IEnumSpObjectTokens), reinterpret_cast<void**>( &pEnumTokens ) ); pTokenCategory->EnumTokens( L"Language=411;Kinect=True", pAttribsVendorPreferred, &pEnumTokens ); // Japanese "Language=411;Kinect=True" English "Language=409;Kinect=True" delete[] pAttribsVendorPreferred; ISpObjectToken* pEngineToken = nullptr; pEnumTokens->Next( 1, &pEngineToken, nullptr ); //*/ pSpeechRecognizer->SetRecognizer( pEngineToken ); ISpRecoContext* pSpeechContext; pSpeechRecognizer->CreateRecoContext( &pSpeechContext ); pEngineToken->Release(); ///* pTokenCategory->Release(); pEnumTokens->Release(); //*/ // 音声認識辞書の作成(LoadSpeechGrammar) std::cout << "LoadSpeechGrammar" << std::endl; ISpRecoGrammar* pSpeechGrammar; pSpeechContext->CreateGrammar( 1, &pSpeechGrammar ); pSpeechGrammar->LoadCmdFromFile( L"SpeechRecognition_Ja.grxml", /*SPLO_STATIC*/SPLO_DYNAMIC ); // http://www.w3.org/TR/speech-grammar/ (UTF-8/CRLF) audioStream->StartCapture(); pSpeechGrammar->SetRuleState( nullptr, nullptr, SPRS_ACTIVE ); pSpeechRecognizer->SetRecoState( SPRST_ACTIVE_ALWAYS ); pSpeechContext->SetInterest( SPFEI( SPEI_RECOGNITION ), SPFEI( SPEI_RECOGNITION ) ); pSpeechContext->Resume( 0 ); HANDLE hSpeechEvent = INVALID_HANDLE_VALUE; hSpeechEvent = pSpeechContext->GetNotifyEventHandle(); HANDLE hEvents[1] = { 
hSpeechEvent }; int width = 640; int height = 480; cv::Mat audioMat = cv::Mat::zeros( height, width, CV_8UC3 ); cv::namedWindow( "Audio" ); bool exit = false; std::cout << std::endl << "Speech Recognition Start..." << std::endl << std::endl; while( 1 ){ // イベントの更新待ち ResetEvent( hSpeechEvent ); unsigned long waitObject = MsgWaitForMultipleObjectsEx( ARRAYSIZE( hEvents ), hEvents, INFINITE, QS_ALLINPUT, MWMO_INPUTAVAILABLE ); if( waitObject == WAIT_OBJECT_0 ){ // イベントの取得 const float confidenceThreshold = 0.3f; SPEVENT eventStatus; unsigned long eventFetch = 0; pSpeechContext->GetEvents( 1, &eventStatus, &eventFetch ); while( eventFetch > 0 ){ switch( eventStatus.eEventId ){ // 音声認識イベント(SPEI_HYPOTHESIS:推定またはSPEI_RECOGNITION:認識) case SPEI_HYPOTHESIS: case SPEI_RECOGNITION: if( eventStatus.elParamType == SPET_LPARAM_IS_OBJECT ){ // フレーズの取得 ISpRecoResult* pRecoResult = reinterpret_cast<ISpRecoResult*>( eventStatus.lParam ); SPPHRASE* pPhrase = nullptr; hResult = pRecoResult->GetPhrase( &pPhrase ); if( SUCCEEDED( hResult ) ){ if( ( pPhrase->pProperties != nullptr ) && ( pPhrase->pProperties->pFirstChild != nullptr ) ){ // 辞書のフレーズタグと比較 const SPPHRASEPROPERTY* pSemantic = pPhrase->pProperties->pFirstChild; if( pSemantic->SREngineConfidence > confidenceThreshold ){ if( wcscmp( L"あか", pSemantic->pszValue ) == 0 ){ std::cout << "あか" << std::endl; audioMat = cv::Scalar( 0, 0, 255 ); } else if( wcscmp( L"みどり", pSemantic->pszValue ) == 0 ){ std::cout << "みどり" << std::endl; audioMat = cv::Scalar( 0, 255, 0 ); } else if( wcscmp( L"あお", pSemantic->pszValue ) == 0 ){ std::cout << "あお" << std::endl; audioMat = cv::Scalar( 255, 0, 0 ); } else if( wcscmp( L"おわり", pSemantic->pszValue ) == 0 ){ exit = true; } } } CoTaskMemFree( pPhrase ); } } break; default: break; } pSpeechContext->GetEvents( 1, &eventStatus, &eventFetch ); } } // 表示 cv::imshow( "Audio", audioMat ); // ループの終了判定(Escキー) if( cv::waitKey( 30 ) == VK_ESCAPE || exit ){ break; } } // 終了処理 audioStream->StopCapture(); 
pSpeechRecognizer->SetRecoState( SPRST_INACTIVE ); CoUninitialize(); pSensor->NuiShutdown(); CloseHandle( hSpeechEvent ); cv::destroyAllWindows(); return 0; }
void dmo_GetOutputType_decoder_inset_(REFGUID clsidEnc, REFGUID clsidDec, REFGUID guidRaw, const vector<GUID> &expected) { HRESULT hr; IMediaObject *pObj; vector<GUID> outTypes; DMO_MEDIA_TYPE mt; VIDEOINFOHEADER *pvih; DWORD fccIn = DirectShowFormatToVCMFormat(guidRaw); hr = CoCreateInstance(clsidEnc, NULL, CLSCTX_INPROC_SERVER, IID_IMediaObject, (LPVOID*)&pObj); BOOST_REQUIRE(hr == S_OK); BOOST_REQUIRE(pObj != NULL); memset(&mt, 0, sizeof(mt)); MoInitMediaType(&mt, sizeof(VIDEOINFOHEADER)); pvih = (VIDEOINFOHEADER*)mt.pbFormat; memset(pvih, 0, sizeof(VIDEOINFOHEADER)); mt.majortype = MEDIATYPE_Video; mt.subtype = guidRaw; mt.bFixedSizeSamples = TRUE; mt.bTemporalCompression = FALSE; mt.lSampleSize = 10000000; /* XXX */ mt.formattype = FORMAT_VideoInfo; pvih->bmiHeader.biSize = sizeof(BITMAPINFOHEADER); pvih->bmiHeader.biWidth = TEST_WIDTH; pvih->bmiHeader.biHeight = TEST_HEIGHT; pvih->bmiHeader.biPlanes = 1; pvih->bmiHeader.biBitCount = FCC2BitCount(fccIn); pvih->bmiHeader.biCompression = FCC2Compression(fccIn); pvih->bmiHeader.biSizeImage = 10000000; /* XXX */ hr = pObj->SetInputType(0, &mt, 0); BOOST_REQUIRE(hr == S_OK); MoFreeMediaType(&mt); hr = pObj->GetOutputType(0, 0, &mt); BOOST_REQUIRE(hr == S_OK); pObj->Release(); hr = CoCreateInstance(clsidDec, NULL, CLSCTX_INPROC_SERVER, IID_IMediaObject, (LPVOID*)&pObj); BOOST_REQUIRE(hr == S_OK); BOOST_REQUIRE(pObj != NULL); hr = pObj->SetInputType(0, &mt, 0); BOOST_REQUIRE(hr == S_OK); for (DWORD idx = 0; (hr = pObj->GetOutputType(0, idx, &mt)) == S_OK; ++idx) { BOOST_CHECK(mt.majortype == MEDIATYPE_Video); BOOST_CHECK(mt.bFixedSizeSamples == TRUE); BOOST_CHECK(mt.bTemporalCompression == FALSE); BOOST_CHECK(mt.formattype == FORMAT_VideoInfo); DWORD fccOut = DirectShowFormatToVCMFormat(mt.subtype); pvih = (VIDEOINFOHEADER*)mt.pbFormat; BOOST_CHECK(pvih->bmiHeader.biSize >= sizeof(BITMAPINFOHEADER)); BOOST_CHECK(pvih->bmiHeader.biWidth == TEST_WIDTH); BOOST_CHECK(pvih->bmiHeader.biHeight == TEST_HEIGHT); 
BOOST_CHECK(pvih->bmiHeader.biPlanes == 1); BOOST_CHECK(pvih->bmiHeader.biBitCount == FCC2BitCount(fccOut)); BOOST_CHECK(pvih->bmiHeader.biCompression == FCC2Compression(fccOut)); outTypes.push_back(mt.subtype); MoFreeMediaType(&mt); } BOOST_CHECK(hr == DMO_E_NO_MORE_ITEMS); BOOST_CHECK_EQUAL(outTypes, expected); pObj->Release(); }