void CTTSApp::MainHandleCommand( int id, HWND hWndControl, UINT codeNotify )
/////////////////////////////////////////////////////////////////
//
// Handle each of the WM_COMMAND messages that come in, and deal with
// them appropriately
//
{
    UINT                cNumChar = 0;
    HRESULT             hr = S_OK;
    TCHAR               szAFileName[NORM_SIZE] = _T("");
    static BOOL         bIsUnicode = FALSE;
    BOOL                bWavFileOpened = FALSE;
    LRESULT             iFormat;
    CComPtr<ISpStream>  cpWavStream;
    CComPtr<ISpStreamFormat>    cpOldStream;
    HWND                hwndEdit;
    BOOL                bFileOpened = FALSE;

    // Get handle to the main edit box
    hwndEdit = GetDlgItem( m_hWnd, IDE_EDITBOX );

    switch(id)
    {
        // About Box display
        case IDC_ABOUT:
            ::DialogBox( m_hInst, (LPCTSTR)IDD_ABOUT, m_hWnd, (DLGPROC)About );
            break;

        // Any change to voices is sent to VoiceChange() function
        case IDC_COMBO_VOICES:
            if( codeNotify == CBN_SELCHANGE )
            {
                hr = VoiceChange();
            }

            if( FAILED( hr ) )
            {
                TTSAppStatusMessage( m_hWnd, _T("Error changing voices\r\n") );
            }

            break;

        // If user wants to speak a file pop the standard windows open file
        // dialog box and load the text into a global buffer (m_pszwFileText)
        // which will be used when the user hits speak.
        case IDB_OPEN:
            bFileOpened = CallOpenFileDialog( szAFileName,
                        _T("TXT (*.txt)\0*.txt\0XML (*.xml)\0*.xml\0All Files (*.*)\0*.*\0") );
            if( bFileOpened )
            {
                DWORD   dwFileSize = 0;
                
                wcscpy_s( m_szWFileName, _countof(m_szWFileName), CT2W( szAFileName ) );
                ReadTheFile( szAFileName, &bIsUnicode, &m_pszwFileText );
                
                if( bIsUnicode )
                {
                    // Unicode source
                    UpdateEditCtlW( m_pszwFileText );
                }
                else
                {
                    // MBCS source
#ifdef _UNICODE
                    LPTSTR pszFileText = _tcsdup( m_pszwFileText );
#else
                    // We're compiling ANSI, so we need to convert the string to MBCS
                    // Note that a W2T may not be good here, since this string might 
                    // be very big
                    LPTSTR pszFileText = NULL;
                    int iNeeded = ::WideCharToMultiByte( CP_ACP, 0, m_pszwFileText, -1, NULL, 0, NULL, NULL );
                    pszFileText = (LPTSTR) ::malloc( sizeof( TCHAR ) * ( iNeeded + 1 ) );
                    ::WideCharToMultiByte( CP_ACP, 0, m_pszwFileText, -1, pszFileText, iNeeded + 1, NULL, NULL );
#endif
                    if ( pszFileText )
                    {
                        SetDlgItemText( m_hWnd, IDE_EDITBOX, pszFileText );
                        free( pszFileText );
                    }

                }
            }
            else
            {
                wcscpy_s( m_szWFileName, _countof(m_szWFileName), L"" );
            }
            // Always SetFocus back to main edit window so text highlighting will work
            SetFocus( hwndEdit );
            break;
        
        // Handle speak
        case IDB_SPEAK:
            HandleSpeak();
            break;

        case IDB_PAUSE:
            if( !m_bStop )
            {
                if( !m_bPause )
                {
                    SetWindowText( GetDlgItem( m_hWnd, IDB_PAUSE ), _T("Resume") );
                    // Pause the voice...
                    m_cpVoice->Pause();
                    m_bPause = TRUE;
                    TTSAppStatusMessage( m_hWnd, _T("Pause\r\n") );
                }
                else
                {
                    SetWindowText( GetDlgItem( m_hWnd, IDB_PAUSE ), _T("Pause") );
                    m_cpVoice->Resume();
                    m_bPause = FALSE;
                }
            }
            SetFocus( hwndEdit );
            break;

        case IDB_STOP:
            TTSAppStatusMessage( m_hWnd, _T("Stop\r\n") );
            // Set the global audio state to stop
            Stop();
            SetFocus( hwndEdit );
            break;

        case IDB_SKIP:
            {
                SetFocus( hwndEdit );
                int fSuccess = false;
                int SkipNum = GetDlgItemInt( m_hWnd, IDC_SKIP_EDIT, &fSuccess, true );
                ULONG ulGarbage = 0;
                WCHAR szGarbage[] = L"Sentence";
                if ( fSuccess )
                {
                    TTSAppStatusMessage( m_hWnd, _T("Skip\r\n") );
                    m_cpVoice->Skip( szGarbage, SkipNum, &ulGarbage );
                }
                else
                {
                    TTSAppStatusMessage( m_hWnd, _T("Skip failed\r\n") );
                }
                break;
            }

        case IDE_EDITBOX:
            // Set the global audio state to stop if user has changed contents of edit control
            if( codeNotify == EN_CHANGE )
            {
                Stop();
            }
            break;

        case IDB_SPEAKWAV:
            bWavFileOpened = CallOpenFileDialog( szAFileName,
                         _T("WAV (*.wav)\0*.wav\0All Files (*.*)\0*.*\0") );
            // Speak the wav file using SpeakStream
            if( bWavFileOpened )
            {
                WCHAR                       szwWavFileName[NORM_SIZE] = L"";;

                wcscpy_s( szwWavFileName, _countof(szwWavFileName), CT2W( szAFileName ) );

                // User helper function found in sphelper.h to open the wav file and
                // get back an IStream pointer to pass to SpeakStream
                hr = SPBindToFile( szwWavFileName, SPFM_OPEN_READONLY, &cpWavStream );

                if( SUCCEEDED( hr ) )
                {
                    hr = m_cpVoice->SpeakStream( cpWavStream, SPF_ASYNC, NULL );
                }

                if( FAILED( hr ) )
                {
                    TTSAppStatusMessage( m_hWnd, _T("Speak error\r\n") );
                }
            }
            break;

        // Reset all values to defaults
        case IDB_RESET:
            TTSAppStatusMessage( m_hWnd, _T("Reset\r\n") );
            SendDlgItemMessage( m_hWnd, IDC_VOLUME_SLIDER, TBM_SETPOS, TRUE, m_DefaultVolume );
            SendDlgItemMessage( m_hWnd, IDC_RATE_SLIDER, TBM_SETPOS, TRUE, m_DefaultRate );
            SendDlgItemMessage( m_hWnd, IDC_SAVETOWAV, BM_SETCHECK, BST_UNCHECKED, 0 );
            SendDlgItemMessage( m_hWnd, IDC_EVENTS, BM_SETCHECK, BST_UNCHECKED, 0 );
            SetDlgItemText( m_hWnd, IDE_EDITBOX, _T("Enter text you wish spoken here.") );

            // reset output format
            SendDlgItemMessage( m_hWnd, IDC_COMBO_OUTPUT, CB_SETCURSEL, m_DefaultFormatIndex, 0 );
            SendMessage( m_hWnd, WM_COMMAND, MAKEWPARAM(IDC_COMBO_OUTPUT, CBN_SELCHANGE), 0 );

            // Change the volume and the rate to reflect what the UI says
            HandleScroll( ::GetDlgItem( m_hWnd, IDC_VOLUME_SLIDER ) );
            HandleScroll( ::GetDlgItem( m_hWnd, IDC_RATE_SLIDER ) );

            SetFocus( hwndEdit );
            break;

        case IDC_COMBO_OUTPUT:
            if( codeNotify == CBN_SELCHANGE )
            {
                // Get the audio output format and set it's GUID
                iFormat  = SendDlgItemMessage( m_hWnd, IDC_COMBO_OUTPUT, CB_GETCURSEL, 0, 0 );
                SPSTREAMFORMAT eFmt = (SPSTREAMFORMAT)SendDlgItemMessage( m_hWnd, IDC_COMBO_OUTPUT,
                                                        CB_GETITEMDATA, iFormat, 0 );
                CSpStreamFormat Fmt;
                Fmt.AssignFormat(eFmt);
                if ( m_cpOutAudio )
                {
                    hr = m_cpOutAudio->SetFormat( Fmt.FormatId(), Fmt.WaveFormatExPtr() );
                }
                else
                {
                    hr = E_FAIL;
                }

                if( SUCCEEDED( hr ) )
                {
                    hr = m_cpVoice->SetOutput( m_cpOutAudio, FALSE );
                }

                if( FAILED( hr ) )
                {
                    TTSAppStatusMessage( m_hWnd, _T("Format rejected\r\n") );
                }

                EnableSpeakButtons( SUCCEEDED( hr ) );
            }
            break;

        case IDC_SAVETOWAV:
        {
            TCHAR szFileName[256];
            _tcscpy_s(szFileName, _countof(szFileName), _T("\0"));

            bFileOpened = CallSaveFileDialog( szFileName,
                        _T("WAV (*.wav)\0*.wav\0All Files (*.*)\0*.*\0") );

            if (bFileOpened == FALSE) break;

            wcscpy_s( m_szWFileName, _countof(m_szWFileName), CT2W(szFileName) );

            CSpStreamFormat OriginalFmt;
            hr = m_cpVoice->GetOutputStream( &cpOldStream );
            if (hr == S_OK)
            {
                hr = OriginalFmt.AssignFormat(cpOldStream);
            }
            else
            {
                hr = E_FAIL;
            }
            // User SAPI helper function in sphelper.h to create a wav file
            if (SUCCEEDED(hr))
            {
                hr = SPBindToFile( m_szWFileName, SPFM_CREATE_ALWAYS, &cpWavStream, &OriginalFmt.FormatId(), OriginalFmt.WaveFormatExPtr() ); 
            }
            if( SUCCEEDED( hr ) )
            {
                // Set the voice's output to the wav file instead of the speakers
                hr = m_cpVoice->SetOutput(cpWavStream, TRUE);
            }

            if ( SUCCEEDED( hr ) )
            {
                // Do the Speak
                HandleSpeak();
            }

            // Set output back to original stream
            // Wait until the speak is finished if saving to a wav file so that
            // the smart pointer cpWavStream doesn't get released before its
            // finished writing to the wav.
            m_cpVoice->WaitUntilDone( INFINITE );
            cpWavStream.Release();
            
            // Reset output
            m_cpVoice->SetOutput( cpOldStream, FALSE );
            
            TCHAR   szTitle[MAX_PATH];
            TCHAR   szConfString[MAX_PATH];
            if ( SUCCEEDED( hr ) )
            {
                LoadString( m_hInst, IDS_SAVE_NOTIFY, szConfString, MAX_PATH );
                LoadString( m_hInst, IDS_NOTIFY_TITLE, szTitle, MAX_PATH );
                MessageBox( m_hWnd, szConfString, szTitle, MB_OK | MB_ICONINFORMATION );
            }
            else
            {
                LoadString( m_hInst, IDS_SAVE_ERROR, szConfString, MAX_PATH );
                MessageBox( m_hWnd, szConfString, NULL, MB_ICONEXCLAMATION );
            }

            break;
        }
    }
    
    return;
}
Ejemplo n.º 2
0
	void Sound::test() {

		ISpVoice * pVoice = NULL;
		ISpObjectToken*        pVoiceToken=nullptr;
		IEnumSpObjectTokens*   pEnum;
		ULONG                  ulCount = 0;

		if (FAILED(::CoInitialize(NULL)))
		{
			return;
		}
		HRESULT hr = S_OK;

		// Find the best matching installed en-us recognizer.
		CComPtr<ISpObjectToken> cpRecognizerToken;

		if (SUCCEEDED(hr))
		{
			hr = SpFindBestToken(SPCAT_RECOGNIZERS, L"language=409", NULL, &cpRecognizerToken);
		}

		// Create the in-process recognizer and immediately set its state to inactive.
		CComPtr<ISpRecognizer> cpRecognizer;

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecognizer(cpRecognizerToken);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecoState(SPRST_INACTIVE);
		}

		// Create a new recognition context from the recognizer.
		CComPtr<ISpRecoContext> cpContext;

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->CreateRecoContext(&cpContext);
		}

		// Subscribe to the speech recognition event and end stream event.
		if (SUCCEEDED(hr))
		{
			ULONGLONG ullEventInterest = SPFEI(SPEI_RECOGNITION);
			hr = cpContext->SetInterest(ullEventInterest, ullEventInterest);
		}

		// Establish a Win32 event to signal when speech events are available.
		HANDLE hSpeechNotifyEvent = INVALID_HANDLE_VALUE;

		if (SUCCEEDED(hr))
		{
			hr = cpContext->SetNotifyWin32Event();
		}

		if (SUCCEEDED(hr))
		{
			hSpeechNotifyEvent = cpContext->GetNotifyEventHandle();

			if (INVALID_HANDLE_VALUE == hSpeechNotifyEvent)
			{
				// Notification handle unsupported.
				hr = E_NOINTERFACE;
			}
		}

		// Initialize an audio object to use the default audio input of the system and set the recognizer to use it.
		CComPtr<ISpAudio> cpAudioIn;

		if (SUCCEEDED(hr))
		{
			hr = cpAudioIn.CoCreateInstance(CLSID_SpMMAudioIn);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetInput(cpAudioIn, TRUE);
		}

		// Populate a WAVEFORMATEX struct with our desired output audio format. information.
		WAVEFORMATEX* pWfexCoMemRetainedAudioFormat = NULL;
		GUID guidRetainedAudioFormat = GUID_NULL;

		if (SUCCEEDED(hr))
		{
			hr = SpConvertStreamFormatEnum(SPSF_16kHz16BitMono, &guidRetainedAudioFormat, &pWfexCoMemRetainedAudioFormat);
		}

		// Instruct the recognizer to retain the audio from its recognition results.
		if (SUCCEEDED(hr))
		{
			hr = cpContext->SetAudioOptions(SPAO_RETAIN_AUDIO, &guidRetainedAudioFormat, pWfexCoMemRetainedAudioFormat);
		}

		if (NULL != pWfexCoMemRetainedAudioFormat)
		{
			CoTaskMemFree(pWfexCoMemRetainedAudioFormat);
		}

		// Create a new grammar and load an SRGS grammar from file.
		CComPtr<ISpRecoGrammar> cpGrammar;

		if (SUCCEEDED(hr))
		{
			hr = cpContext->CreateGrammar(0, &cpGrammar);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpGrammar->LoadCmdFromFile(L"grammar.grxml", SPLO_STATIC);
		}

		// Set all top-level rules in the new grammar to the active state.
		if (SUCCEEDED(hr))
		{
			hr = cpGrammar->SetRuleState(NULL, NULL, SPRS_ACTIVE);
		}

		// Set the recognizer state to active to begin recognition.
		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecoState(SPRST_ACTIVE_ALWAYS);
		}

		// Establish a separate Win32 event to signal the event loop exit.
		HANDLE hExitEvent = CreateEventW(NULL, FALSE, FALSE, NULL);

		// Collect the events listened for to pump the speech event loop.
		HANDLE rghEvents[] = { hSpeechNotifyEvent, hExitEvent };

		// Speech recognition event loop.
		BOOL fContinue = TRUE;

		while (fContinue && SUCCEEDED(hr))
		{
			// Wait for either a speech event or an exit event, with a 15 second timeout.
			DWORD dwMessage = WaitForMultipleObjects(sp_countof(rghEvents), rghEvents, FALSE, 15000);

			switch (dwMessage)
			{
				// With the WaitForMultipleObjects call above, WAIT_OBJECT_0 is a speech event from hSpeechNotifyEvent.
			case WAIT_OBJECT_0:
			{
				// Sequentially grab the available speech events from the speech event queue.
				CSpEvent spevent;

				while (S_OK == spevent.GetFrom(cpContext))
				{
					switch (spevent.eEventId)
					{
					case SPEI_RECOGNITION:
					{
						// Retrieve the recognition result and output the text of that result.
						ISpRecoResult* pResult = spevent.RecoResult();

						LPWSTR pszCoMemResultText = NULL;
						hr = pResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &pszCoMemResultText, NULL);

						if (SUCCEEDED(hr))
						{
							wprintf(L"Recognition event received, text=\"%s\"\r\n", pszCoMemResultText);
						}

						// Also retrieve the retained audio we requested.
						CComPtr<ISpStreamFormat> cpRetainedAudio;

						if (SUCCEEDED(hr))
						{
							hr = pResult->GetAudio(0, 0, &cpRetainedAudio);
						}

						// To demonstrate, we'll speak the retained audio back using ISpVoice.
						CComPtr<ISpVoice> cpVoice;

						if (SUCCEEDED(hr))
						{
							hr = cpVoice.CoCreateInstance(CLSID_SpVoice);
						}

						if (SUCCEEDED(hr))
						{
							hr = cpVoice->SpeakStream(cpRetainedAudio, SPF_DEFAULT, 0);
						}

						if (NULL != pszCoMemResultText)
						{
							CoTaskMemFree(pszCoMemResultText);
						}

						break;
					}
					}
				}

				break;
			}
			case WAIT_OBJECT_0 + 1:
			case WAIT_TIMEOUT:
			{
				// Exit event or timeout; discontinue the speech loop.
				fContinue = FALSE;
				//break;
			}
			}
		}

	CoUninitialize();

		CComPtr <ISpVoice>		cpVoice;
		CComPtr <ISpStream>		cpStream;
		CSpStreamFormat			cAudioFmt;

		//Create a SAPI Voice
		hr = cpVoice.CoCreateInstance(CLSID_SpVoice);

		//Set the audio format
		if (SUCCEEDED(hr))
		{
			hr = cAudioFmt.AssignFormat(SPSF_22kHz16BitMono);
		}

		//Call SPBindToFile, a SAPI helper method,  to bind the audio stream to the file
		if (SUCCEEDED(hr))
		{

			hr = SPBindToFile(L"c:\\ttstemp.wav", SPFM_CREATE_ALWAYS,
				&cpStream, &cAudioFmt.FormatId(), cAudioFmt.WaveFormatExPtr());
		}

		//set the output to cpStream so that the output audio data will be stored in cpStream
		if (SUCCEEDED(hr))
		{
			hr = cpVoice->SetOutput(cpStream, TRUE);
		}

		//Speak the text "hello world" synchronously
		if (SUCCEEDED(hr))
		{
			hr = cpVoice->Speak(L"Hello World", SPF_DEFAULT, NULL);
		}

		//close the stream
		if (SUCCEEDED(hr))
		{
			hr = cpStream->Close();
		}

		//Release the stream and voice object
		cpStream.Release();
		cpVoice.Release();

		CComPtr<ISpGrammarBuilder>    cpGrammarBuilder;
		SPSTATEHANDLE                 hStateTravel;
		// Create (if rule does not already exist)
		// top-level Rule, defaulting to Active.
		hr = cpGrammarBuilder->GetRule(L"Travel", 0, SPRAF_TopLevel | SPRAF_Active, TRUE, &hStateTravel);

		// Approach 1: List all possible phrases.
		// This is the most intuitive approach, and it does not sacrifice efficiency
		// because the grammar builder will merge shared sub-phrases when possible.
		// There is only one root state, hStateTravel, and the terminal NULL state,
		// and there are six unique transitions between root state and NULL state.

		/* XML Approximation:
		<rule id="Travel">
		<item> fly to Seattle </item>
		<item> fly to New York </item>
		<item> fly to Washington DC </item>
		<item> drive to Seattle </item>
		<item> drive to New York </item>
		<item> drive to Washington DC </item>
		</rule>
		*/

		// Create set of peer phrases, each containing complete phrase.
		// Note: the word delimiter is set as " ", so that the text we
		// attach to the transition can be multiple words (for example,
		// "fly to Seattle" is implicitly "fly" + "to" + "Seattle"):
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"fly to Seattle", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"fly to New York", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"fly to Washington DC", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"drive to Seattle", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"drive to New York", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"drive to Washington DC", L" ", SPWT_LEXICAL, 1, NULL);
		}
		// Find the best matching installed en-US recognizer.
		//CComPtr<ISpObjectToken> cpRecognizerToken;

		if (SUCCEEDED(hr))
		{
			hr = SpFindBestToken(SPCAT_RECOGNIZERS, L"language=409", NULL, &cpRecognizerToken);
		}

		// Create the in-process recognizer and immediately set its state to inactive.
		//CComPtr<ISpRecognizer> cpRecognizer;

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecognizer(cpRecognizerToken);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecoState(SPRST_INACTIVE);
		}

		// Create a new recognition context from the recognizer.
		//CComPtr<ISpRecoContext> cpContext;

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->CreateRecoContext(&cpContext);
		}

		// Subscribe to the speech recognition event and end stream event.
		if (SUCCEEDED(hr))
		{
			ULONGLONG ullEventInterest = SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_END_SR_STREAM);
			hr = cpContext->SetInterest(ullEventInterest, ullEventInterest);
		}

		// Establish a Win32 event to signal when speech events are available.
		//HANDLE hSpeechNotifyEvent = INVALID_HANDLE_VALUE;

		if (SUCCEEDED(hr))
		{
			hr = cpContext->SetNotifyWin32Event();
		}

		if (SUCCEEDED(hr))
		{
			hr = cpContext->SetNotifyWin32Event();
		}

		if (SUCCEEDED(hr))
		{
			hSpeechNotifyEvent = cpContext->GetNotifyEventHandle();

			if (INVALID_HANDLE_VALUE == hSpeechNotifyEvent)
			{
				// Notification handle unsupported
				//hr = SPERR_UNITIALIZED;
			}
		}
		// Set up an audio input stream using a .wav file and set the recognizer's input.
		CComPtr<ISpStream> cpInputStream;

		if (SUCCEEDED(hr))
		{
			hr = SPBindToFile(L"Test.wav", SPFM_OPEN_READONLY, &cpInputStream);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetInput(cpInputStream, TRUE);
		}

		// Create a new grammar and load an SRGS grammar from file.
		//CComPtr<ISpRecoGrammar> cpGrammar;

		if (SUCCEEDED(hr))
		{
			hr = cpContext->CreateGrammar(0, &cpGrammar);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpGrammar->LoadCmdFromFile(L"grammar.grxml", SPLO_STATIC);
		}

		// Set all top-level rules in the new grammar to the active state.
		if (SUCCEEDED(hr))
		{
			hr = cpGrammar->SetRuleState(NULL, NULL, SPRS_ACTIVE);
		}

		// Finally, set the recognizer state to active to begin recognition.
		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecoState(SPRST_ACTIVE_ALWAYS);
		}

		 hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void     **)&pVoice);
		if (SUCCEEDED(hr)) {
			hr = SpEnumTokens(SPCAT_VOICES, L"Gender=Female", NULL, &pEnum);
			if (SUCCEEDED(hr))
			{
				// Get the number of voices.
				hr = pEnum->GetCount(&ulCount);
			}

			// Obtain a list of available voice tokens, set
			// the voice to the token, and call Speak.
			while (SUCCEEDED(hr) && ulCount--)			{
				if (pVoiceToken != nullptr) {
					pVoiceToken->Release();
				}

				if (SUCCEEDED(hr))
				{
					hr = pEnum->Next(1, &pVoiceToken, NULL);
				}

				if (SUCCEEDED(hr))
				{
					hr = pVoice->SetVoice(pVoiceToken);
				}

				if (SUCCEEDED(hr))
				{
					wchar_t* start = L"<?xml version=\"1.0\" encoding=\"ISO - 8859 - 1\"?><speak version = \"1.0\" xmlns = \"http://www.w3.org/2001/10/synthesis\"	xml:lang = \"en-US\">";
					wchar_t* end = L"</speak>";
					const wchar_t *xml = L"<voice required = \"Gender=Male\"> hi! <prosody pitch=\"fast\"> This is low pitch. </prosody><prosody volume=\"x - loud\"> This is extra loud volume. </prosody>";
					wstring s = start;
					s += xml;
					s += end;
					
					hr = pVoice->Speak(xml, SPF_IS_XML| SPF_ASYNC, 0);
					//hr = pVoice->Speak(L"How are you?", SPF_DEFAULT, NULL);
				}

			}
			/*
			if (SUCCEEDED(hr)) {
				hr = pEnum->Next(1, &pVoiceToken, NULL);
				if (SUCCEEDED(hr)) {
					hr = pVoice->SetVoice(pVoiceToken);
					// Set the output to the default audio device.
					if (SUCCEEDED(hr)) {
						hr = pVoice->SetOutput(NULL, TRUE);
						if (SUCCEEDED(hr)) {
							hr = pVoice->Speak(L"Hello, world!", SPF_DEFAULT, 0);
						}
					}
				}
			}
			*/
			pVoice->Release();
		}
		::CoUninitialize();
	}
Ejemplo n.º 3
0
//Speech Initialization is done here
HRESULT CASRwrapper::InitSpeech(std::wstring sPathToFile, IStream * pMemStream)
{
	HRESULT hr = S_OK;

	hr = cpRecoEngine.CoCreateInstance(CLSID_SpInprocRecognizer);

	if (SUCCEEDED(hr))
	{
		hr = cpRecoEngine->CreateRecoContext(&m_cpRecoCtxt);
	}

	if (SUCCEEDED(hr))
	{
		WPARAM wparam = NULL;
		LPARAM lparam = NULL;
		hr = m_cpRecoCtxt->SetNotifyWin32Event();
		//hr = m_cpRecoCtxt->SetNotifyCallbackFunction(SpRecCallback,wparam,lparam);
		//	hr = m_cpRecoCtxt->SetNotifyWindowMessage(m_hWnd, WM_RECOEVENT, 0, 0);
	}

	if (SUCCEEDED(hr))
	{
		// This specifies which of the recognition events are going 
		//to trigger notifications. Here, all we are interested in 
		//is the beginning and ends of sounds, as well as
		// when the engine has recognized something
		//using ISpRecoContext
		const ULONGLONG ullInterest = SPFEI(SPEI_RECOGNITION);
		hr = m_cpRecoCtxt->SetInterest(ullInterest, ullInterest);
	}

	if (SUCCEEDED(hr))
	{
		// Specifies that the grammar we want is a dictation grammar.
		// Initializes the grammar (m_cpDictationGrammar)
		// using ISpRecoContext
		hr = m_cpRecoCtxt->CreateGrammar(GID_DICTATION, &m_cpDictationGrammar);
	}

	if (SUCCEEDED(hr))
	{
		//Load the dictation tool for the grammar specified
		//using ISpRecoGrammar
		hr = m_cpDictationGrammar->LoadDictation(NULL, SPLO_STATIC);
	}

	if (!sPathToFile.empty() || pMemStream != NULL)
	{
		CComPtr<ISpStream> cpInputStream;
		if (SUCCEEDED(hr))
		{
			// Create basic SAPI stream object
			// NOTE: The helper SpBindToFile can be used to perform the following operations
			hr = cpInputStream.CoCreateInstance(CLSID_SpStream);
		}

		CSpStreamFormat sInputFormat;
		// generate WaveFormatEx structure, assuming the wav format is 44kHz, 16-bit, Mono
		if (SUCCEEDED(hr))
		{
			hr = sInputFormat.AssignFormat(SPSF_44kHz16BitMono);
		}

		if (pMemStream != NULL)
		{
			if (SUCCEEDED(hr))
			{
				hr = cpInputStream->SetBaseStream(pMemStream, SPDFID_WaveFormatEx, sInputFormat.WaveFormatExPtr());
			}
		}
		else
		{
			if (SUCCEEDED(hr))
			{
				//   for read-only access, since it will only be access by the SR engine
				hr = cpInputStream->BindToFile(sPathToFile.c_str(),
					SPFM_OPEN_READONLY,
					&(sInputFormat.FormatId()),
					sInputFormat.WaveFormatExPtr(),
					SPFEI_ALL_EVENTS);
			}
		}

		if (SUCCEEDED(hr))
		{
			// connect wav input to recognizer
			// SAPI will negotiate mismatched engine/input audio formats using system audio codecs, so second parameter is not important - use default of TRUE
			hr = cpRecoEngine->SetInput(cpInputStream, TRUE);
		}

	}
	else //connect to mic
	{
		// create default audio object
		CComPtr<ISpAudio> cpAudio;
		if (SUCCEEDED(hr))
		{
			hr = SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &cpAudio);
		}

		// set the input for the engine
		if (SUCCEEDED(hr))
		{
			hr = cpRecoEngine->SetInput(cpAudio, TRUE);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecoEngine->SetRecoState(SPRST_ACTIVE);
		}
	}


	if (FAILED(hr))
	{
		//Release the grammar using ISpRecoGrammar
		m_cpDictationGrammar.Release();
	}

	return hr;
}
Ejemplo n.º 4
0
//-----------------------------------------------------------------------------
// Purpose: Given a wave file and a string of words "text", creates a CFG from the
//  sentence and stores the resulting words/phonemes in CSentence
// Input  : *wavname - 
//			text - 
//			sentence - 
//			(*pfnPrint - 
// Output : SR_RESULT
//-----------------------------------------------------------------------------
SR_RESULT ExtractPhonemes( const char *wavname, CSpDynamicString& text, CSentence& sentence, void (*pfnPrint)( const char *fmt, ...) )
{
	// Assume failure
	SR_RESULT result = SR_RESULT_ERROR;

	if ( text.Length() <= 0 )
	{
		pfnPrint( "Error:  no rule / text specified\n" );
		return result;
	}

	USES_CONVERSION;
	HRESULT hr;
	
	CUtlVector < WORDRULETYPE > wordRules;

	CComPtr<ISpStream> cpInputStream;
	CComPtr<ISpRecognizer> cpRecognizer;
	CComPtr<ISpRecoContext> cpRecoContext;
	CComPtr<ISpRecoGrammar> cpRecoGrammar;
	CComPtr<ISpPhoneConverter>  cpPhoneConv;
    
	// Create basic SAPI stream object
	// NOTE: The helper SpBindToFile can be used to perform the following operations
	hr = cpInputStream.CoCreateInstance(CLSID_SpStream);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Stream object not installed?\n" );
		return result;
	}

	CSpStreamFormat sInputFormat;
	
	// setup stream object with wav file MY_WAVE_AUDIO_FILENAME
	//   for read-only access, since it will only be access by the SR engine
	hr = cpInputStream->BindToFile(
		T2W(wavname),
		SPFM_OPEN_READONLY,
		NULL,
		sInputFormat.WaveFormatExPtr(),
		SPFEI_ALL_EVENTS );

	if ( FAILED( hr ) )
	{
		pfnPrint( "Error: couldn't open wav file %s\n", wavname );
		return result;
	}
	
	// Create in-process speech recognition engine
	hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 In process recognizer object not installed?\n" );
		return result;
	}

	// Create recognition context to receive events
	hr = cpRecognizer->CreateRecoContext(&cpRecoContext);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to create recognizer context\n" );
		return result;
	}
	
	// Create a grammar
	hr = cpRecoContext->CreateGrammar( EP_GRAM_ID, &cpRecoGrammar );
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to create recognizer grammar\n" );
		return result;
	}

	LANGID englishID = 0x409; // 1033 decimal

	bool userSpecified = false;
	LANGID langID = SpGetUserDefaultUILanguage();

	// Allow commandline override
	if ( CommandLine()->FindParm( "-languageid" ) != 0 )
	{
		userSpecified = true;
		langID = CommandLine()->ParmValue( "-languageid", langID );
	}

	// Create a phoneme converter ( so we can convert to IPA codes )
	hr = SpCreatePhoneConverter( langID, NULL, NULL, &cpPhoneConv );
	if ( FAILED( hr ) )
	{
		if ( langID != englishID )
		{
			if ( userSpecified )
			{
				pfnPrint( "Warning:  SAPI 5.1 Unable to create phoneme converter for command line override -languageid %i\n", langID );
			}
			else
			{
				pfnPrint( "Warning:  SAPI 5.1 Unable to create phoneme converter for default UI language %i\n",langID );
			}

			// Try english!!!
			langID = englishID;
			hr = SpCreatePhoneConverter( langID, NULL, NULL, &cpPhoneConv );
		}

		if ( FAILED( hr ) )
		{
			pfnPrint( "Error:  SAPI 5.1 Unable to create phoneme converter for English language id %i\n", langID );
			return result;
		}
		else
		{
			pfnPrint( "Note:  SAPI 5.1 Falling back to use english -languageid %i\n", langID );
		}
	}
	else if ( userSpecified )
	{
		pfnPrint( "Note:  SAPI 5.1 Using user specified -languageid %i\n",langID );
	}

	SPSTATEHANDLE hStateRoot;
	// create/re-create Root level rule of grammar
	hr = cpRecoGrammar->GetRule(L"Root", 0, SPRAF_TopLevel | SPRAF_Active, TRUE, &hStateRoot);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to create root rule\n" );
		return result;
	}

	// Inactivate it so we can alter it
	hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_INACTIVE );
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to deactivate grammar rules\n" );
		return result;
	}

	// Create the rule set from the words in text
	{
		CSpDynamicString currentWord;
		WCHAR *pos = ( WCHAR * )text;
		WCHAR str[ 2 ];
		str[1]= 0;

		while ( *pos )
		{
			if ( *pos == L' ' /*|| *pos == L'.' || *pos == L'-'*/ )
			{
				// Add word to rule set
				if ( currentWord.Length() > 0 )
				{
					AddWordRule( cpRecoGrammar, &hStateRoot, &wordRules, currentWord );
					currentWord.Clear();
				}
				pos++;
				continue;
			}

			// Skip anything that's inside a [ xxx ] pair.
			if ( *pos == L'[' )
			{
				while ( *pos && *pos != L']' )
				{
					pos++;
				}

				if ( *pos )
				{
					pos++;
				}
				continue;
			}

			str[ 0 ] = *pos;

			currentWord.Append( str );
			pos++;
		}

		if ( currentWord.Length() > 0 )
		{
			AddWordRule( cpRecoGrammar, &hStateRoot, &wordRules, currentWord );
		}

		if ( wordRules.Size() <= 0 )
		{
			pfnPrint( "Error:  Text %s contained no usable words\n", text );
			return result;
		}

		// Build all word to word transitions in the grammar
		if ( !BuildRules( cpRecoGrammar, &hStateRoot, &wordRules ) )
		{
			pfnPrint( "Error:  Rule set for %s could not be generated\n", text );
			return result;
		}
	}

	// check for recognitions and end of stream event
	const ULONGLONG ullInterest = 
		SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_END_SR_STREAM) | SPFEI(SPEI_FALSE_RECOGNITION) | 
		SPFEI(SPEI_PHRASE_START ) | SPFEI(SPEI_HYPOTHESIS ) | SPFEI(SPEI_INTERFERENCE) ;
	hr = cpRecoContext->SetInterest( ullInterest, ullInterest );
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to set interest level\n" );
		return result;
	}	
	// use Win32 events for command-line style application
	hr = cpRecoContext->SetNotifyWin32Event();
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to set win32 notify event\n" );
		return result;
	}
	// connect wav input to recognizer
	// SAPI will negotiate mismatched engine/input audio formats using system audio codecs, so second parameter is not important - use default of TRUE
	hr = cpRecognizer->SetInput(cpInputStream, TRUE);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to associate input stream\n" );
		return result;
	}	

	// Activate the CFG ( rather than using dictation )
	hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_ACTIVE );
	if ( FAILED( hr ) )
	{
		switch ( hr )
		{
		case E_INVALIDARG:
			pfnPrint( "pszName is invalid or bad. Alternatively, pReserved is non-NULL\n" );
			break;
		case SP_STREAM_UNINITIALIZED:
			pfnPrint( "ISpRecognizer::SetInput has not been called with the InProc recognizer\n" );
			break;
		case SPERR_UNINITIALIZED:
			pfnPrint( "The object has not been properly initialized.\n");
			break;
		case SPERR_UNSUPPORTED_FORMAT:
			pfnPrint( "Audio format is bad or is not recognized. Alternatively, the device driver may be busy by another application and cannot be accessed.\n" );
			break;
		case SPERR_NOT_TOPLEVEL_RULE:
			pfnPrint( "The rule pszName exists, but is not a top-level rule.\n" );
			break;
		default:
			pfnPrint( "Unknown error\n" );
			break;
		}
		pfnPrint( "Error:  SAPI 5.1 Unable to activate rule set\n" );
		return result;
	}

	// while events occur, continue processing
	// timeout should be greater than the audio stream length, or a reasonable amount of time expected to pass before no more recognitions are expected in an audio stream
	BOOL fEndStreamReached = FALSE;
	while (!fEndStreamReached && S_OK == cpRecoContext->WaitForNotifyEvent( SR_WAVTIMEOUT ))
	{
		CSpEvent spEvent;
		// pull all queued events from the reco context's event queue
		
		while (!fEndStreamReached && S_OK == spEvent.GetFrom(cpRecoContext))
		{
			// Check event type
			switch (spEvent.eEventId)
			{
			case SPEI_INTERFERENCE:
				{
					SPINTERFERENCE interference = spEvent.Interference();

					switch ( interference )
					{
					case SPINTERFERENCE_NONE:
						pfnPrint( "[ I None ]\r\n" );
						break;
					case SPINTERFERENCE_NOISE:
						pfnPrint( "[ I Noise ]\r\n" );
						break;
					case SPINTERFERENCE_NOSIGNAL:
						pfnPrint( "[ I No Signal ]\r\n" );
						break;
					case SPINTERFERENCE_TOOLOUD:
						pfnPrint( "[ I Too Loud ]\r\n" );
						break;
					case SPINTERFERENCE_TOOQUIET:
						pfnPrint( "[ I Too Quiet ]\r\n" );
						break;
					case SPINTERFERENCE_TOOFAST:
						pfnPrint( "[ I Too Fast ]\r\n" );
						break;
					case SPINTERFERENCE_TOOSLOW:
						pfnPrint( "[ I Too Slow ]\r\n" );
						break;
					default:
						break;
					}
				}
				break;
			case SPEI_PHRASE_START:
				pfnPrint( "Phrase Start\r\n" );
				sentence.MarkNewPhraseBase();
				break;

			case SPEI_HYPOTHESIS:
			case SPEI_RECOGNITION:
			case SPEI_FALSE_RECOGNITION:
				{
                    CComPtr<ISpRecoResult> cpResult;
                    cpResult = spEvent.RecoResult();

                    CSpDynamicString dstrText;
                    if (spEvent.eEventId == SPEI_FALSE_RECOGNITION)
                    {
                        dstrText = L"(Unrecognized)";

						result = SR_RESULT_FAILED;

						// It's possible that the failed recog might have more words, so see if that's the case
						EnumeratePhonemes( cpPhoneConv, cpResult, sentence );
					}
                    else
                    {
						// Hypothesis or recognition success
                        cpResult->GetText( (ULONG)SP_GETWHOLEPHRASE, (ULONG)SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL);

						EnumeratePhonemes( cpPhoneConv, cpResult, sentence );

						if ( spEvent.eEventId == SPEI_RECOGNITION )
						{
							result = SR_RESULT_SUCCESS;
						}

						pfnPrint( va( "%s%s\r\n", spEvent.eEventId == SPEI_HYPOTHESIS ? "[ Hypothesis ] " : "", dstrText.CopyToChar() ) );
					}
                    
                    cpResult.Release();
				}
				break;
				// end of the wav file was reached by the speech recognition engine
            case SPEI_END_SR_STREAM:
				fEndStreamReached = TRUE;
				break;
			}
			
			// clear any event data/object references
			spEvent.Clear();
		}// END event pulling loop - break on empty event queue OR end stream
	}// END event polling loop - break on event timeout OR end stream
	
	// Deactivate rule
	hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_INACTIVE );
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to deactivate rule set\n" );
		return result;
	}

	// close the input stream, since we're done with it
	// NOTE: smart pointer will call SpStream's destructor, and consequently ::Close, but code may want to check for errors on ::Close operation
	hr = cpInputStream->Close();
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to close input stream\n" );
		return result;
	}

	return result;
}
Ejemplo n.º 5
0
void sapi::run (const char * text, unsigned samplerate, const char * path)
{
	HRESULT hr = E_FAIL;

	ISpStreamFormat_memblock * pSpStreamFormat_memblock = new ISpStreamFormat_memblock;
	mmh::comptr_t<ISpStreamFormat> pSpStreamFormat = pSpStreamFormat_memblock, pCurSpStreamFormat;

#if 0
	CSpStreamFormat	cAudioFmt;
	mmh::comptr_t<ISpStream> cpStream;

	hr = cAudioFmt.AssignFormat(SPSF_16kHz16BitMono);
	_check_hresult(hr);

	hr = SPBindToFile( pfc::stringcvt::string_wide_from_utf8(path),  SPFM_CREATE_ALWAYS, cpStream.get_pp(), &cAudioFmt.FormatId(), cAudioFmt.WaveFormatExPtr());
	_check_hresult(hr);
#endif

	hr = m_SpVoice->GetOutputStream(pCurSpStreamFormat.get_pp());
	_check_hresult(hr);

	GUID fmt; WAVEFORMATEX * wfe = NULL;
	hr = pCurSpStreamFormat->GetFormat(&fmt, &wfe);
	_check_hresult(hr);
	if (wfe == NULL)
		_check_hresult(E_FAIL);
	if (wfe->nChannels > 1)
	{
		wfe->nBlockAlign /= wfe->nChannels;
		wfe->nAvgBytesPerSec/= wfe->nChannels;
		wfe->nChannels = 1;
	}
	hr = pSpStreamFormat_memblock->AssignFormat(wfe);
	if (wfe) CoTaskMemFree(wfe);
	_check_hresult(hr);

#if 0
	hr = m_SpVoice->SetOutput( cpStream, FALSE );
#else
	hr = m_SpVoice->SetOutput( pSpStreamFormat, FALSE );
#endif
	_check_hresult(hr);

	hr = m_SpVoice->Speak( pfc::stringcvt::string_wide_from_utf8(text),  SPF_DEFAULT, NULL );
	_check_hresult(hr);

#if 0
	hr = cpStream->Close();
	return;
#endif

	const WAVEFORMATEX * pwfex = pSpStreamFormat_memblock->WaveFormatExPtr();
	if (pwfex == NULL)
		_check_hresult(E_FAIL);

	{
		static_api_ptr_t<audio_postprocessor> processor;
		dsp::ptr resampler;
		dsp_chunk_list_impl resampler_chunks;
		audio_chunk_impl chunk;
		mem_block_container_impl_t<pfc::alloc_fast_aggressive> chunk2;
		pfc::array_t<t_uint8, pfc::alloc_fast_aggressive> finalOutStream;

		if (!resampler_entry::g_create(resampler, chunk.get_sample_rate(), samplerate, 1.0))
			throw pfc::exception( pfc::string8() << "Could not create resampler (" << chunk.get_sample_rate() << " Hz -> " << samplerate << " Hz)");

		chunk.set_data_fixedpoint(pSpStreamFormat_memblock->get_ptr(), pSpStreamFormat_memblock->get_size(), pwfex->nSamplesPerSec, pwfex->nChannels, pwfex->wBitsPerSample, 1);
		resampler_chunks.add_chunk(&chunk);
		resampler->run(&resampler_chunks, metadb_handle_ptr(), dsp::FLUSH);

		t_riff_header riff;
		riff.id0 = 'R'|'I'<<8|'F'<<16|'F'<<24;
		riff.id1 = 'f'|'m'<<8|'t'<<16|' '<<24;
		riff.type0 = 'W'|'A'<<8|'V'<<16|'E'<<24;
		riff.id2 = 'd'|'a'<<8|'t'<<16|'a'<<24;
		riff.headersize = sizeof(riff.header);

		riff.header.wFormatTag=WAVE_FORMAT_PCM;

		riff.header.nSamplesPerSec = samplerate;//pwfex->nSamplesPerSec;
		riff.header.nChannels = pwfex->nChannels;
		riff.header.wBitsPerSample = pwfex->wBitsPerSample;
		riff.header.nBlockAlign=(riff.header.nChannels*riff.header.wBitsPerSample )/8;
		riff.header.nAvgBytesPerSec =(riff.header.nBlockAlign*riff.header.nSamplesPerSec );

		riff.datasize = (unsigned)chunk.get_sample_count()*riff.header.nBlockAlign;
		riff.filesize = riff.datasize + sizeof(riff);

		abort_callback_impl p_abort;
		file::ptr p_file;
		filesystem::g_open_write_new(p_file, path, p_abort);
		t_size i, count = resampler_chunks.get_count();
		for (i=0; i<count; i++)
		{
			audio_chunk * pChunk = resampler_chunks.get_item(i);
			if (pChunk)
			{
				processor->run(*pChunk, chunk2, 16, 16, false, 1.0);
				finalOutStream.append_fromptr((t_uint8*)chunk2.get_ptr(), chunk2.get_size());
			}
		}
		resampler_chunks.remove_all();
		riff.datasize = (unsigned)finalOutStream.get_size();
		riff.filesize = riff.datasize + sizeof(riff);
		p_file->write(&riff, sizeof(riff), p_abort);
		p_file->write(finalOutStream.get_ptr(), finalOutStream.get_size(), p_abort);
	}

}
Ejemplo n.º 6
0
//デバッグ用 認識結果をWaveファイルとして保存する
xreturn::r<bool> Recognition_SAPI::DebugSaveWavFile(const std::string& directory,ISpStreamFormat* streamFormat) const
{
	HRESULT hr;
	_USE_WINDOWS_ENCODING;

	const SPSTREAMFORMAT spFormat = SPSF_22kHz8BitMono;
	CSpStreamFormat Fmt( spFormat, &hr);
	if(FAILED(hr))	 return xreturn::windowsError(hr);

	{
		CSpStreamFormat OriginalFmt;
		{
			OriginalFmt.AssignFormat(streamFormat);

			// basic SAPI-stream for file-based storage
			CComPtr<ISpStream> cpStream;
			{
				ULONG cbWritten = 0;

				// create file on hard-disk for storing recognized audio, and specify audio format as the retained audio format
				std::string fff = directory + "\\" + num2str(time(NULL))+".wav";
				hr = SPBindToFile(_A2W(fff.c_str()) , SPFM_CREATE_ALWAYS, &cpStream, &OriginalFmt.FormatId(), OriginalFmt.WaveFormatExPtr(), SPFEI_ALL_EVENTS);
				if(FAILED(hr))	 return xreturn::windowsError(hr);

				// Continuously transfer data between the two streams until no more data is found (i.e. end of stream)
				// Note only transfer 1000 bytes at a time to creating large chunks of data at one time
				while (TRUE)
				{
					// for logging purposes, the app can retrieve the recognized audio stream length in bytes
					STATSTG stats;
					hr = streamFormat->Stat(&stats, NULL);
					if(FAILED(hr))	 return xreturn::windowsError(hr);

					// create a 1000-byte buffer for transferring
					BYTE bBuffer[1000];
					ULONG cbRead;

					// request 1000 bytes of data from the input stream
					hr = streamFormat->Read(bBuffer, 1000, &cbRead);
					// if data was returned??
					if (SUCCEEDED(hr) && cbRead > 0)
					{
						// then transfer/write the audio to the file-based stream
						hr = cpStream->Write(bBuffer, cbRead, &cbWritten);
						if(FAILED(hr))	 return xreturn::windowsError(hr);
					}

					// since there is no more data being added to the input stream, if the read request returned less than expected, the end of stream was reached, so break data transfer loop
					if (cbRead < 1000)
					{
						break;
					}
				}
			}
			// explicitly close the file-based stream to flush file data and allow app to immediately use the file
			hr = cpStream->Close();
			if(FAILED(hr))	 return xreturn::windowsError(hr);
		}
	}
	return true;
}