void speech_recognize::recordEvent()
{
	USES_CONVERSION;
	CSpEvent event;

	HRESULT hr = S_OK;
	if(m_SREngine.m_pRecoCtxt)
	{
		while( S_OK==event.GetFrom(m_SREngine.m_pRecoCtxt) )//等待创建语言主接口结束
		{
			switch(event.eEventId)
			{
			case SPEI_FALSE_RECOGNITION: //错误识别
				break;

			case SPEI_HYPOTHESIS: //假识别
			case SPEI_RECOGNITION:  //正确识别
				{
					CSpDynamicString dstrText;
					if (SUCCEEDED(event.RecoResult()->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, 
						TRUE, &dstrText, NULL)))
					{
						m_SREngine.m_pVoice->Speak(dstrText, SPF_ASYNC, NULL);
						executeCommand(event.RecoResult(), W2A(dstrText));
					}
					
				}
				break;

			default : 
				break;
			}
		}
	}
}
CString CTTS::GetText(ULONG ulStart, ULONG nlCount)
{
	USES_CONVERSION;
	CSpEvent event;
	CSpDynamicString dstrText;
	// Process all of the recognition events
	while (event.GetFrom(m_cpRecoCtxt) == S_OK)
	{
		switch (event.eEventId)
		{
			case SPEI_RECOGNITION:
			// There may be multiple recognition results, so get all of them
			{
				HRESULT hr = S_OK;
				if (nlCount == -1)
					event.RecoResult()->GetText(SP_GETWHOLEPHRASE, 
					SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL);
				else
				{
					ASSERT(nlCount > 0);
					event.RecoResult()->GetText(ulStart, nlCount, FALSE, 
						&dstrText, NULL);
				}
			}
			break;
		}
	}
	CString str=(CString)dstrText;
	return str;
}
예제 #3
0
void mssapi_captions::main_thread()
try {
	HRESULT hr;

	os_set_thread_name(__FUNCTION__);

	hr = grammar->SetDictationState(SPRS_ACTIVE);
	if (FAILED(hr))
		throw HRError("SetDictationState failed", hr);

	hr = recognizer->SetRecoState(SPRST_ACTIVE);
	if (FAILED(hr))
		throw HRError("SetRecoState(SPRST_ACTIVE) failed", hr);

	HANDLE events[] = {notify, stop};

	started = true;

	for (;;) {
		DWORD ret = WaitForMultipleObjects(2, events, false, INFINITE);
		if (ret != WAIT_OBJECT_0)
			break;

		CSpEvent event;
		bool exit = false;

		while (event.GetFrom(context) == S_OK) {
			if (event.eEventId == SPEI_RECOGNITION) {
				ISpRecoResult *result = event.RecoResult();

				CoTaskMemPtr<wchar_t> text;
				hr = result->GetText((ULONG)-1, (ULONG)-1,
						true, &text, nullptr);
				if (FAILED(hr))
					continue;

				char text_utf8[512];
				os_wcs_to_utf8(text, 0, text_utf8, 512);

				callback(text_utf8);

				blog(LOG_DEBUG, "\"%s\"", text_utf8);

			} else if (event.eEventId == SPEI_END_SR_STREAM) {
				exit = true;
				break;
			}
		}

		if (exit)
			break;
	}

	audio->Stop();

} catch (HRError err) {
	blog(LOG_WARNING, "%s failed: %s (%lX)", __FUNCTION__, err.str, err.hr);
}
예제 #4
0
static void __stdcall recognitionCallback(WPARAM wParam, LPARAM lParam)
{
	CSpEvent ev;
	while (ev.GetFrom(state.recog) == S_OK) {
		if (ev.eEventId == SPEI_RECOGNITION) {
			handleRecognition(ev.RecoResult());
		}
	}
}
예제 #5
0
/*****************************************************************************************
* CSimpleDict::RecoEvent()
*   Called whenever the dialog process is notified of a recognition event.
*   Inserts whatever is recognized into the edit box.  
******************************************************************************************/
void CSimpleDict::RecoEvent()
{
    USES_CONVERSION;
    CSpEvent event;

    // Process all of the recognition events
    while (event.GetFrom(m_cpRecoCtxt) == S_OK)
    {
        switch (event.eEventId)
        {
            case SPEI_SOUND_START:
                m_bInSound = TRUE;
                break;

            case SPEI_SOUND_END:
                if (m_bInSound)
                {
                    m_bInSound = FALSE;
                    if (!m_bGotReco)
                    {
                        // The sound has started and ended, 
                        // but the engine has not succeeded in recognizing anything
						const TCHAR szNoise[] = _T("<noise>");
                        ::SendDlgItemMessage( m_hDlg, IDC_EDIT_DICT, 
							EM_REPLACESEL, TRUE, (LPARAM) szNoise );
                    }
                    m_bGotReco = FALSE;
                }
                break;

            case SPEI_RECOGNITION:
                // There may be multiple recognition results, so get all of them
                {
                    m_bGotReco = TRUE;
                    static const WCHAR wszUnrecognized[] = L"<Unrecognized>";

                    CSpDynamicString dstrText;
                    if (FAILED(event.RecoResult()->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, 
                                                            &dstrText, NULL)))
                    {
                        dstrText = wszUnrecognized;
                    }

                    // Concatenate a space onto the end of the recognized word
                    dstrText.Append(L" ");

                    ::SendDlgItemMessage( m_hDlg, IDC_EDIT_DICT, EM_REPLACESEL, TRUE, (LPARAM) W2T(dstrText) );

                }
                break;

        }
    }
} 
예제 #6
0
void CMyDlg::OnRecoEvent()
{
	//CMyPackManApp *pApp = (CMyPackManApp *)AfxGetApp();   //View -> App
	CMainFrame *pMain = (CMainFrame *)AfxGetMainWnd();  //View -> MainFrm
	CMyPackManView *pView = (CMyPackManView *)pMain->GetActiveView();

	USES_CONVERSION;
	CSpEvent event;
	//MessageBox(L"A");
	while (event.GetFrom(m_cpRecoCtxt) == S_OK)
	{

		switch (event.eEventId)
		{
		case SPEI_RECOGNITION:
		{
			m_bReco = TRUE;
			static const WCHAR wszUnrecognized[] = L"fail";

			CSpDynamicString dstrText;
			if (FAILED(event.RecoResult()->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE,
				&dstrText, NULL)))
			{
				dstrText = wszUnrecognized;
			}

			dstrText.Append(L" ");

			m_Edit = dstrText;

			//if (m_Edit == "up " || m_Edit == "Up " || m_Edit == "Down " || m_Edit == "down " || m_Edit == "left " || m_Edit == "Left " || m_Edit == "Right " || m_Edit == "right "
			//	|| m_Edit == "let " || m_Edit == "light " || m_Edit == "night ")
			//	pView->m_EditV = m_Edit;
			////::SendDlgItemMessage(m_hWnd, IDC_EDIT_DICT, EM_REPLACESEL, TRUE, (LPARAM)W2T(dstrText));
			//UpdateData(TRUE);
			//m_pView->Invalidate();

			if (m_Edit == "down " || m_Edit == "Down ")
				m_pView->mKey = DOWN;
			else if (m_Edit == "up " || m_Edit == "Up ")
				m_pView->mKey = UP;
			else if (m_Edit == "one " || m_Edit == "One ")
				m_pView->mKey = LEFT;
			else if (m_Edit == "two " || m_Edit == "Two ")
				m_pView->mKey = RITE;

			UpdateData(TRUE);
			//UpdateData(FALSE);

		}
		break;
		}
	}
}
예제 #7
0
void VOICEREC_process_event(HWND hWnd)
{
	CSpEvent event;  // Event helper class

	// Loop processing events while there are any in the queue
	while (event.GetFrom(p_recogContext) == S_OK)
	{
		// Look at recognition event only
		switch (event.eEventId)
		{
			case SPEI_RECOGNITION:
				VOICEREC_execute_command(event.RecoResult(), hWnd);
				break;

		}
	}
}
//input the sound
inline HRESULT BlockForResult(ISpRecoContext * pRecoCtxt, ISpRecoResult ** ppResult) //recording variable and the result
{
	HRESULT hr = S_OK;
	CSpEvent event;

	while (SUCCEEDED(hr) &&
		SUCCEEDED(hr = event.GetFrom(pRecoCtxt)) &&
		hr == S_FALSE)
	{
		hr = pRecoCtxt->WaitForNotifyEvent(INFINITE);
	}

	*ppResult = event.RecoResult();
	if (*ppResult)
	{
		(*ppResult)->AddRef();
	}
	return hr;
}
예제 #9
0
/******************************************************************************
* ProcessRecoEvent *
*------------------*
*   Description:
*       Called to when reco event message is sent to main window procedure.
*       In the case of a recognition, it extracts result and calls ExecuteCommand.
*
******************************************************************************/
void ProcessRecoEvent( HWND hWnd )
{
    CSpEvent event;  // Event helper class

    // Loop processing events while there are any in the queue
    while (event.GetFrom(g_cpRecoCtxt) == S_OK)
    {
        // Look at recognition event only
        switch (event.eEventId)
        {
            case SPEI_RECOGNITION:
                ExecuteCommand(event.RecoResult(), hWnd);
                break;
            case SPEI_FALSE_RECOGNITION:
                HandleFalseReco(event.RecoResult(), hWnd);
                break;

        }
    }
}
void CTTSApp::MainHandleSynthEvent()
/////////////////////////////////////////////////////////////////
//
// Handles the WM_TTSAPPCUSTOMEVENT application defined message and all
// of it's appropriate SAPI5 events.
//
{

    CSpEvent        event;  // helper class in sphelper.h for events that releases any 
                            // allocated memory in it's destructor - SAFER than SPEVENT
    SPVOICESTATUS   Stat;
    WPARAM          nStart;
    LPARAM          nEnd;
    int             i = 0;
    HRESULT         hr = S_OK;

    while( event.GetFrom(m_cpVoice) == S_OK )
    {
        switch( event.eEventId )
        {
            case SPEI_START_INPUT_STREAM:
                if( IsDlgButtonChecked( m_hWnd, IDC_EVENTS ) )
                {
                    TTSAppStatusMessage( m_hWnd, _T("StartStream event\r\n") );
                }
                break; 

            case SPEI_END_INPUT_STREAM:
                // Set global boolean stop to TRUE when finished speaking
                m_bStop = TRUE; 
                // Highlight entire text
                nStart = 0;
                nEnd = SendDlgItemMessage( m_hWnd, IDE_EDITBOX, WM_GETTEXTLENGTH, 0, 0 );
                SendDlgItemMessage( m_hWnd, IDE_EDITBOX, EM_SETSEL, nStart, nEnd );
                // Mouth closed
                g_iBmp = 0;
                InvalidateRect( m_hChildWnd, NULL, FALSE );
                if( IsDlgButtonChecked( m_hWnd, IDC_EVENTS ) )
                {
                    TTSAppStatusMessage( m_hWnd, _T("EndStream event\r\n") );
                }
                break;     
                
            case SPEI_VOICE_CHANGE:
                if( IsDlgButtonChecked( m_hWnd, IDC_EVENTS ) )
                {
                    TTSAppStatusMessage( m_hWnd, _T("Voicechange event\r\n") );
                }
                break;

            case SPEI_TTS_BOOKMARK:
                if( IsDlgButtonChecked( m_hWnd, IDC_EVENTS ) )
                {
                    // Get the string associated with the bookmark
                    // and add the null terminator.
                    TCHAR szBuff2[MAX_PATH] = _T("Bookmark event: ");

                    size_t cEventString = wcslen( event.String() ) + 1;
                    WCHAR *pwszEventString = new WCHAR[ cEventString ];
                    if ( pwszEventString )
                    {
                        wcscpy_s( pwszEventString, cEventString, event.String() );
                        _tcscat_s( szBuff2, _countof(szBuff2), CW2T(pwszEventString) );
                        delete[] pwszEventString;
                    }

                    _tcscat_s( szBuff2, _countof(szBuff2), _T("\r\n") );
                    TTSAppStatusMessage( m_hWnd, szBuff2 );
                }
                break;

            case SPEI_WORD_BOUNDARY:
                hr = m_cpVoice->GetStatus( &Stat, NULL );
                if( FAILED( hr ) )
                {
                    TTSAppStatusMessage( m_hWnd, _T("Voice GetStatus error\r\n") );
                }

                // Highlight word
                nStart = (LPARAM)( Stat.ulInputWordPos / sizeof(char) );
                nEnd = nStart + Stat.ulInputWordLen;
                SendDlgItemMessage( m_hWnd, IDE_EDITBOX, EM_SETSEL, nStart, nEnd );
                if( IsDlgButtonChecked( m_hWnd, IDC_EVENTS ) )
                {
                    
                    TTSAppStatusMessage( m_hWnd, _T("Wordboundary event\r\n") );
                }
                break;

            case SPEI_PHONEME:
                if( IsDlgButtonChecked( m_hWnd, IDC_EVENTS ) )
                {
                    TTSAppStatusMessage( m_hWnd, _T("Phoneme event\r\n") );
                }
                break;

            case SPEI_VISEME:
                // Get the current mouth viseme position and map it to one of the 
                // 7 mouth bitmaps. 
                g_iBmp = g_aMapVisemeToImage[event.Viseme()]; // current viseme

                InvalidateRect( m_hChildWnd, NULL, FALSE );
                if( IsDlgButtonChecked( m_hWnd, IDC_EVENTS ) )
                {
                    TTSAppStatusMessage( m_hWnd, _T("Viseme event\r\n") );
                }
                break;

            case SPEI_SENTENCE_BOUNDARY:
                if( IsDlgButtonChecked( m_hWnd, IDC_EVENTS ) )
                {
                    TTSAppStatusMessage( m_hWnd, _T("Sentence event\r\n") );
                }
                break;

            case SPEI_TTS_AUDIO_LEVEL:
                if( IsDlgButtonChecked( m_hWnd, IDC_EVENTS ) )
                {
                    WCHAR wszBuff[MAX_PATH];
                    swprintf_s(wszBuff, _countof(wszBuff), L"Audio level: %d\r\n", (ULONG)event.wParam);
                    TTSAppStatusMessage( m_hWnd, CW2T(wszBuff) );
                }
                break;

            case SPEI_TTS_PRIVATE:
                if( IsDlgButtonChecked( m_hWnd, IDC_EVENTS ) )
                {
                    TTSAppStatusMessage( m_hWnd, _T("Private engine event\r\n") );
                }
                break;

            default:
                TTSAppStatusMessage( m_hWnd, _T("Unknown message\r\n") );
                break;
        }
    }
}
예제 #11
0
/**
	This is called when SAPI 5.1 has an event.

	In the textless case, we only handle SPIE_RECOGNITION event. We aren't looking
	at SPIE_HYPOTHESIS. This might be an error. We might be more robust by handling
	both.

	We process the event and add the phonemes we get to the result list
**/
void sapi_textless_lipsync::callback()
{
    CSpEvent event;	// the event

	ISpRecoResult *pRecoResult;			// recoResult from the event
	SPPHRASE      *pSpPhrase;			// phrase from recoResult
	SPRECORESULTTIMES pRecoResultTimes; // result times from RecoResult
	WCHAR phone_buffer[256];			// phoneme buffer for conversion
	long msStart;						// time stamp of the result 
	

    while (event.GetFrom(this->m_recogCntxt) == S_OK)
    {
		if (event.eEventId == SPEI_RECOGNITION /*|| event.eEventId == SPEI_HYPOTHESIS */)
		{	
			// for textless we only accept full recognition. This might be an area
			// to watch out for
			
			// pull out the result object
			pRecoResult = event.RecoResult();

			// pull the whole text from the result
			CSpDynamicString pSapiText;
			pRecoResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, FALSE, &pSapiText, NULL);

            // get the start time for the phrase. we use this as an offset for the phrase
			// elements. Not sure if this is correct.
			pRecoResult->GetResultTimes(&pRecoResultTimes);
			msStart = sapi_time_to_milli(pRecoResultTimes.ullStart);

			// extract the phrase object
			pRecoResult->GetPhrase(&pSpPhrase);

			if (pSpPhrase != NULL)
			{
				// Process each element of the phrase. These should be our
				// orthorgraphs
				const SPPHRASEELEMENT *p = pSpPhrase->pElements;
				const SPPHRASEELEMENT *pEnd = p + pSpPhrase->Rule.ulCountOfElements;
				while (p != pEnd)
				{
					// for each phrase element we create a marker 
					// that contains the time stamps along with the 
					// phonemes. associated with it.
					alignment_result al;
                    al.m_orthography = p->pszDisplayText;
					// Get the phonemes
					ULONG j = 0;
                    SPPHONEID phn[2];
                    phn[1] = 0x00;
                    while (p->pszPronunciation[j] != 0)
                    {
						// process each phoneme
                        phn[0] = p->pszPronunciation[j];
                        m_phnCvt->IdToPhone(phn, phone_buffer);
                        al.m_phonemes.push_back(phone_buffer);
                        j++;
                    }
					// start time of the ortheme
                    al.m_msStart= msStart + bytes_to_milli(p->ulAudioStreamOffset);
					// end time of the ortheme
					al.m_msEnd = bytes_to_milli(p->ulAudioSizeBytes);
                    al.m_msEnd += al.m_msStart;
					// add it to the results
                    m_results.push_back(al);

					p++;
				}
			}
		}
		else if (event.eEventId == SPEI_END_SR_STREAM)
		{
			// This event occurs when the stream has finished processing.
            // we set a flag to indicate that things are done.
			m_bDone = TRUE;        
		}
    }
}
예제 #12
0
/**
	This is called by SAPI 5.1 when it has an event. 
	
	We use the SpEvent class provided by their SDK to simplify the processing.

	Basically, when we get a "RECOGNITION" event or a "SPEI_HYPOTHESIS" event
	we process them the same. Hypothesis are more likely, for all but very
	short files, "SPIE_RECOGNITION" is a rarity. 

	Since the hypothesis will include duplicate data, we have a decision. We
	can save the newest hypothesis or we can save the one which generates the
	most alignments. Imperically, it seems that sticking with the longest
	result works best. But perhaps this is not so.
**/
void sapi_textbased_lipsync::callback()
{
    //USES_CONVERSION;
    CSpEvent event;

	ISpRecoResult *pRecoResult; // recoResult from the event
	SPPHRASE *pSpPhrase;	// phrase from recoResult
	SPRECORESULTTIMES pRecoResultTimes; // result times from RecoResult
	WCHAR phone_buffer[256];			// buffer for the phonemes
	UINT msStart;						// start time of the phrase
	

	// Process the events
	while (event.GetFrom(this->m_recogCntxt) == S_OK)
    {
		if (event.eEventId == SPEI_RECOGNITION || event.eEventId == SPEI_HYPOTHESIS)
		{
			// text based has to accept hypothesis or it mostly fails unless the
			// script is very short

			// pull out the result object
			pRecoResult = event.RecoResult();

			// pull the whole text from the result
			CSpDynamicString pSapiText;
			pRecoResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, FALSE, &pSapiText, NULL);

			// get the start time for the phrase. we use this as an offset for the phrase
			// elements. Not sure if this is correct.
			pRecoResult->GetResultTimes(&pRecoResultTimes);
			msStart = sapi_time_to_milli(pRecoResultTimes.ullStart);

			std::wstring strPrintText = pSapiText;
			std::cerr << "hypothesis: " << wstring_2_string(strPrintText) << std::endl;
			// if the new results are longer than existing results in orthographic form
			// we accept the results and process the phonemes. Otherwise, we skip it
            if ((wcslen(pSapiText) > this->m_strResults.size()))
            {
                m_strResults = pSapiText;
				// clear the old results. This hypothesis trumps it
                this->m_results.clear();
			    
			    // extract the phrase object
			    pRecoResult->GetPhrase(&pSpPhrase);

			    if (pSpPhrase != NULL)
			    {
				    // Process each element of the phrase. These should be our
					// orthorgraphs
					const SPPHRASEELEMENT *p = pSpPhrase->pElements;
					const SPPHRASEELEMENT *pEnd = p + pSpPhrase->Rule.ulCountOfElements;
				    while (p != pEnd)
				    {
						// for each phrase element we create a marker 
					// that contains the time stamps along with the 
					// phonemes. associated with it.
					    alignment_result al;
                        al.m_orthography = p->pszDisplayText;
					    // Get the phonemes
                        ULONG j = 0;
                        SPPHONEID phn[2];
                        phn[1] = 0x00;
                        while (p->pszPronunciation[j] != 0)
                        {
                            // process each phoneme
                            phn[0] = p->pszPronunciation[j];
                            m_phnCvt->IdToPhone(phn, phone_buffer);
                            al.m_phonemes.push_back(phone_buffer);
                            j++;
                        }
                                             
                        // start time of the ortheme
					    al.m_msStart= msStart + bytes_to_milli(p->ulAudioStreamOffset);
					    // end time of the ortheme
					    al.m_msEnd = bytes_to_milli(p->ulAudioSizeBytes);
                        al.m_msEnd += al.m_msStart;
						// add it to the results
                        m_results.push_back(al);
						
						p++;
				    }
			    }
            }
		}
		else if (event.eEventId == SPEI_END_SR_STREAM)
		{
			// This event occurs when the stream has finished processing.
            // we set a flag to indicate that things are done.
			m_bDone = TRUE;        
		}
    }
}
예제 #13
0
void RSpeechRecognition::CallbackRule()
{
	USES_CONVERSION;
	HRESULT hr;
	std::string dictationString;

	CSpEvent ruleEvent;
	hr = ruleEvent.GetFrom( this->RuleRecoCtxt );
	if ( FAILED(hr) )	return ;

	//認識した結果
	ISpRecoResult* result;
	result = ruleEvent.RecoResult();

	//認識した文字列の取得
	CSpDynamicString dstrText;
	hr = result->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL);
	if ( FAILED(hr) )	return ;
	this->ResultString = W2A(dstrText);

	//ルールベースで認識した結果の音声部分をもう一度 ディクテーションにかけます。
	//これで過剰なマッチを排除します。
	{
		CComPtr<ISpStreamFormat>	resultStream;
		hr = result->GetAudio( 0, 0, &resultStream );
		if ( FAILED(hr) )	return;

		//オーディオから読み込んでね
		hr = this->DictationEngine->SetInput( resultStream, TRUE);  
		if(FAILED(hr))	 return;

		hr = this->DictationGrammar->SetDictationState(SPRS_ACTIVE );
		if(FAILED(hr))	 return;

		hr = this->DictationRecoCtxt->WaitForNotifyEvent(10000); //10秒タイムアウト
		if ( FAILED(hr) )	return;

		hr = this->DictationGrammar->SetDictationState(SPRS_INACTIVE );
		if(FAILED(hr))	 return;

		CSpEvent tempevent;
		hr = tempevent.GetFrom( this->DictationRecoCtxt );
		if ( FAILED(hr) )	return ;

		//認識した結果
		ISpRecoResult* tempresult;
		tempresult = tempevent.RecoResult();

		//認識した文字列の取得
		CSpDynamicString tempdstrText;
		hr = tempresult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &tempdstrText, NULL);
		if ( FAILED(hr) )	return ;
		std::string dictationString = W2A(tempdstrText);
		//ディクテーションフィルターで絞る
		if ( dictationString.find(this->DicticationFilterWord) == std::string::npos )
		{
			//フィルターにより拒否
			this->FlagCleanup();
			return ;
		}
	}


	//認識に XMLを使用した場合、代入された結果を得る.
	SPPHRASE *pPhrase;
	hr = result->GetPhrase(&pPhrase);
	if ( FAILED(hr) )	return ;

	this->ResultMap.clear();
	const SPPHRASEPROPERTY *pProp;
	for (pProp = pPhrase->pProperties; pProp; pProp = pProp->pNextSibling)
	{
		this->ResultMap[ W2A(pProp->pszName) ] = W2A(pProp->pszValue);
	}
	CoTaskMemFree(pPhrase);

	//コマンド認識
	SendMessage(this->CallbackWindowHandle , this->CallbackWindowMesage , 0 , 0);
	this->FlagCleanup();
}
예제 #14
0
void obs_captions::main_thread()
try {
	ComPtr<CaptionStream>  audio;
	ComPtr<ISpObjectToken> token;
	ComPtr<ISpRecoGrammar> grammar;
	ComPtr<ISpRecognizer>  recognizer;
	ComPtr<ISpRecoContext> context;
	HRESULT hr;

	auto cb = [&] (const struct audio_data *audio_data,
			bool muted)
	{
		audio->PushAudio(audio_data, muted);
	};

	using cb_t = decltype(cb);

	auto pre_cb = [] (void *param, obs_source_t*,
		const struct audio_data *audio_data, bool muted)
	{
		return (*static_cast<cb_t*>(param))(audio_data, muted);
	};

	os_set_thread_name(__FUNCTION__);

	CoInitialize(nullptr);

	wchar_t lang_str[32];
	_snwprintf(lang_str, 31, L"language=%x", (int)captions->lang_id);

	hr = SpFindBestToken(SPCAT_RECOGNIZERS, lang_str, nullptr, &token);
	if (FAILED(hr))
		throw HRError("SpFindBestToken failed", hr);

	hr = CoCreateInstance(CLSID_SpInprocRecognizer, nullptr, CLSCTX_ALL,
			__uuidof(ISpRecognizer), (void**)&recognizer);
	if (FAILED(hr))
		throw HRError("CoCreateInstance for recognizer failed", hr);

	hr = recognizer->SetRecognizer(token);
	if (FAILED(hr))
		throw HRError("SetRecognizer failed", hr);

	hr = recognizer->SetRecoState(SPRST_INACTIVE);
	if (FAILED(hr))
		throw HRError("SetRecoState(SPRST_INACTIVE) failed", hr);

	hr = recognizer->CreateRecoContext(&context);
	if (FAILED(hr))
		throw HRError("CreateRecoContext failed", hr);

	ULONGLONG interest = SPFEI(SPEI_RECOGNITION) |
		SPFEI(SPEI_END_SR_STREAM);
	hr = context->SetInterest(interest, interest);
	if (FAILED(hr))
		throw HRError("SetInterest failed", hr);

	HANDLE notify;

	hr = context->SetNotifyWin32Event();
	if (FAILED(hr))
		throw HRError("SetNotifyWin32Event", hr);

	notify = context->GetNotifyEventHandle();
	if (notify == INVALID_HANDLE_VALUE)
		throw HRError("GetNotifyEventHandle failed", E_NOINTERFACE);

	size_t sample_rate = audio_output_get_sample_rate(obs_get_audio());
	audio = new CaptionStream((DWORD)sample_rate);
	audio->Release();

	hr = recognizer->SetInput(audio, false);
	if (FAILED(hr))
		throw HRError("SetInput failed", hr);

	hr = context->CreateGrammar(1, &grammar);
	if (FAILED(hr))
		throw HRError("CreateGrammar failed", hr);

	hr = grammar->LoadDictation(nullptr, SPLO_STATIC);
	if (FAILED(hr))
		throw HRError("LoadDictation failed", hr);

	hr = grammar->SetDictationState(SPRS_ACTIVE);
	if (FAILED(hr))
		throw HRError("SetDictationState failed", hr);

	hr = recognizer->SetRecoState(SPRST_ACTIVE);
	if (FAILED(hr))
		throw HRError("SetRecoState(SPRST_ACTIVE) failed", hr);

	HANDLE events[] = {notify, stop_event};

	{
		captions->source = GetWeakSourceByName(
				captions->source_name.c_str());
		OBSSource strong = OBSGetStrongRef(source);
		if (strong)
			obs_source_add_audio_capture_callback(strong,
					pre_cb, &cb);
	}

	for (;;) {
		DWORD ret = WaitForMultipleObjects(2, events, false, INFINITE);
		if (ret != WAIT_OBJECT_0)
			break;

		CSpEvent event;
		bool exit = false;

		while (event.GetFrom(context) == S_OK) {
			if (event.eEventId == SPEI_RECOGNITION) {
				ISpRecoResult *result = event.RecoResult();

				CoTaskMemPtr<wchar_t> text;
				hr = result->GetText((ULONG)-1, (ULONG)-1,
						true, &text, nullptr);
				if (FAILED(hr))
					continue;

				char text_utf8[512];
				os_wcs_to_utf8(text, 0, text_utf8, 512);

				obs_output_t *output =
					obs_frontend_get_streaming_output();
				if (output)
					obs_output_output_caption_text1(output,
							text_utf8);

				debug("\"%s\"", text_utf8);

				obs_output_release(output);

			} else if (event.eEventId == SPEI_END_SR_STREAM) {
				exit = true;
				break;
			}
		}

		if (exit)
			break;
	}

	{
		OBSSource strong = OBSGetStrongRef(source);
		if (strong)
			obs_source_remove_audio_capture_callback(strong,
					pre_cb, &cb);
	}

	audio->Stop();

	CoUninitialize();

} catch (HRError err) {
	error("%s failed: %s (%lX)", __FUNCTION__, err.str, err.hr);
	CoUninitialize();
	captions->th.detach();
}
예제 #15
0
	void Sound::test() {

		ISpVoice * pVoice = NULL;
		ISpObjectToken*        pVoiceToken=nullptr;
		IEnumSpObjectTokens*   pEnum;
		ULONG                  ulCount = 0;

		if (FAILED(::CoInitialize(NULL)))
		{
			return;
		}
		HRESULT hr = S_OK;

		// Find the best matching installed en-us recognizer.
		CComPtr<ISpObjectToken> cpRecognizerToken;

		if (SUCCEEDED(hr))
		{
			hr = SpFindBestToken(SPCAT_RECOGNIZERS, L"language=409", NULL, &cpRecognizerToken);
		}

		// Create the in-process recognizer and immediately set its state to inactive.
		CComPtr<ISpRecognizer> cpRecognizer;

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecognizer(cpRecognizerToken);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecoState(SPRST_INACTIVE);
		}

		// Create a new recognition context from the recognizer.
		CComPtr<ISpRecoContext> cpContext;

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->CreateRecoContext(&cpContext);
		}

		// Subscribe to the speech recognition event and end stream event.
		if (SUCCEEDED(hr))
		{
			ULONGLONG ullEventInterest = SPFEI(SPEI_RECOGNITION);
			hr = cpContext->SetInterest(ullEventInterest, ullEventInterest);
		}

		// Establish a Win32 event to signal when speech events are available.
		HANDLE hSpeechNotifyEvent = INVALID_HANDLE_VALUE;

		if (SUCCEEDED(hr))
		{
			hr = cpContext->SetNotifyWin32Event();
		}

		if (SUCCEEDED(hr))
		{
			hSpeechNotifyEvent = cpContext->GetNotifyEventHandle();

			if (INVALID_HANDLE_VALUE == hSpeechNotifyEvent)
			{
				// Notification handle unsupported.
				hr = E_NOINTERFACE;
			}
		}

		// Initialize an audio object to use the default audio input of the system and set the recognizer to use it.
		CComPtr<ISpAudio> cpAudioIn;

		if (SUCCEEDED(hr))
		{
			hr = cpAudioIn.CoCreateInstance(CLSID_SpMMAudioIn);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetInput(cpAudioIn, TRUE);
		}

		// Populate a WAVEFORMATEX struct with our desired output audio format. information.
		WAVEFORMATEX* pWfexCoMemRetainedAudioFormat = NULL;
		GUID guidRetainedAudioFormat = GUID_NULL;

		if (SUCCEEDED(hr))
		{
			hr = SpConvertStreamFormatEnum(SPSF_16kHz16BitMono, &guidRetainedAudioFormat, &pWfexCoMemRetainedAudioFormat);
		}

		// Instruct the recognizer to retain the audio from its recognition results.
		if (SUCCEEDED(hr))
		{
			hr = cpContext->SetAudioOptions(SPAO_RETAIN_AUDIO, &guidRetainedAudioFormat, pWfexCoMemRetainedAudioFormat);
		}

		if (NULL != pWfexCoMemRetainedAudioFormat)
		{
			CoTaskMemFree(pWfexCoMemRetainedAudioFormat);
		}

		// Create a new grammar and load an SRGS grammar from file.
		CComPtr<ISpRecoGrammar> cpGrammar;

		if (SUCCEEDED(hr))
		{
			hr = cpContext->CreateGrammar(0, &cpGrammar);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpGrammar->LoadCmdFromFile(L"grammar.grxml", SPLO_STATIC);
		}

		// Set all top-level rules in the new grammar to the active state.
		if (SUCCEEDED(hr))
		{
			hr = cpGrammar->SetRuleState(NULL, NULL, SPRS_ACTIVE);
		}

		// Set the recognizer state to active to begin recognition.
		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecoState(SPRST_ACTIVE_ALWAYS);
		}

		// Establish a separate Win32 event to signal the event loop exit.
		HANDLE hExitEvent = CreateEventW(NULL, FALSE, FALSE, NULL);

		// Collect the events listened for to pump the speech event loop.
		HANDLE rghEvents[] = { hSpeechNotifyEvent, hExitEvent };

		// Speech recognition event loop.
		BOOL fContinue = TRUE;

		while (fContinue && SUCCEEDED(hr))
		{
			// Wait for either a speech event or an exit event, with a 15 second timeout.
			DWORD dwMessage = WaitForMultipleObjects(sp_countof(rghEvents), rghEvents, FALSE, 15000);

			switch (dwMessage)
			{
				// With the WaitForMultipleObjects call above, WAIT_OBJECT_0 is a speech event from hSpeechNotifyEvent.
			case WAIT_OBJECT_0:
			{
				// Sequentially grab the available speech events from the speech event queue.
				CSpEvent spevent;

				while (S_OK == spevent.GetFrom(cpContext))
				{
					switch (spevent.eEventId)
					{
					case SPEI_RECOGNITION:
					{
						// Retrieve the recognition result and output the text of that result.
						ISpRecoResult* pResult = spevent.RecoResult();

						LPWSTR pszCoMemResultText = NULL;
						hr = pResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &pszCoMemResultText, NULL);

						if (SUCCEEDED(hr))
						{
							wprintf(L"Recognition event received, text=\"%s\"\r\n", pszCoMemResultText);
						}

						// Also retrieve the retained audio we requested.
						CComPtr<ISpStreamFormat> cpRetainedAudio;

						if (SUCCEEDED(hr))
						{
							hr = pResult->GetAudio(0, 0, &cpRetainedAudio);
						}

						// To demonstrate, we'll speak the retained audio back using ISpVoice.
						CComPtr<ISpVoice> cpVoice;

						if (SUCCEEDED(hr))
						{
							hr = cpVoice.CoCreateInstance(CLSID_SpVoice);
						}

						if (SUCCEEDED(hr))
						{
							hr = cpVoice->SpeakStream(cpRetainedAudio, SPF_DEFAULT, 0);
						}

						if (NULL != pszCoMemResultText)
						{
							CoTaskMemFree(pszCoMemResultText);
						}

						break;
					}
					}
				}

				break;
			}
			case WAIT_OBJECT_0 + 1:
			case WAIT_TIMEOUT:
			{
				// Exit event or timeout; discontinue the speech loop.
				fContinue = FALSE;
				//break;
			}
			}
		}

	CoUninitialize();

		CComPtr <ISpVoice>		cpVoice;
		CComPtr <ISpStream>		cpStream;
		CSpStreamFormat			cAudioFmt;

		//Create a SAPI Voice
		hr = cpVoice.CoCreateInstance(CLSID_SpVoice);

		//Set the audio format
		if (SUCCEEDED(hr))
		{
			hr = cAudioFmt.AssignFormat(SPSF_22kHz16BitMono);
		}

		//Call SPBindToFile, a SAPI helper method,  to bind the audio stream to the file
		if (SUCCEEDED(hr))
		{

			hr = SPBindToFile(L"c:\\ttstemp.wav", SPFM_CREATE_ALWAYS,
				&cpStream, &cAudioFmt.FormatId(), cAudioFmt.WaveFormatExPtr());
		}

		//set the output to cpStream so that the output audio data will be stored in cpStream
		if (SUCCEEDED(hr))
		{
			hr = cpVoice->SetOutput(cpStream, TRUE);
		}

		//Speak the text "hello world" synchronously
		if (SUCCEEDED(hr))
		{
			hr = cpVoice->Speak(L"Hello World", SPF_DEFAULT, NULL);
		}

		//close the stream
		if (SUCCEEDED(hr))
		{
			hr = cpStream->Close();
		}

		//Release the stream and voice object
		cpStream.Release();
		cpVoice.Release();

		CComPtr<ISpGrammarBuilder>    cpGrammarBuilder;
		SPSTATEHANDLE                 hStateTravel;
		// Create (if rule does not already exist)
		// top-level Rule, defaulting to Active.
		hr = cpGrammarBuilder->GetRule(L"Travel", 0, SPRAF_TopLevel | SPRAF_Active, TRUE, &hStateTravel);

		// Approach 1: List all possible phrases.
		// This is the most intuitive approach, and it does not sacrifice efficiency
		// because the grammar builder will merge shared sub-phrases when possible.
		// There is only one root state, hStateTravel, and the terminal NULL state,
		// and there are six unique transitions between root state and NULL state.

		/* XML Approximation:
		<rule id="Travel">
		<item> fly to Seattle </item>
		<item> fly to New York </item>
		<item> fly to Washington DC </item>
		<item> drive to Seattle </item>
		<item> drive to New York </item>
		<item> drive to Washington DC </item>
		</rule>
		*/

		// Create set of peer phrases, each containing complete phrase.
		// Note: the word delimiter is set as " ", so that the text we
		// attach to the transition can be multiple words (for example,
		// "fly to Seattle" is implicitly "fly" + "to" + "Seattle"):
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"fly to Seattle", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"fly to New York", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"fly to Washington DC", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"drive to Seattle", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"drive to New York", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"drive to Washington DC", L" ", SPWT_LEXICAL, 1, NULL);
		}
		// Find the best matching installed en-US recognizer.
		//CComPtr<ISpObjectToken> cpRecognizerToken;

		if (SUCCEEDED(hr))
		{
			hr = SpFindBestToken(SPCAT_RECOGNIZERS, L"language=409", NULL, &cpRecognizerToken);
		}

		// Create the in-process recognizer and immediately set its state to inactive.
		//CComPtr<ISpRecognizer> cpRecognizer;

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecognizer(cpRecognizerToken);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecoState(SPRST_INACTIVE);
		}

		// Create a new recognition context from the recognizer.
		//CComPtr<ISpRecoContext> cpContext;

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->CreateRecoContext(&cpContext);
		}

		// Subscribe to the speech recognition event and end stream event.
		if (SUCCEEDED(hr))
		{
			ULONGLONG ullEventInterest = SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_END_SR_STREAM);
			hr = cpContext->SetInterest(ullEventInterest, ullEventInterest);
		}

		// Establish a Win32 event to signal when speech events are available.
		//HANDLE hSpeechNotifyEvent = INVALID_HANDLE_VALUE;

		if (SUCCEEDED(hr))
		{
			hr = cpContext->SetNotifyWin32Event();
		}

		if (SUCCEEDED(hr))
		{
			hr = cpContext->SetNotifyWin32Event();
		}

		if (SUCCEEDED(hr))
		{
			hSpeechNotifyEvent = cpContext->GetNotifyEventHandle();

			if (INVALID_HANDLE_VALUE == hSpeechNotifyEvent)
			{
				// Notification handle unsupported
				//hr = SPERR_UNITIALIZED;
			}
		}
		// Set up an audio input stream using a .wav file and set the recognizer's input.
		CComPtr<ISpStream> cpInputStream;

		if (SUCCEEDED(hr))
		{
			hr = SPBindToFile(L"Test.wav", SPFM_OPEN_READONLY, &cpInputStream);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetInput(cpInputStream, TRUE);
		}

		// Create a new grammar and load an SRGS grammar from file.
		//CComPtr<ISpRecoGrammar> cpGrammar;

		if (SUCCEEDED(hr))
		{
			hr = cpContext->CreateGrammar(0, &cpGrammar);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpGrammar->LoadCmdFromFile(L"grammar.grxml", SPLO_STATIC);
		}

		// Set all top-level rules in the new grammar to the active state.
		if (SUCCEEDED(hr))
		{
			hr = cpGrammar->SetRuleState(NULL, NULL, SPRS_ACTIVE);
		}

		// Finally, set the recognizer state to active to begin recognition.
		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecoState(SPRST_ACTIVE_ALWAYS);
		}

		 hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void     **)&pVoice);
		if (SUCCEEDED(hr)) {
			hr = SpEnumTokens(SPCAT_VOICES, L"Gender=Female", NULL, &pEnum);
			if (SUCCEEDED(hr))
			{
				// Get the number of voices.
				hr = pEnum->GetCount(&ulCount);
			}

			// Obtain a list of available voice tokens, set
			// the voice to the token, and call Speak.
			while (SUCCEEDED(hr) && ulCount--)			{
				if (pVoiceToken != nullptr) {
					pVoiceToken->Release();
				}

				if (SUCCEEDED(hr))
				{
					hr = pEnum->Next(1, &pVoiceToken, NULL);
				}

				if (SUCCEEDED(hr))
				{
					hr = pVoice->SetVoice(pVoiceToken);
				}

				if (SUCCEEDED(hr))
				{
					wchar_t* start = L"<?xml version=\"1.0\" encoding=\"ISO - 8859 - 1\"?><speak version = \"1.0\" xmlns = \"http://www.w3.org/2001/10/synthesis\"	xml:lang = \"en-US\">";
					wchar_t* end = L"</speak>";
					const wchar_t *xml = L"<voice required = \"Gender=Male\"> hi! <prosody pitch=\"fast\"> This is low pitch. </prosody><prosody volume=\"x - loud\"> This is extra loud volume. </prosody>";
					wstring s = start;
					s += xml;
					s += end;
					
					hr = pVoice->Speak(xml, SPF_IS_XML| SPF_ASYNC, 0);
					//hr = pVoice->Speak(L"How are you?", SPF_DEFAULT, NULL);
				}

			}
			/*
			if (SUCCEEDED(hr)) {
				hr = pEnum->Next(1, &pVoiceToken, NULL);
				if (SUCCEEDED(hr)) {
					hr = pVoice->SetVoice(pVoiceToken);
					// Set the output to the default audio device.
					if (SUCCEEDED(hr)) {
						hr = pVoice->SetOutput(NULL, TRUE);
						if (SUCCEEDED(hr)) {
							hr = pVoice->Speak(L"Hello, world!", SPF_DEFAULT, 0);
						}
					}
				}
			}
			*/
			pVoice->Release();
		}
		::CoUninitialize();
	}
예제 #16
0
//ルールベースで認識した結果の音声部分をもう一度 ディクテーションにかけます。
//これで過剰なマッチを排除します。
xreturn::r<std::string> Recognition_SAPI::convertDictation(ISpRecoResult* result,const std::string& ruleName)
{
	HRESULT hr;
	_USE_WINDOWS_ENCODING;

	CComPtr<ISpStreamFormat>	resultStream;
	{
		hr = result->GetAudio( 0, 1, &resultStream );
		if(FAILED(hr))	 return xreturn::windowsError(hr);

		//オーディオから読み込んでね
		hr = this->DictationEngine->SetInput( resultStream, TRUE);  
		if(FAILED(hr))	 return xreturn::windowsError(hr);

		hr = this->DictationGrammar->SetRuleState(ruleName.empty() ? NULL : _A2W(ruleName.c_str()), NULL, SPRS_ACTIVE );
		if(FAILED(hr))	 return xreturn::windowsError(hr);

		hr = this->DictationRecoCtxt->WaitForNotifyEvent(2000); //2秒タイムアウト
		if(FAILED(hr))	 return xreturn::windowsError(hr);

		hr = this->DictationGrammar->SetRuleState(NULL, NULL, SPRS_INACTIVE );
		if(FAILED(hr))	 return xreturn::windowsError(hr);

		{
			CSpEvent tempevent;
			hr = tempevent.GetFrom( this->DictationRecoCtxt );
			if(FAILED(hr))	 return xreturn::windowsError(hr);

			if (tempevent.eEventId == SPEI_RECOGNITION)
			{//認識した結果
				ISpRecoResult* tempresult;
				{
					tempresult = tempevent.RecoResult();

					//認識した文字列の取得
					CSpDynamicString tempdstrText;
					hr = tempresult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &tempdstrText, NULL);
					if(FAILED(hr))	 return xreturn::windowsError(hr);

					SPPHRASE *pPhrase;
					hr = tempresult->GetPhrase(&pPhrase);
					if ( FAILED(hr) )	return xreturn::windowsError(hr);

					double confidence = pPhrase->pElements->SREngineConfidence;

					std::string ret = _W2A(tempdstrText);
					this->PoolMainWindow->SyncInvokeLog(std::string() + "ディクテーションフィルター :" + ret + + " " + num2str(confidence),LOG_LEVEL_DEBUG);

					if (confidence <= 0.60)
					{
						this->PoolMainWindow->SyncInvokeLog(std::string() + "ディクテーションフィルター棄却",LOG_LEVEL_DEBUG);
						return "";
					}

					return ret;
				}
			}
		}
	}

	//不明
	return "";
}
예제 #17
0
//認識開始
void RSpeechRecognition::Listen() throw(RComException)
{
	USES_CONVERSION;
	HRESULT hr;

	CSpEvent event;

	//録音が終わるまで大待機
	hr = this->RecoCtxt->WaitForNotifyEvent(INFINITE);
	if ( FAILED(hr) )	throw RComException(hr , "WaitForNotifyEvent に失敗");

	hr = event.GetFrom( this->RecoCtxt );
	if ( FAILED(hr) )	throw RComException(hr , "GetFrom に失敗");

	//認識した結果
	ISpRecoResult* result;
	result = event.RecoResult();

	//認識した文字列の取得
	CSpDynamicString dstrText;
	hr = result->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL);
	if ( FAILED(hr) )	throw RComException(hr , "録音したテキストの取得に失敗しました");
	this->ResultString = W2A(dstrText);

	//認識に XMLを使用した場合、代入された結果を得る.
	SPPHRASE *pPhrase;
	event.RecoResult()->GetPhrase(&pPhrase);
	const SPPHRASEPROPERTY *pProp;
	for (pProp = pPhrase->pProperties; pProp; pProp = pProp->pNextSibling)
	{
		string a = W2A(pProp->pszName);
		this->ResultMap[ W2A(pProp->pszName) ] = W2A(pProp->pszValue);
	}
	CoTaskMemFree(pPhrase);

/*
//デバッグのため、読み取った音声をwaveファイルに保存してみる。
	//ファイルに保存. save
	{
		CComPtr<ISpStreamFormat>	ResultStream;

		CComPtr<ISpVoice> voice;
		hr = this->RecoCtxt->GetVoice(&voice);
		if(FAILED(hr))	throw RComException(hr , "GetVoice に失敗");

		hr = event.RecoResult()->GetAudio( 0, 0, &ResultStream );
		if ( FAILED(hr) )	throw RComException(hr , "GetAudio に失敗しました");
		{
			CComPtr<ISpStream> cpWavStream; 
			CComPtr<ISpStreamFormat> cpOldStream; 
			CSpStreamFormat OriginalFmt; 
			voice->GetOutputStream( &cpOldStream ); 
			OriginalFmt.AssignFormat(cpOldStream); 
			hr = SPBindToFile( L"C:\\Users\\rti\\Desktop\\naichichi\\test\\output.wav",SPFM_CREATE_ALWAYS, 
				&cpWavStream,&OriginalFmt.FormatId(), 
				OriginalFmt.WaveFormatExPtr() 	); 
			voice->SetOutput(cpWavStream,TRUE); 
		}
	}
*/
}
예제 #18
0
/**********************************************************
* COperator::HandleCall *
*-----------------------*
*   Description:
*       Deals with the call
*   Return:
*       S_OK
*       Failed return value of ISpMMSysAudio::SetState(),
*           ISpVoice::Speak()
************************************************************/
HRESULT COperator::HandleCall()
{
    // PLEASE NOTE: This is a single-threaded app, so if the caller
    // hangs up after the call-handling sequence has started, the 
    // app will not be notified until after the entire call sequence 
    // has finished.  
    // If you want to be able to cut the call-handling short because
    // the caller hung up, you need to have a separate thread listening
    // for  TAPI's CS_DISCONNECT notification.

    _ASSERTE( m_cpMMSysAudioOut );
    HRESULT hr = S_OK;
      
    // Now that the call is connected, we can start up the audio output
    hr = m_cpOutgoingVoice->Speak( L"Hello, please say something to me", 0, NULL );

    // Start listening
    if ( SUCCEEDED( hr ) )
    {
        hr = m_cpDictGrammar->SetDictationState( SPRS_ACTIVE );
    }

    // We are expecting a PHRASESTART followed by either a RECOGNITION or a 
    // FALSERECOGNITION

    // Wait for the PHRASE_START
    CSpEvent event;
    WORD eLastEventID = SPEI_FALSE_RECOGNITION;
    hr = m_cpIncomingRecoCtxt->WaitForNotifyEvent(CALLER_TIMEOUT);
    if ( SUCCEEDED( hr ) )
    {
        hr = event.GetFrom( m_cpIncomingRecoCtxt );
    }

    // Enter this block only if we have not timed out (the user started speaking)
    if ( ( S_OK == hr ) && ( SPEI_PHRASE_START == event.eEventId ) )
    {
        // Caller has started to speak, block "forever" until the 
        // result (or lack thereof) comes back.
        // This is all right, since every PHRASE_START is guaranteed
        // to be followed up by a RECOGNITION or FALSE_RECOGNITION
        hr = m_cpIncomingRecoCtxt->WaitForNotifyEvent(INFINITE);

        if ( S_OK == hr )
        {
            // Get the RECOGNITION or FALSE_RECOGNITION 
            hr = event.GetFrom( m_cpIncomingRecoCtxt );
            eLastEventID = event.eEventId;

            // This had better be either a RECOGNITION or FALSERECOGNITION!
            _ASSERTE( (SPEI_RECOGNITION == event.eEventId) || 
                            (SPEI_FALSE_RECOGNITION == event.eEventId) );
        }
    }


    // Make sure a recognition result was actually received (as opposed to a false recognition
    // or timeout on the caller)
    WCHAR *pwszCoMemText = NULL;
    ISpRecoResult *pResult = NULL;
    if ( SUCCEEDED( hr ) && ( SPEI_RECOGNITION == event.eEventId ) )
    {
        // Get the text of the result
        pResult = event.RecoResult();

        BYTE bDisplayAttr;
        hr = pResult->GetText( SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, FALSE, &pwszCoMemText, &bDisplayAttr );
    }
    if ( SUCCEEDED( hr ) && pResult )
    {
        // Speak the result back locally
        m_cpLocalVoice->Speak( L"I think the person on the phone said", SPF_ASYNC, 0 );
        m_cpLocalVoice->Speak( pwszCoMemText, SPF_ASYNC, 0 );
        m_cpLocalVoice->Speak( L"when he said", SPF_ASYNC, 0 );
        
        // Get the audio so that the local voice can speak it back
        CComPtr<ISpStreamFormat> cpStreamFormat;
        HRESULT hrAudio = pResult->GetAudio( 0, 0, &cpStreamFormat );
        if ( SUCCEEDED( hrAudio ) )
        {
            m_cpLocalVoice->SpeakStream( cpStreamFormat, SPF_ASYNC, 0 );
        }
        else
        {
            m_cpLocalVoice->Speak( L"no audio was available", SPF_ASYNC, 0 );
        }
    }

    // Stop listening
    if ( SUCCEEDED( hr ) )
    {
        hr = m_cpDictGrammar->SetDictationState( SPRS_INACTIVE );
    }

    // Close the audio input so that we can open the audio output
    // (half-duplex device)
    if ( SUCCEEDED( hr ) )
    {
        hr = m_cpMMSysAudioIn->SetState( SPAS_CLOSED, 0 );
    }

    // The caller may have hung up on us, in which case we don't want to do 
    // the following
    if ( m_pCall )
    {
        if ( pResult )
        {
            // There's a result to playback
            if ( SUCCEEDED( hr ) )
            {
                hr = m_cpOutgoingVoice->Speak( L"I think I heard you say", 0, 0 );
            }
            if ( SUCCEEDED( hr ) )
            {
                hr = m_cpOutgoingVoice->Speak( pwszCoMemText, 0, 0 );
            }
            if ( SUCCEEDED( hr ) )
            {
                hr = m_cpOutgoingVoice->Speak( L"when you said", 0, 0 );
            }
            if ( SUCCEEDED( hr ) )
            {
                hr = pResult->SpeakAudio( NULL, 0, NULL, NULL );
            }
        }
        else
        {
            // Caller didn't say anything
            if ( SUCCEEDED( hr ) )
            {
                hr = m_cpOutgoingVoice->Speak( L"I don't believe you said anything!", 0, 0 );
            }
        }

        if ( SUCCEEDED( hr ) )
        {
            hr = m_cpOutgoingVoice->Speak( L"OK bye now", 0, 0 );
        }
    }
    else
    {
        m_cpLocalVoice->Speak( L"Prematurely terminated call", 0, 0 );
    }

    if ( pwszCoMemText )
    {
        ::CoTaskMemFree( pwszCoMemText );
    }

    return m_pCall ? hr : TAPI_E_DROPPED;
}   /* COperator::HandleCall */
예제 #19
0
//-----------------------------------------------------------------------------
// Purpose: Given a wave file and a string of words "text", creates a CFG from the
//  sentence and stores the resulting words/phonemes in CSentence
// Input  : *wavname - 
//			text - 
//			sentence - 
//			(*pfnPrint - 
// Output : SR_RESULT
//-----------------------------------------------------------------------------
SR_RESULT ExtractPhonemes( const char *wavname, CSpDynamicString& text, CSentence& sentence, void (*pfnPrint)( const char *fmt, ...) )
{
	// Assume failure
	SR_RESULT result = SR_RESULT_ERROR;

	if ( text.Length() <= 0 )
	{
		pfnPrint( "Error:  no rule / text specified\n" );
		return result;
	}

	USES_CONVERSION;
	HRESULT hr;
	
	CUtlVector < WORDRULETYPE > wordRules;

	CComPtr<ISpStream> cpInputStream;
	CComPtr<ISpRecognizer> cpRecognizer;
	CComPtr<ISpRecoContext> cpRecoContext;
	CComPtr<ISpRecoGrammar> cpRecoGrammar;
	CComPtr<ISpPhoneConverter>  cpPhoneConv;
    
	// Create basic SAPI stream object
	// NOTE: The helper SpBindToFile can be used to perform the following operations
	hr = cpInputStream.CoCreateInstance(CLSID_SpStream);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Stream object not installed?\n" );
		return result;
	}

	CSpStreamFormat sInputFormat;
	
	// setup stream object with wav file MY_WAVE_AUDIO_FILENAME
	//   for read-only access, since it will only be access by the SR engine
	hr = cpInputStream->BindToFile(
		T2W(wavname),
		SPFM_OPEN_READONLY,
		NULL,
		sInputFormat.WaveFormatExPtr(),
		SPFEI_ALL_EVENTS );

	if ( FAILED( hr ) )
	{
		pfnPrint( "Error: couldn't open wav file %s\n", wavname );
		return result;
	}
	
	// Create in-process speech recognition engine
	hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 In process recognizer object not installed?\n" );
		return result;
	}

	// Create recognition context to receive events
	hr = cpRecognizer->CreateRecoContext(&cpRecoContext);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to create recognizer context\n" );
		return result;
	}
	
	// Create a grammar
	hr = cpRecoContext->CreateGrammar( EP_GRAM_ID, &cpRecoGrammar );
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to create recognizer grammar\n" );
		return result;
	}

	LANGID englishID = 0x409; // 1033 decimal

	bool userSpecified = false;
	LANGID langID = SpGetUserDefaultUILanguage();

	// Allow commandline override
	if ( CommandLine()->FindParm( "-languageid" ) != 0 )
	{
		userSpecified = true;
		langID = CommandLine()->ParmValue( "-languageid", langID );
	}

	// Create a phoneme converter ( so we can convert to IPA codes )
	hr = SpCreatePhoneConverter( langID, NULL, NULL, &cpPhoneConv );
	if ( FAILED( hr ) )
	{
		if ( langID != englishID )
		{
			if ( userSpecified )
			{
				pfnPrint( "Warning:  SAPI 5.1 Unable to create phoneme converter for command line override -languageid %i\n", langID );
			}
			else
			{
				pfnPrint( "Warning:  SAPI 5.1 Unable to create phoneme converter for default UI language %i\n",langID );
			}

			// Try english!!!
			langID = englishID;
			hr = SpCreatePhoneConverter( langID, NULL, NULL, &cpPhoneConv );
		}

		if ( FAILED( hr ) )
		{
			pfnPrint( "Error:  SAPI 5.1 Unable to create phoneme converter for English language id %i\n", langID );
			return result;
		}
		else
		{
			pfnPrint( "Note:  SAPI 5.1 Falling back to use english -languageid %i\n", langID );
		}
	}
	else if ( userSpecified )
	{
		pfnPrint( "Note:  SAPI 5.1 Using user specified -languageid %i\n",langID );
	}

	SPSTATEHANDLE hStateRoot;
	// create/re-create Root level rule of grammar
	hr = cpRecoGrammar->GetRule(L"Root", 0, SPRAF_TopLevel | SPRAF_Active, TRUE, &hStateRoot);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to create root rule\n" );
		return result;
	}

	// Inactivate it so we can alter it
	hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_INACTIVE );
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to deactivate grammar rules\n" );
		return result;
	}

	// Create the rule set from the words in text
	{
		CSpDynamicString currentWord;
		WCHAR *pos = ( WCHAR * )text;
		WCHAR str[ 2 ];
		str[1]= 0;

		while ( *pos )
		{
			if ( *pos == L' ' /*|| *pos == L'.' || *pos == L'-'*/ )
			{
				// Add word to rule set
				if ( currentWord.Length() > 0 )
				{
					AddWordRule( cpRecoGrammar, &hStateRoot, &wordRules, currentWord );
					currentWord.Clear();
				}
				pos++;
				continue;
			}

			// Skip anything that's inside a [ xxx ] pair.
			if ( *pos == L'[' )
			{
				while ( *pos && *pos != L']' )
				{
					pos++;
				}

				if ( *pos )
				{
					pos++;
				}
				continue;
			}

			str[ 0 ] = *pos;

			currentWord.Append( str );
			pos++;
		}

		if ( currentWord.Length() > 0 )
		{
			AddWordRule( cpRecoGrammar, &hStateRoot, &wordRules, currentWord );
		}

		if ( wordRules.Size() <= 0 )
		{
			pfnPrint( "Error:  Text %s contained no usable words\n", text );
			return result;
		}

		// Build all word to word transitions in the grammar
		if ( !BuildRules( cpRecoGrammar, &hStateRoot, &wordRules ) )
		{
			pfnPrint( "Error:  Rule set for %s could not be generated\n", text );
			return result;
		}
	}

	// check for recognitions and end of stream event
	const ULONGLONG ullInterest = 
		SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_END_SR_STREAM) | SPFEI(SPEI_FALSE_RECOGNITION) | 
		SPFEI(SPEI_PHRASE_START ) | SPFEI(SPEI_HYPOTHESIS ) | SPFEI(SPEI_INTERFERENCE) ;
	hr = cpRecoContext->SetInterest( ullInterest, ullInterest );
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to set interest level\n" );
		return result;
	}	
	// use Win32 events for command-line style application
	hr = cpRecoContext->SetNotifyWin32Event();
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to set win32 notify event\n" );
		return result;
	}
	// connect wav input to recognizer
	// SAPI will negotiate mismatched engine/input audio formats using system audio codecs, so second parameter is not important - use default of TRUE
	hr = cpRecognizer->SetInput(cpInputStream, TRUE);
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to associate input stream\n" );
		return result;
	}	

	// Activate the CFG ( rather than using dictation )
	hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_ACTIVE );
	if ( FAILED( hr ) )
	{
		switch ( hr )
		{
		case E_INVALIDARG:
			pfnPrint( "pszName is invalid or bad. Alternatively, pReserved is non-NULL\n" );
			break;
		case SP_STREAM_UNINITIALIZED:
			pfnPrint( "ISpRecognizer::SetInput has not been called with the InProc recognizer\n" );
			break;
		case SPERR_UNINITIALIZED:
			pfnPrint( "The object has not been properly initialized.\n");
			break;
		case SPERR_UNSUPPORTED_FORMAT:
			pfnPrint( "Audio format is bad or is not recognized. Alternatively, the device driver may be busy by another application and cannot be accessed.\n" );
			break;
		case SPERR_NOT_TOPLEVEL_RULE:
			pfnPrint( "The rule pszName exists, but is not a top-level rule.\n" );
			break;
		default:
			pfnPrint( "Unknown error\n" );
			break;
		}
		pfnPrint( "Error:  SAPI 5.1 Unable to activate rule set\n" );
		return result;
	}

	// while events occur, continue processing
	// timeout should be greater than the audio stream length, or a reasonable amount of time expected to pass before no more recognitions are expected in an audio stream
	BOOL fEndStreamReached = FALSE;
	while (!fEndStreamReached && S_OK == cpRecoContext->WaitForNotifyEvent( SR_WAVTIMEOUT ))
	{
		CSpEvent spEvent;
		// pull all queued events from the reco context's event queue
		
		while (!fEndStreamReached && S_OK == spEvent.GetFrom(cpRecoContext))
		{
			// Check event type
			switch (spEvent.eEventId)
			{
			case SPEI_INTERFERENCE:
				{
					SPINTERFERENCE interference = spEvent.Interference();

					switch ( interference )
					{
					case SPINTERFERENCE_NONE:
						pfnPrint( "[ I None ]\r\n" );
						break;
					case SPINTERFERENCE_NOISE:
						pfnPrint( "[ I Noise ]\r\n" );
						break;
					case SPINTERFERENCE_NOSIGNAL:
						pfnPrint( "[ I No Signal ]\r\n" );
						break;
					case SPINTERFERENCE_TOOLOUD:
						pfnPrint( "[ I Too Loud ]\r\n" );
						break;
					case SPINTERFERENCE_TOOQUIET:
						pfnPrint( "[ I Too Quiet ]\r\n" );
						break;
					case SPINTERFERENCE_TOOFAST:
						pfnPrint( "[ I Too Fast ]\r\n" );
						break;
					case SPINTERFERENCE_TOOSLOW:
						pfnPrint( "[ I Too Slow ]\r\n" );
						break;
					default:
						break;
					}
				}
				break;
			case SPEI_PHRASE_START:
				pfnPrint( "Phrase Start\r\n" );
				sentence.MarkNewPhraseBase();
				break;

			case SPEI_HYPOTHESIS:
			case SPEI_RECOGNITION:
			case SPEI_FALSE_RECOGNITION:
				{
                    CComPtr<ISpRecoResult> cpResult;
                    cpResult = spEvent.RecoResult();

                    CSpDynamicString dstrText;
                    if (spEvent.eEventId == SPEI_FALSE_RECOGNITION)
                    {
                        dstrText = L"(Unrecognized)";

						result = SR_RESULT_FAILED;

						// It's possible that the failed recog might have more words, so see if that's the case
						EnumeratePhonemes( cpPhoneConv, cpResult, sentence );
					}
                    else
                    {
						// Hypothesis or recognition success
                        cpResult->GetText( (ULONG)SP_GETWHOLEPHRASE, (ULONG)SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL);

						EnumeratePhonemes( cpPhoneConv, cpResult, sentence );

						if ( spEvent.eEventId == SPEI_RECOGNITION )
						{
							result = SR_RESULT_SUCCESS;
						}

						pfnPrint( va( "%s%s\r\n", spEvent.eEventId == SPEI_HYPOTHESIS ? "[ Hypothesis ] " : "", dstrText.CopyToChar() ) );
					}
                    
                    cpResult.Release();
				}
				break;
				// end of the wav file was reached by the speech recognition engine
            case SPEI_END_SR_STREAM:
				fEndStreamReached = TRUE;
				break;
			}
			
			// clear any event data/object references
			spEvent.Clear();
		}// END event pulling loop - break on empty event queue OR end stream
	}// END event polling loop - break on event timeout OR end stream
	
	// Deactivate rule
	hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_INACTIVE );
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to deactivate rule set\n" );
		return result;
	}

	// close the input stream, since we're done with it
	// NOTE: smart pointer will call SpStream's destructor, and consequently ::Close, but code may want to check for errors on ::Close operation
	hr = cpInputStream->Close();
	if ( FAILED( hr ) )
	{
		pfnPrint( "Error:  SAPI 5.1 Unable to close input stream\n" );
		return result;
	}

	return result;
}
예제 #20
0
bool SpeechRecognizerModule::updateModule()
{
    cout<<".";
    USES_CONVERSION;
    CSpEvent event;

    // Process all of the recognition events
    while (event.GetFrom(m_cpRecoCtxt) == S_OK)
    {
        switch (event.eEventId)
        {
            case SPEI_SOUND_START:
                {
                    m_bInSound = TRUE;
				    yInfo() << "Sound in...";
                    break;
                }

            case SPEI_SOUND_END:
                if (m_bInSound)
                {
                    m_bInSound = FALSE;
                    if (!m_bGotReco)
                    {
                        // The sound has started and ended, 
                        // but the engine has not succeeded in recognizing anything
						yWarning() << "Chunk of sound detected: Recognition is null";
                    }
                    m_bGotReco = FALSE;
                }
                break;

            case SPEI_RECOGNITION:
                // There may be multiple recognition results, so get all of them
                {
                    m_bGotReco = TRUE;
                    static const WCHAR wszUnrecognized[] = L"<Unrecognized>";

                    CSpDynamicString dstrText;
                    if (SUCCEEDED(event.RecoResult()->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, 
                                                            &dstrText, NULL)))
                    {
						SPPHRASE* pPhrase = NULL;
						bool successGetPhrase = SUCCEEDED(event.RecoResult()->GetPhrase(&pPhrase));  
                        int confidence=pPhrase->Rule.Confidence;
						
						string fullSentence = ws2s(dstrText);
						yInfo() <<"Recognized "<<fullSentence<<" with confidence "<<confidence ;
                        

                        //Send over yarp
                        Bottle bOut;
                        bOut.addString(fullSentence.c_str());
                        bOut.addInt(confidence);
                        m_portContinuousRecognition.write(bOut);

                        //Treat the semantic
                        if (successGetPhrase)
                        {
                            //Send sound
                            if (m_forwardSound)
                            {
                                yarp::sig::Sound& rawSnd = m_portSound.prepare();
                                rawSnd = toSound(event.RecoResult());
                                m_portSound.write();
                            }

                            //--------------------------------------------------- 1. 1st subBottle : raw Sentence -----------------------------------------------//
                            int wordCount = pPhrase->Rule.ulCountOfElements;
                            string rawPhrase = "";
                            for(int i=0; i< wordCount; i++){
                                rawPhrase += ws2s(pPhrase->pElements[i].pszDisplayText) + " ";
                                yDebug() << "word : " <<  ws2s(pPhrase->pElements[i].pszDisplayText) ;
                            }
                            yInfo() <<"Raw sentence: "<<rawPhrase ;
                            if (&pPhrase->Rule == NULL)
                            {
                                yError() <<"Cannot parse the sentence!";
                                return true;
                            }
                            //--------------------------------------------------- 2. 2nd subottle : Word/Role ---------------------------------------------------//
                            Bottle bOutGrammar;
                            bOutGrammar.addString(rawPhrase.c_str());
                            bOutGrammar.addList()=toBottle(pPhrase,&pPhrase->Rule);
							yInfo() << "Sending semantic bottle : " << bOutGrammar.toString().c_str() ;
                            m_portContinuousRecognitionGrammar.write(bOutGrammar);
                            ::CoTaskMemFree(pPhrase);
                        }

                        if (m_useTalkBack)
                            say(fullSentence);
                    }
                }
                break;
        }
    }
    return true;
}
예제 #21
0
LRESULT CALLBACK WndProc(HWND hwnd, UINT message, WPARAM wParam, LPARAM lParam)
{
	HDC           hdc;
	PAINTSTRUCT   ps;

	switch (message)
	{
	case WM_CREATE:
	{
		//初始化COM端口
		::CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);
		//创建识别引擎COM实例为共享型
		HRESULT hr = m_cpRecoEngine.CoCreateInstance(CLSID_SpSharedRecognizer);
		//创建识别上下文接口
		if (SUCCEEDED(hr))
		{
			hr = m_cpRecoEngine->CreateRecoContext(&m_cpRecoCtxt);
		}
		else MessageBox(hwnd, TEXT("error1"), TEXT("error"), S_OK);
		//设置识别消息,使计算机时刻监听语音消息
		if (SUCCEEDED(hr))
		{
			hr = m_cpRecoCtxt->SetNotifyWindowMessage(hwnd, WM_RECOEVENT, 0, 0);
		}
		else MessageBox(hwnd, TEXT("error2"), TEXT("error"), S_OK);
		//设置我们感兴趣的事件
		if (SUCCEEDED(hr))
		{
			ULONGLONG ullMyEvents = SPFEI(SPEI_SOUND_START) | SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_SOUND_END);
			hr = m_cpRecoCtxt->SetInterest(ullMyEvents, ullMyEvents);
		}
		else MessageBox(hwnd, TEXT("error3"), TEXT("error"), S_OK);
		//创建语法规则
		b_Cmd_Grammar = TRUE;
		if (FAILED(hr))
		{
			MessageBox(hwnd, TEXT("error4"), TEXT("error"), S_OK);
		}
		hr = m_cpRecoCtxt->CreateGrammar(GID_CMD_GR, &m_cpCmdGramma);
		WCHAR wszXMLFile[20] = L"er.xml";
		MultiByteToWideChar(CP_ACP, 0, (LPCSTR)"er.xml", -1, wszXMLFile, 256);
		hr = m_cpCmdGramma->LoadCmdFromFile(wszXMLFile, SPLO_DYNAMIC);
		if (FAILED(hr))
		{
			MessageBox(hwnd, TEXT("error5"), TEXT("error"), S_OK);
		}
		b_initSR = TRUE;
		//在开始识别时,激活语法进行识别
		hr = m_cpCmdGramma->SetRuleState(NULL, NULL, SPRS_ACTIVE);
		return 0;
	}
	case WM_RECOEVENT:
	{
		RECT rect;
		GetClientRect(hwnd, &rect);
		hdc = GetDC(hwnd);
		USES_CONVERSION;
		CSpEvent event;
		while (event.GetFrom(m_cpRecoCtxt) == S_OK)
		{
			switch (event.eEventId)
			{
			case SPEI_RECOGNITION:
			{
				static const WCHAR wszUnrecognized[] = L"<Unrecognized>";
				CSpDynamicString dstrText;
				//取得识别结果
				if (FAILED(event.RecoResult()->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL)))
				{
					dstrText = wszUnrecognized;
				}
				BSTR SRout;
				dstrText.CopyToBSTR(&SRout);
				char* lpszText2 = _com_util::ConvertBSTRToString(SRout);

				if (b_Cmd_Grammar)
				{
					if (strstr("打开企鹅", lpszText2) != NULL)
					{
						DrawText(hdc, TEXT("打开企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
						openQQ();

					}

					if (strstr("关闭企鹅", lpszText2) != NULL)
					{
						DrawText(hdc, TEXT("关闭企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
						closeQQ();
					}

					if (strstr("隐藏企鹅", lpszText2) != NULL)
					{
						DrawText(hdc, TEXT("隐藏企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
						yincangQQ();
					}

					if (strstr("显示企鹅", lpszText2) != NULL)
					{
						DrawText(hdc, TEXT("显示企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
						showQQ();
					}

					if (strstr("上移企鹅", lpszText2) != NULL)
					{
						DrawText(hdc, TEXT("上移企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
						top();
					}

					if (strstr("下移企鹅", lpszText2) != NULL)
					{
						DrawText(hdc, TEXT("下移企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
						bottom();
					}

					if (strstr("左移企鹅", lpszText2) != NULL)
					{
						DrawText(hdc, TEXT("左移企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
						left();
					}

					if (strstr("右移企鹅", lpszText2) != NULL)
					{
						DrawText(hdc, TEXT("右移企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
						right();
					}
				}
			}
			}
		}
		return TRUE;
	}
	case WM_PAINT:
		hdc = BeginPaint(hwnd, &ps);
		EndPaint(hwnd, &ps);
		return 0;
	case WM_DESTROY:
		PostQuitMessage(0);
		return 0;
	}
	return DefWindowProc(hwnd, message, wParam, lParam);
}
예제 #22
0
//認識したときに呼ばれるコールバック
xreturn::r<bool> Recognition_SAPI::CallbackReco()
{
	HRESULT hr;
	//平均認識率
	double SREngineConfidenceAvg;
	//正規表現キャプチャ
	std::map<std::string , std::string> capture;
	//呼びかけの部分の信頼度を取得する.
	double yobikakeEngineConfidence;
	//コールバックIDの取得
	unsigned int funcID;
	//テンポラリルールかどうか。
	bool isTemporaryRule;
	//ルールでマッチしたものをディクテーション認識させた時の結果
	std::string dictationString;
	//マッチした文字列全体
	std::string matchString;

	//マッチした結果を取得し分析します。
	{
		CSpEvent ruleEvent;
		hr = ruleEvent.GetFrom( this->RuleRecoCtxt );
		if ( FAILED(hr) )	return xreturn::windowsError(hr);

		if ( ruleEvent.eEventId != SPEI_RECOGNITION )
		{
			return false;
		}
		this->PoolMainWindow->SyncInvokeLog("SPEI_RECOGNITION" ,LOG_LEVEL_DEBUG);

		{
			//認識した結果
			ISpRecoResult* result;
			result = ruleEvent.RecoResult();

			SPPHRASE *pPhrase;
			hr = result->GetPhrase(&pPhrase);
			if ( FAILED(hr) )	return xreturn::windowsError(hr);

			PhraseTo phraseTo(pPhrase);
			if (phraseTo.IsError())
			{
				this->PoolMainWindow->AsyncInvoke( [=](){
					this->PoolMainWindow->ScriptManager.BadVoiceRecogntion(-5,"","",0,0,false); 
				} );
				return false;
			}

			//平均認識率
			SREngineConfidenceAvg = phraseTo.GetSREngineConfidenceAvg();
			//正規表現キャプチャ
			capture = phraseTo.GetRegexpCapture();
			//呼びかけの部分の信頼度を取得する.
			yobikakeEngineConfidence = phraseTo.GetYobikakeEngineConfidence();
			//コールバックIDの取得
			funcID = phraseTo.GetFuncID();
			//テンポラリルール?
			isTemporaryRule = phraseTo.IsTemporaryRule();
			//マッチした文字列
			matchString = phraseTo.GetAllString();
			if ( !isTemporaryRule )
			{//ルールでマッチしたものをディクテーション認識させてみる。
//				dictationString = this->convertDictation(result,"FilterRule");
//				if ( ! this->checkDictation(dictationString) )
//				{
//					dictationString = this->convertDictation(result,"FilterRule2");
//					if ( ! this->checkDictation(dictationString) )
//					{
						dictationString = this->convertDictation(result,"");
//					}
//				}
			}
		}
	}

	if ( funcID == UINT_MAX || funcID >= this->CallbackDictionary.size()  )
	{//コールバックしようがないマッチは異常。
		return xreturn::error("マッチした後のコールバック関数ID " + num2str(funcID) + " が存在しません" );
	}


	if ( isTemporaryRule )
	{//テンポラリルール
		if (SREngineConfidenceAvg < this->TemporaryRuleConfidenceFilter)
		{//BAD
			this->PoolMainWindow->AsyncInvoke( [=](){
				this->PoolMainWindow->ScriptManager.BadVoiceRecogntion
					(-1,matchString,"",0,SREngineConfidenceAvg,false);
			} );
			return false;
		}
		//上手くマッチしたらのでコールバックする
		this->PoolMainWindow->SyncInvokePopupMessage("音声認識",matchString);
		this->PoolMainWindow->AsyncInvoke( [=](){
			this->PoolMainWindow->ScriptManager.VoiceRecogntion
				(this->CallbackDictionary[funcID],capture,"",0,SREngineConfidenceAvg);
		} );
		return true;
	}
	
	//ディクテーションチェック
	bool dictationCheck = this->checkDictation(dictationString);
	if (this->UseDictationFilter)
	{
		if (! dictationCheck )
		{//ディクテーションチェックの結果エラーになった
			this->PoolMainWindow->AsyncInvoke( [=](){
				this->PoolMainWindow->ScriptManager.BadVoiceRecogntion
					(-2,matchString,dictationString,yobikakeEngineConfidence,SREngineConfidenceAvg,dictationCheck);
			} );
			return false;
		}
	}

	//呼びかけの部分の信頼度
	if (yobikakeEngineConfidence <  this->YobikakeRuleConfidenceFilter )
	{//呼びかけの信頼度が足りない
		this->PoolMainWindow->AsyncInvoke( [=](){
			this->PoolMainWindow->ScriptManager.BadVoiceRecogntion
				(-3,matchString,dictationString,yobikakeEngineConfidence,SREngineConfidenceAvg,dictationCheck);
		} );
		return false;
	}

	//全体を通しての信頼度
	if (SREngineConfidenceAvg <  this->BasicRuleConfidenceFilter )
	{//全体を通しての信頼度が足りない
		this->PoolMainWindow->AsyncInvoke( [=](){
			this->PoolMainWindow->ScriptManager.BadVoiceRecogntion
				(-4,matchString,dictationString,yobikakeEngineConfidence,SREngineConfidenceAvg,dictationCheck);
		} );
		return false;
	}

	//マッチしたのでコールバックする
	this->PoolMainWindow->SyncInvokePopupMessage("音声認識",matchString);
	this->PoolMainWindow->AsyncInvoke( [=](){
		this->PoolMainWindow->ScriptManager.VoiceRecogntion
			(this->CallbackDictionary[funcID],capture,dictationString,yobikakeEngineConfidence,SREngineConfidenceAvg);
	} );
	return true;
}