/***************************************************************************************** * CSimpleDict::RecoEvent() * Called whenever the dialog process is notified of a recognition event. * Inserts whatever is recognized into the edit box. ******************************************************************************************/ void CSimpleDict::RecoEvent() { USES_CONVERSION; CSpEvent event; // Process all of the recognition events while (event.GetFrom(m_cpRecoCtxt) == S_OK) { switch (event.eEventId) { case SPEI_SOUND_START: m_bInSound = TRUE; break; case SPEI_SOUND_END: if (m_bInSound) { m_bInSound = FALSE; if (!m_bGotReco) { // The sound has started and ended, // but the engine has not succeeded in recognizing anything const TCHAR szNoise[] = _T("<noise>"); ::SendDlgItemMessage( m_hDlg, IDC_EDIT_DICT, EM_REPLACESEL, TRUE, (LPARAM) szNoise ); } m_bGotReco = FALSE; } break; case SPEI_RECOGNITION: // There may be multiple recognition results, so get all of them { m_bGotReco = TRUE; static const WCHAR wszUnrecognized[] = L"<Unrecognized>"; CSpDynamicString dstrText; if (FAILED(event.RecoResult()->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL))) { dstrText = wszUnrecognized; } // Concatenate a space onto the end of the recognized word dstrText.Append(L" "); ::SendDlgItemMessage( m_hDlg, IDC_EDIT_DICT, EM_REPLACESEL, TRUE, (LPARAM) W2T(dstrText) ); } break; } } }
void CMyDlg::OnRecoEvent() { //CMyPackManApp *pApp = (CMyPackManApp *)AfxGetApp(); //View -> App CMainFrame *pMain = (CMainFrame *)AfxGetMainWnd(); //View -> MainFrm CMyPackManView *pView = (CMyPackManView *)pMain->GetActiveView(); USES_CONVERSION; CSpEvent event; //MessageBox(L"A"); while (event.GetFrom(m_cpRecoCtxt) == S_OK) { switch (event.eEventId) { case SPEI_RECOGNITION: { m_bReco = TRUE; static const WCHAR wszUnrecognized[] = L"fail"; CSpDynamicString dstrText; if (FAILED(event.RecoResult()->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL))) { dstrText = wszUnrecognized; } dstrText.Append(L" "); m_Edit = dstrText; //if (m_Edit == "up " || m_Edit == "Up " || m_Edit == "Down " || m_Edit == "down " || m_Edit == "left " || m_Edit == "Left " || m_Edit == "Right " || m_Edit == "right " // || m_Edit == "let " || m_Edit == "light " || m_Edit == "night ") // pView->m_EditV = m_Edit; ////::SendDlgItemMessage(m_hWnd, IDC_EDIT_DICT, EM_REPLACESEL, TRUE, (LPARAM)W2T(dstrText)); //UpdateData(TRUE); //m_pView->Invalidate(); if (m_Edit == "down " || m_Edit == "Down ") m_pView->mKey = DOWN; else if (m_Edit == "up " || m_Edit == "Up ") m_pView->mKey = UP; else if (m_Edit == "one " || m_Edit == "One ") m_pView->mKey = LEFT; else if (m_Edit == "two " || m_Edit == "Two ") m_pView->mKey = RITE; UpdateData(TRUE); //UpdateData(FALSE); } break; } } }
//----------------------------------------------------------------------------- // Purpose: Debugging, prints alternate list if one is created // Input : cpResult - // (*pfnPrint - //----------------------------------------------------------------------------- void PrintAlternates( ISpRecoResult* cpResult, void (*pfnPrint)( const char *fmt, ... ) ) { ISpPhraseAlt *rgPhraseAlt[ 32 ]; memset( rgPhraseAlt, 0, sizeof( rgPhraseAlt ) ); ULONG ulCount; ISpPhrase *phrase = ( ISpPhrase * )cpResult; if ( phrase ) { SPPHRASE *pElements; if ( SUCCEEDED( phrase->GetPhrase( &pElements ) ) ) { if ( pElements->Rule.ulCountOfElements > 0 ) { HRESULT hr = cpResult->GetAlternates( pElements->Rule.ulFirstElement, pElements->Rule.ulCountOfElements, 32, rgPhraseAlt, &ulCount); Assert( !FAILED( hr ) ); for ( ULONG r = 0 ; r < ulCount; r++ ) { CSpDynamicString dstrText; hr = rgPhraseAlt[ r ]->GetText( (ULONG)SP_GETWHOLEPHRASE, (ULONG)SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL); Assert( !FAILED( hr ) ); pfnPrint( "[ ALT ]" ); pfnPrint( dstrText.CopyToChar() ); pfnPrint( "\r\n" ); } } } } for ( int i = 0; i < 32; i++ ) { if ( rgPhraseAlt[ i ] ) { rgPhraseAlt[ i ]->Release(); rgPhraseAlt[ i ] = NULL; } } }
// Speaks Qtext asynchronously through the recognition context's voice, blocks
// (via BlockForResult) until one dictation result arrives, echoes the
// recognized text to cout and maps it, case-insensitively, to an answer code:
//   1 = True/Two          2 = False/Falls/Follows   3 = A/Eight
//   4 = B/Bee             5 = C/See                 6 = Fire
//   7 = Leave/Leave it/We've/Quit/Quits             8 = Switch/Change
// Returns 0 when COM/SAPI setup fails, when no text could be retrieved, or
// when the phrase matches none of the words above.
int voiceRecognition(string Qtext)
{
    struct AnswerEntry
    {
        const WCHAR *phrase;
        int          code;
    };

    // Includes the common mis-hearings the dictation engine produces.
    static const AnswerEntry kAnswers[] =
    {
        { L"True",     1 }, { L"Two",     1 },
        { L"False",    2 }, { L"Falls",   2 }, { L"Follows", 2 },
        { L"A",        3 }, { L"Eight",   3 },
        { L"B",        4 }, { L"Bee",     4 },
        { L"C",        5 }, { L"See",     5 },
        { L"Fire",     6 },
        { L"Leave",    7 }, { L"Leave it", 7 }, { L"We've",  7 },
        { L"Quit",     7 }, { L"Quits",   7 },
        { L"Switch",   8 }, { L"Change",  8 },
    };

    HRESULT hr = E_FAIL;
    int word = 0;

    if (SUCCEEDED(hr = ::CoInitialize(NULL)))
    {
        {
            // Everything that owns COM memory lives in this inner scope so it
            // is destroyed before CoUninitialize() runs.
            CComPtr<ISpRecoContext> cpRecoCtxt;
            CComPtr<ISpRecoGrammar> cpGrammar;
            CComPtr<ISpVoice>       cpVoice;
            CSpDynamicString        Qtextout;

            hr = cpRecoCtxt.CoCreateInstance(CLSID_SpSharedRecoContext);
            if (SUCCEEDED(hr))
            {
                hr = cpRecoCtxt->GetVoice(&cpVoice);
            }

            if (cpRecoCtxt && cpVoice &&
                SUCCEEDED(hr = cpRecoCtxt->SetNotifyWin32Event()) &&
                SUCCEEDED(hr = cpRecoCtxt->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION))) &&
                SUCCEEDED(hr = cpRecoCtxt->SetAudioOptions(SPAO_RETAIN_AUDIO, NULL, NULL)) &&
                SUCCEEDED(hr = cpRecoCtxt->CreateGrammar(0, &cpGrammar)) &&
                SUCCEEDED(hr = cpGrammar->LoadDictation(NULL, SPLO_STATIC)) &&
                SUCCEEDED(hr = cpGrammar->SetDictationState(SPRS_ACTIVE)))
            {
                CComPtr<ISpRecoResult> cpResult;

                // Read the question aloud without blocking.
                Qtextout = Qtext.c_str();
                cpVoice->Speak(Qtextout, SPF_ASYNC, NULL);

                if (SUCCEEDED(hr = BlockForResult(cpRecoCtxt, &cpResult)))
                {
                    cpGrammar->SetDictationState(SPRS_INACTIVE);

                    CSpDynamicString dstrText;
                    const HRESULT hrText = cpResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE,
                                                             TRUE, &dstrText, NULL);
                    cpResult.Release();

                    // Only inspect the text if we actually got some; comparing
                    // or printing a NULL string is invalid.
                    if (SUCCEEDED(hrText) && (WCHAR *)dstrText != NULL)
                    {
                        for (size_t i = 0; i < sizeof(kAnswers) / sizeof(kAnswers[0]); ++i)
                        {
                            if (_wcsicmp(dstrText, kAnswers[i].phrase) == 0)
                            {
                                word = kAnswers[i].code;
                                break;
                            }
                        }

                        // CopyToChar returns a CoTaskMemAlloc'd buffer we must free.
                        char *pszText = dstrText.CopyToChar();
                        if (pszText != NULL)
                        {
                            cout << pszText;
                            ::CoTaskMemFree(pszText);
                        }
                    }

                    cpGrammar->SetDictationState(SPRS_ACTIVE);
                }
            }
        }
        ::CoUninitialize();
    }

    return word;
}
// Window procedure for the voice-command window.
//   WM_CREATE    - initializes COM, creates the shared recognizer and its
//                  context, routes recognition notifications to this window as
//                  WM_RECOEVENT, loads the command grammar "er.xml" and
//                  activates it. Failures are reported with message boxes
//                  ("error1".."error5"); later steps are skipped once a step
//                  has failed instead of dereferencing a null interface.
//   WM_RECOEVENT - drains the recognition events, draws the matched command
//                  text centered in the client area and runs its action.
//   WM_PAINT     - validates the window (nothing is drawn here).
//   WM_DESTROY   - posts WM_QUIT.
// Everything else goes to DefWindowProc.
LRESULT CALLBACK WndProc(HWND hwnd, UINT message, WPARAM wParam, LPARAM lParam)
{
    HDC hdc;
    PAINTSTRUCT ps;

    switch (message)
    {
    case WM_CREATE:
    {
        // Initialize COM for this thread
        ::CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);

        // Create the recognizer engine as a shared instance
        HRESULT hr = m_cpRecoEngine.CoCreateInstance(CLSID_SpSharedRecognizer);

        // Create the recognition context
        if (SUCCEEDED(hr))
        {
            hr = m_cpRecoEngine->CreateRecoContext(&m_cpRecoCtxt);
        }
        else
        {
            MessageBox(hwnd, TEXT("error1"), TEXT("error"), MB_OK);
        }

        // Have recognition notifications delivered to this window so speech
        // is monitored continuously
        if (SUCCEEDED(hr))
        {
            hr = m_cpRecoCtxt->SetNotifyWindowMessage(hwnd, WM_RECOEVENT, 0, 0);
        }
        else
        {
            MessageBox(hwnd, TEXT("error2"), TEXT("error"), MB_OK);
        }

        // Select the events we are interested in
        if (SUCCEEDED(hr))
        {
            ULONGLONG ullMyEvents = SPFEI(SPEI_SOUND_START) | SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_SOUND_END);
            hr = m_cpRecoCtxt->SetInterest(ullMyEvents, ullMyEvents);
        }
        else
        {
            MessageBox(hwnd, TEXT("error3"), TEXT("error"), MB_OK);
        }

        // Create the grammar rules
        b_Cmd_Grammar = TRUE;
        if (FAILED(hr))
        {
            MessageBox(hwnd, TEXT("error4"), TEXT("error"), MB_OK);
        }

        // Only touch the context / grammar when the previous step succeeded;
        // otherwise the smart pointers are still null.
        if (SUCCEEDED(hr))
        {
            hr = m_cpRecoCtxt->CreateGrammar(GID_CMD_GR, &m_cpCmdGramma);
        }
        if (SUCCEEDED(hr))
        {
            // The file name is a constant, so pass it as a wide literal rather
            // than converting into a fixed buffer with a mismatched size.
            hr = m_cpCmdGramma->LoadCmdFromFile(L"er.xml", SPLO_DYNAMIC);
        }
        if (FAILED(hr))
        {
            MessageBox(hwnd, TEXT("error5"), TEXT("error"), MB_OK);
        }

        // NOTE(review): set even when initialization failed, as before -
        // confirm what readers of b_initSR expect.
        b_initSR = TRUE;

        // Activate the grammar so recognition starts
        if (SUCCEEDED(hr))
        {
            hr = m_cpCmdGramma->SetRuleState(NULL, NULL, SPRS_ACTIVE);
        }
        return 0;
    }

    case WM_RECOEVENT:
    {
        RECT rect;
        GetClientRect(hwnd, &rect);
        hdc = GetDC(hwnd);

        CSpEvent event;
        while (event.GetFrom(m_cpRecoCtxt) == S_OK)
        {
            if (event.eEventId != SPEI_RECOGNITION)
            {
                continue;
            }

            static const WCHAR wszUnrecognized[] = L"<Unrecognized>";
            CSpDynamicString dstrText;

            // Fetch the recognition result
            if (FAILED(event.RecoResult()->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL)))
            {
                dstrText = wszUnrecognized;
            }

            // Convert to a narrow string; both intermediate buffers are ours
            // to free (SysFreeString / delete[]).
            BSTR SRout = NULL;
            dstrText.CopyToBSTR(&SRout);
            char* lpszText2 = _com_util::ConvertBSTRToString(SRout);
            ::SysFreeString(SRout);

            // NOTE(review): strstr() is called with the command as the
            // haystack and the recognized text as the needle, i.e. a command
            // fires when the recognized text is contained in it. Kept as in
            // the original - confirm this is the intended direction.
            if (b_Cmd_Grammar && lpszText2 != NULL)
            {
                if (strstr("打开企鹅", lpszText2) != NULL)
                {
                    DrawText(hdc, TEXT("打开企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
                    openQQ();
                }
                if (strstr("关闭企鹅", lpszText2) != NULL)
                {
                    DrawText(hdc, TEXT("关闭企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
                    closeQQ();
                }
                if (strstr("隐藏企鹅", lpszText2) != NULL)
                {
                    DrawText(hdc, TEXT("隐藏企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
                    yincangQQ();
                }
                if (strstr("显示企鹅", lpszText2) != NULL)
                {
                    DrawText(hdc, TEXT("显示企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
                    showQQ();
                }
                if (strstr("上移企鹅", lpszText2) != NULL)
                {
                    DrawText(hdc, TEXT("上移企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
                    top();
                }
                if (strstr("下移企鹅", lpszText2) != NULL)
                {
                    DrawText(hdc, TEXT("下移企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
                    bottom();
                }
                if (strstr("左移企鹅", lpszText2) != NULL)
                {
                    DrawText(hdc, TEXT("左移企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
                    left();
                }
                if (strstr("右移企鹅", lpszText2) != NULL)
                {
                    DrawText(hdc, TEXT("右移企鹅"), -1, &rect, DT_SINGLELINE | DT_CENTER | DT_VCENTER);
                    right();
                }
            }

            delete[] lpszText2;
        }

        // Every GetDC must be paired with ReleaseDC
        if (hdc != NULL)
        {
            ReleaseDC(hwnd, hdc);
        }
        return TRUE;
    }

    case WM_PAINT:
        hdc = BeginPaint(hwnd, &ps);
        EndPaint(hwnd, &ps);
        return 0;

    case WM_DESTROY:
        PostQuitMessage(0);
        return 0;
    }

    return DefWindowProc(hwnd, message, wParam, lParam);
}
void CASRwrapper::GetText(std::wstring& speechRes, float* pconfidence, int requestedAlternates, std::wstring alternates[], float alternatesConfidence[]) { //HRESULT hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void **)&m_pVoice); //hr = m_pVoice->Speak(speechRes, 0, NULL); //m_pVoice->Release(); //m_pVoice = NULL; const ULONG maxEvents = 10; SPEVENT events[maxEvents]; ULONG eventCount; HRESULT hr; hr = m_cpRecoCtxt->GetEvents(maxEvents, events, &eventCount); // Warning hr equal S_FALSE if everything is OK // but eventCount < requestedEventCount if (!(hr == S_OK || hr == S_FALSE)) { return; } if (eventCount > 1) { speechRes.assign(L"More than one event!"); return; } ISpRecoResult* recoResult; recoResult = reinterpret_cast<ISpRecoResult*>(events[0].lParam); wchar_t* text; hr = recoResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, FALSE, &text, NULL); speechRes.assign(text); //if (confidence != NULL) //*confidence = recoResult->->pElements->SREngineConfidence;; CoTaskMemFree(text); if (requestedAlternates == 0 && pconfidence == NULL) return; const USHORT MAX_ALTERNATES = 100; if (requestedAlternates > MAX_ALTERNATES) requestedAlternates = MAX_ALTERNATES; if (requestedAlternates == 0) //in case asked for confidence. 
i.e., pconfidence!=NULL requestedAlternates = 1; CComPtr<ISpPhraseAlt> pcpPhraseAlt[MAX_ALTERNATES]; SPPHRASE* pPhrase; std::string betterResult; float ConfidenceMax = 0.0; ULONG ulCount; //std::list<std::string> lWordsRec; // Retrieve information about the recognized phrase hr = recoResult->GetPhrase(&pPhrase); if (SUCCEEDED(hr)) { // Retrieve a list of alternative phrases related to the recognized phrase hr = recoResult->GetAlternates(pPhrase->Rule.ulFirstElement, pPhrase->Rule.ulCountOfElements, requestedAlternates, (ISpPhraseAlt**)pcpPhraseAlt, &ulCount); } if (SUCCEEDED(hr)) { // Browse the list of alternative phrases in order of highest likelyhood with the original phrase for (unsigned int i = 0; i < ulCount; i++) { SPPHRASE* pPhraseAlt; CSpDynamicString pwszAlternate; // Retrieve information about the current alternative phrase pcpPhraseAlt[i]->GetPhrase(&pPhraseAlt); // Get the phrase's entire text string hr = pcpPhraseAlt[i]->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &pwszAlternate, NULL); if (SUCCEEDED(hr)) { if (i == 1 && pconfidence != NULL) *pconfidence = pPhraseAlt->pElements->SREngineConfidence; if (alternatesConfidence != NULL) alternatesConfidence[i] = pPhraseAlt->pElements->SREngineConfidence; if (alternates != NULL) alternates[i] = pwszAlternate.Copy(); // .CopyToChar(); } } } }
//----------------------------------------------------------------------------- // Purpose: Given a wave file and a string of words "text", creates a CFG from the // sentence and stores the resulting words/phonemes in CSentence // Input : *wavname - // text - // sentence - // (*pfnPrint - // Output : SR_RESULT //----------------------------------------------------------------------------- SR_RESULT ExtractPhonemes( const char *wavname, CSpDynamicString& text, CSentence& sentence, void (*pfnPrint)( const char *fmt, ...) ) { // Assume failure SR_RESULT result = SR_RESULT_ERROR; if ( text.Length() <= 0 ) { pfnPrint( "Error: no rule / text specified\n" ); return result; } USES_CONVERSION; HRESULT hr; CUtlVector < WORDRULETYPE > wordRules; CComPtr<ISpStream> cpInputStream; CComPtr<ISpRecognizer> cpRecognizer; CComPtr<ISpRecoContext> cpRecoContext; CComPtr<ISpRecoGrammar> cpRecoGrammar; CComPtr<ISpPhoneConverter> cpPhoneConv; // Create basic SAPI stream object // NOTE: The helper SpBindToFile can be used to perform the following operations hr = cpInputStream.CoCreateInstance(CLSID_SpStream); if ( FAILED( hr ) ) { pfnPrint( "Error: SAPI 5.1 Stream object not installed?\n" ); return result; } CSpStreamFormat sInputFormat; // setup stream object with wav file MY_WAVE_AUDIO_FILENAME // for read-only access, since it will only be access by the SR engine hr = cpInputStream->BindToFile( T2W(wavname), SPFM_OPEN_READONLY, NULL, sInputFormat.WaveFormatExPtr(), SPFEI_ALL_EVENTS ); if ( FAILED( hr ) ) { pfnPrint( "Error: couldn't open wav file %s\n", wavname ); return result; } // Create in-process speech recognition engine hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer); if ( FAILED( hr ) ) { pfnPrint( "Error: SAPI 5.1 In process recognizer object not installed?\n" ); return result; } // Create recognition context to receive events hr = cpRecognizer->CreateRecoContext(&cpRecoContext); if ( FAILED( hr ) ) { pfnPrint( "Error: SAPI 5.1 Unable to create recognizer 
context\n" ); return result; } // Create a grammar hr = cpRecoContext->CreateGrammar( EP_GRAM_ID, &cpRecoGrammar ); if ( FAILED( hr ) ) { pfnPrint( "Error: SAPI 5.1 Unable to create recognizer grammar\n" ); return result; } LANGID englishID = 0x409; // 1033 decimal bool userSpecified = false; LANGID langID = SpGetUserDefaultUILanguage(); // Allow commandline override if ( CommandLine()->FindParm( "-languageid" ) != 0 ) { userSpecified = true; langID = CommandLine()->ParmValue( "-languageid", langID ); } // Create a phoneme converter ( so we can convert to IPA codes ) hr = SpCreatePhoneConverter( langID, NULL, NULL, &cpPhoneConv ); if ( FAILED( hr ) ) { if ( langID != englishID ) { if ( userSpecified ) { pfnPrint( "Warning: SAPI 5.1 Unable to create phoneme converter for command line override -languageid %i\n", langID ); } else { pfnPrint( "Warning: SAPI 5.1 Unable to create phoneme converter for default UI language %i\n",langID ); } // Try english!!! langID = englishID; hr = SpCreatePhoneConverter( langID, NULL, NULL, &cpPhoneConv ); } if ( FAILED( hr ) ) { pfnPrint( "Error: SAPI 5.1 Unable to create phoneme converter for English language id %i\n", langID ); return result; } else { pfnPrint( "Note: SAPI 5.1 Falling back to use english -languageid %i\n", langID ); } } else if ( userSpecified ) { pfnPrint( "Note: SAPI 5.1 Using user specified -languageid %i\n",langID ); } SPSTATEHANDLE hStateRoot; // create/re-create Root level rule of grammar hr = cpRecoGrammar->GetRule(L"Root", 0, SPRAF_TopLevel | SPRAF_Active, TRUE, &hStateRoot); if ( FAILED( hr ) ) { pfnPrint( "Error: SAPI 5.1 Unable to create root rule\n" ); return result; } // Inactivate it so we can alter it hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_INACTIVE ); if ( FAILED( hr ) ) { pfnPrint( "Error: SAPI 5.1 Unable to deactivate grammar rules\n" ); return result; } // Create the rule set from the words in text { CSpDynamicString currentWord; WCHAR *pos = ( WCHAR * )text; WCHAR str[ 2 ]; str[1]= 0; 
while ( *pos ) { if ( *pos == L' ' /*|| *pos == L'.' || *pos == L'-'*/ ) { // Add word to rule set if ( currentWord.Length() > 0 ) { AddWordRule( cpRecoGrammar, &hStateRoot, &wordRules, currentWord ); currentWord.Clear(); } pos++; continue; } // Skip anything that's inside a [ xxx ] pair. if ( *pos == L'[' ) { while ( *pos && *pos != L']' ) { pos++; } if ( *pos ) { pos++; } continue; } str[ 0 ] = *pos; currentWord.Append( str ); pos++; } if ( currentWord.Length() > 0 ) { AddWordRule( cpRecoGrammar, &hStateRoot, &wordRules, currentWord ); } if ( wordRules.Size() <= 0 ) { pfnPrint( "Error: Text %s contained no usable words\n", text ); return result; } // Build all word to word transitions in the grammar if ( !BuildRules( cpRecoGrammar, &hStateRoot, &wordRules ) ) { pfnPrint( "Error: Rule set for %s could not be generated\n", text ); return result; } } // check for recognitions and end of stream event const ULONGLONG ullInterest = SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_END_SR_STREAM) | SPFEI(SPEI_FALSE_RECOGNITION) | SPFEI(SPEI_PHRASE_START ) | SPFEI(SPEI_HYPOTHESIS ) | SPFEI(SPEI_INTERFERENCE) ; hr = cpRecoContext->SetInterest( ullInterest, ullInterest ); if ( FAILED( hr ) ) { pfnPrint( "Error: SAPI 5.1 Unable to set interest level\n" ); return result; } // use Win32 events for command-line style application hr = cpRecoContext->SetNotifyWin32Event(); if ( FAILED( hr ) ) { pfnPrint( "Error: SAPI 5.1 Unable to set win32 notify event\n" ); return result; } // connect wav input to recognizer // SAPI will negotiate mismatched engine/input audio formats using system audio codecs, so second parameter is not important - use default of TRUE hr = cpRecognizer->SetInput(cpInputStream, TRUE); if ( FAILED( hr ) ) { pfnPrint( "Error: SAPI 5.1 Unable to associate input stream\n" ); return result; } // Activate the CFG ( rather than using dictation ) hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_ACTIVE ); if ( FAILED( hr ) ) { switch ( hr ) { case E_INVALIDARG: pfnPrint( "pszName 
is invalid or bad. Alternatively, pReserved is non-NULL\n" ); break; case SP_STREAM_UNINITIALIZED: pfnPrint( "ISpRecognizer::SetInput has not been called with the InProc recognizer\n" ); break; case SPERR_UNINITIALIZED: pfnPrint( "The object has not been properly initialized.\n"); break; case SPERR_UNSUPPORTED_FORMAT: pfnPrint( "Audio format is bad or is not recognized. Alternatively, the device driver may be busy by another application and cannot be accessed.\n" ); break; case SPERR_NOT_TOPLEVEL_RULE: pfnPrint( "The rule pszName exists, but is not a top-level rule.\n" ); break; default: pfnPrint( "Unknown error\n" ); break; } pfnPrint( "Error: SAPI 5.1 Unable to activate rule set\n" ); return result; } // while events occur, continue processing // timeout should be greater than the audio stream length, or a reasonable amount of time expected to pass before no more recognitions are expected in an audio stream BOOL fEndStreamReached = FALSE; while (!fEndStreamReached && S_OK == cpRecoContext->WaitForNotifyEvent( SR_WAVTIMEOUT )) { CSpEvent spEvent; // pull all queued events from the reco context's event queue while (!fEndStreamReached && S_OK == spEvent.GetFrom(cpRecoContext)) { // Check event type switch (spEvent.eEventId) { case SPEI_INTERFERENCE: { SPINTERFERENCE interference = spEvent.Interference(); switch ( interference ) { case SPINTERFERENCE_NONE: pfnPrint( "[ I None ]\r\n" ); break; case SPINTERFERENCE_NOISE: pfnPrint( "[ I Noise ]\r\n" ); break; case SPINTERFERENCE_NOSIGNAL: pfnPrint( "[ I No Signal ]\r\n" ); break; case SPINTERFERENCE_TOOLOUD: pfnPrint( "[ I Too Loud ]\r\n" ); break; case SPINTERFERENCE_TOOQUIET: pfnPrint( "[ I Too Quiet ]\r\n" ); break; case SPINTERFERENCE_TOOFAST: pfnPrint( "[ I Too Fast ]\r\n" ); break; case SPINTERFERENCE_TOOSLOW: pfnPrint( "[ I Too Slow ]\r\n" ); break; default: break; } } break; case SPEI_PHRASE_START: pfnPrint( "Phrase Start\r\n" ); sentence.MarkNewPhraseBase(); break; case SPEI_HYPOTHESIS: case SPEI_RECOGNITION: 
case SPEI_FALSE_RECOGNITION: { CComPtr<ISpRecoResult> cpResult; cpResult = spEvent.RecoResult(); CSpDynamicString dstrText; if (spEvent.eEventId == SPEI_FALSE_RECOGNITION) { dstrText = L"(Unrecognized)"; result = SR_RESULT_FAILED; // It's possible that the failed recog might have more words, so see if that's the case EnumeratePhonemes( cpPhoneConv, cpResult, sentence ); } else { // Hypothesis or recognition success cpResult->GetText( (ULONG)SP_GETWHOLEPHRASE, (ULONG)SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL); EnumeratePhonemes( cpPhoneConv, cpResult, sentence ); if ( spEvent.eEventId == SPEI_RECOGNITION ) { result = SR_RESULT_SUCCESS; } pfnPrint( va( "%s%s\r\n", spEvent.eEventId == SPEI_HYPOTHESIS ? "[ Hypothesis ] " : "", dstrText.CopyToChar() ) ); } cpResult.Release(); } break; // end of the wav file was reached by the speech recognition engine case SPEI_END_SR_STREAM: fEndStreamReached = TRUE; break; } // clear any event data/object references spEvent.Clear(); }// END event pulling loop - break on empty event queue OR end stream }// END event polling loop - break on event timeout OR end stream // Deactivate rule hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_INACTIVE ); if ( FAILED( hr ) ) { pfnPrint( "Error: SAPI 5.1 Unable to deactivate rule set\n" ); return result; } // close the input stream, since we're done with it // NOTE: smart pointer will call SpStream's destructor, and consequently ::Close, but code may want to check for errors on ::Close operation hr = cpInputStream->Close(); if ( FAILED( hr ) ) { pfnPrint( "Error: SAPI 5.1 Unable to close input stream\n" ); return result; } return result; }
//----------------------------------------------------------------------------- // Purpose: Given a wavfile and a list of inwords, determines the word/phonene // sample counts for the sentce // Input : *wavfile - // *inwords - // *outphonemes{ text.Clear( - // Output : SR_RESULT //----------------------------------------------------------------------------- static SR_RESULT SAPI_ExtractPhonemes( const char *wavfile, int numsamples, void (*pfnPrint)( const char *fmt, ... ), CSentence& inwords, CSentence& outwords ) { LogReset(); USES_CONVERSION; CSpDynamicString text; text.Clear(); HKEY hkwipe; LONG lResult = RegOpenKeyEx( HKEY_CURRENT_USER, "Software\\Microsoft\\Speech\\RecoProfiles", 0, KEY_ALL_ACCESS, &hkwipe ); if ( lResult == ERROR_SUCCESS ) { RecursiveRegDelKey( hkwipe ); RegCloseKey( hkwipe ); } if ( strlen( inwords.GetText() ) <= 0 ) { inwords.SetTextFromWords(); } // Construct a string from the inwords array text.Append( T2W( inwords.GetText() ) ); // Assume failure SR_RESULT result = SR_RESULT_ERROR; if ( text.Length() > 0 ) { CSentence sentence; pfnPrint( "Processing...\r\n" ); // Give it a try result = ExtractPhonemes( wavfile, text, sentence, pfnPrint ); pfnPrint( "Finished.\r\n" ); // PrintWordsAndPhonemes( sentence, pfnPrint ); // Copy results to outputs outwords.Reset(); outwords.SetText( inwords.GetText() ); Log( "Starting\n" ); LogWords( inwords ); if ( SR_RESULT_ERROR != result ) { int i; Log( "Hypothesized\n" ); LogWords( sentence ); for( i = 0 ; i < sentence.m_Words.Size(); i++ ) { CWordTag *tag = sentence.m_Words[ i ]; if ( tag ) { // Skip '...' tag if ( stricmp( tag->GetWord(), "..." 
) ) { CWordTag *newTag = new CWordTag( *tag ); outwords.m_Words.AddToTail( newTag ); } } } // Now insert unrecognized/skipped words from original list // int frompos = 0, topos = 0; while( 1 ) { // End of source list if ( frompos >= inwords.m_Words.Size() ) break; const CWordTag *fromTag = inwords.m_Words[ frompos ]; // Reached end of destination list, just copy words over from from source list until // we run out of source words if ( topos >= outwords.m_Words.Size() ) { // Just copy words over CWordTag *newWord = new CWordTag( *fromTag ); // Remove phonemes while ( newWord->m_Phonemes.Size() > 0 ) { CPhonemeTag *kill = newWord->m_Phonemes[ 0 ]; newWord->m_Phonemes.Remove( 0 ); delete kill; } outwords.m_Words.AddToTail( newWord ); frompos++; topos++; continue; } // Destination word const CWordTag *toTag = outwords.m_Words[ topos ]; // Words match, just skip ahead if ( !stricmp( fromTag->GetWord(), toTag->GetWord() ) ) { frompos++; topos++; continue; } // The only case we handle is that something in the source wasn't in the destination // Find the next source word that appears in the destination int skipAhead = frompos + 1; bool found = false; while ( skipAhead < inwords.m_Words.Size() ) { const CWordTag *sourceWord = inwords.m_Words[ skipAhead ]; if ( !stricmp( sourceWord->GetWord(), toTag->GetWord() ) ) { found = true; break; } skipAhead++; } // Uh oh destination has words that are not in source, just skip to next destination word? 
if ( !found ) { topos++; } else { // Copy words from from source list into destination // int skipCount = skipAhead - frompos; while ( --skipCount>= 0 ) { const CWordTag *sourceWord = inwords.m_Words[ frompos++ ]; CWordTag *newWord = new CWordTag( *sourceWord ); // Remove phonemes while ( newWord->m_Phonemes.Size() > 0 ) { CPhonemeTag *kill = newWord->m_Phonemes[ 0 ]; newWord->m_Phonemes.Remove( 0 ); delete kill; } outwords.m_Words.InsertBefore( topos, newWord ); topos++; } frompos++; topos++; } } Log( "\nDone simple check\n" ); LogWords( outwords ); LogPhonemes( outwords ); ComputeMissingByteSpans( numsamples, outwords ); Log( "\nFinal check\n" ); LogWords( outwords ); LogPhonemes( outwords ); } } else { pfnPrint( "Input sentence is empty!\n" ); } // Return results return result; }