QtSpeech::VoiceNames QtSpeech::voices()
{
    VoiceNames vs;
    ULONG count = 0;
    CComPtr<IEnumSpObjectTokens> voices;

    CoInitialize(NULL);
    SysCall( SpEnumTokens(SPCAT_VOICES, NULL, NULL, &voices), LogicError);
    SysCall( voices->GetCount(&count), LogicError);

    for (ULONG i = 0; i < count; i++) {
        WCHAR * w_id = 0L;
        WCHAR * w_name = 0L;
        CComPtr<ISpObjectToken> voice;
        SysCall( voices->Next( 1, &voice, NULL ), LogicError);
        SysCall( SpGetDescription(voice, &w_name), LogicError);
        SysCall( voice->GetId(&w_id), LogicError);

        QString id = QString::fromWCharArray(w_id);
        QString name = QString::fromWCharArray(w_name);
        VoiceName n = { id, name };
        vs << n;

        // GetId() and SpGetDescription() return CoTaskMemAlloc'd strings the caller must free.
        CoTaskMemFree(w_id);
        CoTaskMemFree(w_name);
        voice.Release();
    }
    return vs;
}
//------------------------------------------------------------------------------
bool SpeechApi51::setVoice(const std::wstring &voice)
{
    m_voice = voice;
    if (!isLoaded()) {
        return true;
    }

    // get a voice enumerator
    CComPtr<IEnumSpObjectTokens> cpEnum;
    if (FAILED(SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum))) {
        return false;
    }

    // iterate through the list until we find a matching voice
    CComPtr<ISpObjectToken> voice_token;
    while (S_OK == cpEnum->Next(1, &voice_token, NULL)) {
        CSpDynamicString voice_str;
        if (SUCCEEDED(SpGetDescription(voice_token, &voice_str)) &&
            (voice == static_cast<WCHAR *>(voice_str))) {
            m_sapi->SetVoice(voice_token);
            return true;
        }
        // release this token before fetching the next one
        voice_token.Release();
    }
    return false;
}
//------------------------------------------------------------------------------
std::vector<std::wstring> SpeechApi51::getVoices() const
{
    std::vector<std::wstring> ret;
    CoInitialize(NULL);

    // get a voice enumerator
    CComPtr<IEnumSpObjectTokens> cpEnum;
    if (S_OK != SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum)) {
        return ret;
    }

    // iterate through the voices and add their descriptions to the string vector
    CComPtr<ISpObjectToken> voice_token;
    while (S_OK == cpEnum->Next(1, &voice_token, NULL)) {
        CSpDynamicString voice_str;
        if (SUCCEEDED(SpGetDescription(voice_token, &voice_str))) {
            ret.push_back(std::wstring(voice_str));
        }
        voice_token.Release();
    }
    return ret;
}
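// A standalone sketch of the same enumerate-then-select pattern that
// getVoices()/setVoice() above implement. It is not part of SpeechApi51; the
// program structure and the substring match on the description are
// illustrative assumptions. Link against sapi.lib/ole32.lib as usual.
#include <atlbase.h>
#include <sapi.h>
#include <sphelper.h>
#include <iostream>
#include <string>

int wmain(int argc, wchar_t *argv[])
{
    if (FAILED(::CoInitialize(NULL)))
        return 1;

    std::wstring wanted = (argc > 1) ? argv[1] : L"";

    CComPtr<ISpVoice> cpVoice;
    CComPtr<IEnumSpObjectTokens> cpEnum;
    HRESULT hr = cpVoice.CoCreateInstance(CLSID_SpVoice);
    if (SUCCEEDED(hr))
        hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);

    CComPtr<ISpObjectToken> cpToken;
    while (SUCCEEDED(hr) && S_OK == cpEnum->Next(1, &cpToken, NULL)) {
        CSpDynamicString desc;
        if (SUCCEEDED(SpGetDescription(cpToken, &desc))) {
            std::wcout << static_cast<WCHAR *>(desc) << std::endl;
            // Select the first voice whose description contains the requested text.
            if (!wanted.empty() && std::wstring(desc).find(wanted) != std::wstring::npos)
                cpVoice->SetVoice(cpToken);
        }
        cpToken.Release();
    }

    if (SUCCEEDED(hr))
        cpVoice->Speak(L"Hello from the selected voice.", SPF_DEFAULT, NULL);

    ::CoUninitialize();
    return 0;
}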
int main(int argc, char* argv[])
{
    // Parse the parameters, in a similar way to acapelaCmd.
    std::string voice = "iCub_eng";
    if (argc > 1)
        voice = argv[1];
    std::cout << "Voice is: " << voice << std::endl;
    std::cout << "TODO : select the right token from this option." << voice << std::endl;

    std::string textInput;
    std::getline(std::cin, textInput);
    std::cout << "Text is: " << textInput << std::endl;

    if (::CoInitializeEx(NULL, COINIT_MULTITHREADED) == S_OK) {
        HRESULT hr = S_OK;
        CComPtr<IEnumSpObjectTokens> cpIEnum;
        CComPtr<ISpObjectToken> cpToken;
        CComPtr<ISpVoice> cpVoice;

        // Enumerate voice tokens that speak US English in a female voice.
        hr = SpEnumTokens(SPCAT_VOICES, L"Language=409", L"Gender=Female;", &cpIEnum);

        // Get the best matching token.
        if (SUCCEEDED(hr)) {
            hr = cpIEnum->Next(1, &cpToken, NULL);
        }
        // Create a voice and set its token to the one we just found.
        if (SUCCEEDED(hr)) {
            hr = cpVoice.CoCreateInstance(CLSID_SpVoice);
        }
        // Set the voice.
        if (SUCCEEDED(hr)) {
            hr = cpVoice->SetVoice(cpToken);
        }
        // Set the output to the default audio device.
        if (SUCCEEDED(hr)) {
            hr = cpVoice->SetOutput(NULL, TRUE);
        }
        // Speak the string directly.
        if (SUCCEEDED(hr)) {
            hr = cpVoice->Speak(s2ws(textInput).c_str(), NULL, NULL);
        }
    }
    ::CoUninitialize();
    return 0;
}
DWORD WINAPI HandRaisExcer::Txt2SpeechThread()
{
    HRESULT hr = S_OK;
    CComPtr<ISpObjectToken> cpAudioOutToken;
    CComPtr<IEnumSpObjectTokens> cpEnum;
    CComPtr<ISpVoice> cpVoice;
    ULONG ulCount = 0;

    if (FAILED(::CoInitialize(NULL)))
        return FALSE;

    // Create the SAPI voice.
    hr = cpVoice.CoCreateInstance(CLSID_SpVoice);
    if (SUCCEEDED(hr)) {
        // Enumerate the available audio output devices.
        hr = SpEnumTokens(SPCAT_AUDIOOUT, NULL, NULL, &cpEnum);
    }
    if (SUCCEEDED(hr)) {
        // Get the number of audio output devices.
        hr = cpEnum->GetCount(&ulCount);
    }
    if (SUCCEEDED(hr)) {
        // Use the first audio output token.
        hr = cpEnum->Next(1, &cpAudioOutToken, NULL);
    }
    if (SUCCEEDED(hr)) {
        hr = cpVoice->SetOutput(cpAudioOutToken, TRUE);
    }
    if (SUCCEEDED(hr)) {
        cpVoice->SetRate(-1);
        // Poll forever; whenever the hand gesture flag is raised, speak the current string.
        while (TRUE) {
            if (m_isHandReinitial) {
                m_isHandReinitial = FALSE;
                Sleep(100); // Refresh m_pStrToSpeak
                cpVoice->Speak(m_pStrToSpeak, SPF_DEFAULT, NULL);
            }
        }
    }

    ::CoUninitialize();
    if (FAILED(hr))
        return FALSE;
    return TRUE;
}
HRESULT TTSLib::Initialize()
{
    HRESULT hr = S_OK;
    CComPtr<IEnumSpObjectTokens> cpEnum;
    ULONG ulCount = 0;

    // Init COM for SAPI.
    if (FAILED(::CoInitialize(NULL))) {
        m_lastErrorMessage = L"CoInitialize failed!";
        return E_FAIL;
    }

    // Create the SAPI voice.
    hr = m_ispVoice.CoCreateInstance(CLSID_SpVoice);
    if (FAILED(hr)) {
        m_lastErrorMessage = L"CoCreateInstance failed!";
        return hr;
    }

    // Enumerate the available voices.
    hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
    if (FAILED(hr)) {
        m_lastErrorMessage = L"SpEnumTokens failed!";
        return hr;
    }

    // Get the voice count.
    hr = cpEnum->GetCount(&ulCount);
    if (FAILED(hr)) {
        m_lastErrorMessage = L"GetCount cpEnum failed!";
        return hr;
    }

    // Store the object tokens of all voices available on the system.
    // Note: returning FALSE/TRUE from an HRESULT function would map to
    // S_OK/S_FALSE and hide failures, so proper HRESULTs are returned instead.
    m_ispObjectTokens.resize(ulCount);
    for (ULONG i = 0; i < ulCount; ++i) {
        cpEnum->Next(1, &m_ispObjectTokens[i], NULL);
    }
    return S_OK;
}
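// A hedged sketch of a companion setter that applies one of the tokens stored by
// Initialize() above. SetVoiceByIndex is a hypothetical method name, not part of
// the original TTSLib; only m_ispVoice, m_ispObjectTokens and m_lastErrorMessage
// are taken from the code above.
HRESULT TTSLib::SetVoiceByIndex(size_t index)
{
    if (index >= m_ispObjectTokens.size() || !m_ispObjectTokens[index]) {
        m_lastErrorMessage = L"Voice index out of range!";
        return E_INVALIDARG;
    }
    // Hand the stored token to the SAPI voice created in Initialize().
    HRESULT hr = m_ispVoice->SetVoice(m_ispObjectTokens[index]);
    if (FAILED(hr)) {
        m_lastErrorMessage = L"SetVoice failed!";
    }
    return hr;
}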
void DumpCategory(LPCWSTR category)
{
    // enumerate tokens in each category
    IEnumSpObjectTokens *pEnumSpObjectTokens = nullptr;
    HRESULT hr = SpEnumTokens(category, nullptr, nullptr, &pEnumSpObjectTokens);
    if (SPERR_NOT_FOUND == hr) {
        LOG(L" None found.");
        return;
    } else if (FAILED(hr)) {
        ERR(L"SpEnumTokens failed: hr = 0x%08x", hr);
        return;
    }
    ReleaseOnExit rEnumSpObjectTokens(pEnumSpObjectTokens);

    ULONG nTokens = 0;
    hr = pEnumSpObjectTokens->GetCount(&nTokens);
    if (FAILED(hr)) {
        ERR(L"IEnumSpObjectTokens::GetCount failed: hr = 0x%08x", hr);
        return;
    }

    for (ULONG token = 0; token < nTokens; token++) {
        ISpObjectToken *pSpObjectToken = nullptr;
        hr = pEnumSpObjectTokens->Next(1, &pSpObjectToken, nullptr);
        if (FAILED(hr)) {
            ERR(L"IEnumSpObjectTokens::Next failed: hr = 0x%08x", hr);
            return;
        }
        ReleaseOnExit rSpObjectToken(pSpObjectToken);

        LPWSTR description = nullptr;
        hr = SpGetDescription(pSpObjectToken, &description);
        if (FAILED(hr)) {
            ERR(L"SpGetDescription failed: hr = 0x%08x", hr);
            continue;
        }
        CoTaskMemFreeOnExit fDescription(description);

        LOG(L" #%u: %s", token + 1, description);
        EnumDataKey(2, pSpObjectToken);
    }
}
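// DumpCategory above relies on two small RAII helpers that are not shown in the
// snippet. A minimal sketch of what they might look like, assuming they do
// nothing more than release a COM interface or free a CoTaskMemAlloc'd buffer
// when the enclosing scope exits; the real helper classes may differ.
#include <objbase.h>

class ReleaseOnExit {
public:
    explicit ReleaseOnExit(IUnknown *p) : m_p(p) {}
    ~ReleaseOnExit() { if (m_p) m_p->Release(); }
private:
    IUnknown *m_p;
    ReleaseOnExit(const ReleaseOnExit &);            // non-copyable
    ReleaseOnExit &operator=(const ReleaseOnExit &); // non-assignable
};

class CoTaskMemFreeOnExit {
public:
    explicit CoTaskMemFreeOnExit(void *p) : m_p(p) {}
    ~CoTaskMemFreeOnExit() { CoTaskMemFree(m_p); }   // CoTaskMemFree(NULL) is a no-op
private:
    void *m_p;
    CoTaskMemFreeOnExit(const CoTaskMemFreeOnExit &);
    CoTaskMemFreeOnExit &operator=(const CoTaskMemFreeOnExit &);
};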
bool speech_set_voice(int voice)
{
#ifdef _WIN32
    HRESULT hr;
    CComPtr<ISpObjectToken> cpVoiceToken;
    CComPtr<IEnumSpObjectTokens> cpEnum;
    ULONG num_voices = 0;

    // Enumerate the available voices
    hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
    if (FAILED(hr))
        return false;

    hr = cpEnum->GetCount(&num_voices);
    if (FAILED(hr))
        return false;

    int count = 0;
    // Walk the voice tokens until the requested index is reached, then set that voice
    while (num_voices--) {
        cpVoiceToken.Release();
        hr = cpEnum->Next(1, &cpVoiceToken, NULL);
        if (FAILED(hr)) {
            return false;
        }
        if (count == voice) {
            return SUCCEEDED(Voice_device->SetVoice(cpVoiceToken));
        }
        count++;
    }
    return false;
#else
    STUB_FUNCTION;
    return true;
#endif
}
TTSWidget::TTSWidget(QWidget* parent /*= 0*/, Qt::WFlags flags /*= 0*/)
    : QWidget(parent, flags)
{
    setupUi(this);
#ifdef _WIN32
    CoInitialize(NULL);
    HRESULT hr = S_OK;
    CComPtr<ISpObjectToken> cpVoiceToken;
    CComPtr<IEnumSpObjectTokens> cpEnum;
    CComPtr<ISpVoice> cpVoice;
    ULONG ulCount = 0;

    // Create the SAPI voice
    if (SUCCEEDED(hr))
        hr = cpVoice.CoCreateInstance(CLSID_SpVoice);

    // Enumerate the available voices
    if (SUCCEEDED(hr))
        hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);

    // Get the number of voices
    if (SUCCEEDED(hr))
        hr = cpEnum->GetCount(&ulCount);

    // Obtain a list of available voice tokens
    while (SUCCEEDED(hr) && ulCount--) {
        cpVoiceToken.Release();
        if (SUCCEEDED(hr))
            hr = cpEnum->Next(1, &cpVoiceToken, NULL);

        WCHAR* voiceName = 0;
        if (SUCCEEDED(SpGetDescription(cpVoiceToken, &voiceName))) {
            m_voice->addItem(QString::fromUtf16((const ushort*) voiceName));
            CoTaskMemFree(voiceName);
        }
    }
    CoUninitialize();
#endif
    connect(m_voice, SIGNAL(currentIndexChanged(int)), this, SLOT(voiceChanged(int)));
    connect(m_speakButton, SIGNAL(clicked()), this, SLOT(speak()));
}
QtSpeech::QtSpeech(VoiceName n, QObject * parent)
    : QObject(parent), d(new Private)
{
    ULONG count = 0;
    CComPtr<IEnumSpObjectTokens> voices;

    CoInitialize(NULL);
    SysCall( d->voice.CoCreateInstance( CLSID_SpVoice ), InitError);

    if (n.id.isEmpty()) {
        // No voice requested: describe the engine's current default voice.
        WCHAR * w_id = 0L;
        WCHAR * w_name = 0L;
        CComPtr<ISpObjectToken> voice;
        SysCall( d->voice->GetVoice(&voice), InitError);
        SysCall( SpGetDescription(voice, &w_name), InitError);
        SysCall( voice->GetId(&w_id), InitError);
        n.name = QString::fromWCharArray(w_name);
        n.id = QString::fromWCharArray(w_id);
        CoTaskMemFree(w_id);
        CoTaskMemFree(w_name);
        voice.Release();
    }
    else {
        // A voice was requested: find the token whose id matches and select it.
        SysCall( SpEnumTokens(SPCAT_VOICES, NULL, NULL, &voices), InitError);
        SysCall( voices->GetCount(&count), InitError);
        for (ULONG i = 0; i < count; i++) {
            WCHAR * w_id = 0L;
            CComPtr<ISpObjectToken> voice;
            SysCall( voices->Next( 1, &voice, NULL ), InitError);
            SysCall( voice->GetId(&w_id), InitError);
            QString id = QString::fromWCharArray(w_id);
            CoTaskMemFree(w_id);
            if (id == n.id) {
                d->voice->SetVoice(voice);
                break; // CComPtr releases the token when it goes out of scope
            }
            voice.Release();
        }
    }

    if (n.id.isEmpty())
        throw InitError(Where+"No default voice in system");

    d->name = n;
    d->ptrs << this;
}
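// A short usage sketch tying QtSpeech::voices() (first snippet above) to this
// constructor. The selection by display name is illustrative, the "David" voice
// name is only an assumption about what is installed, and say() is an assumed
// speaking method; substitute whatever the QtSpeech API actually exposes.
#include <QCoreApplication>
#include <QDebug>
#include "QtSpeech"   // header name is an assumption

int main(int argc, char *argv[])
{
    QCoreApplication app(argc, argv);

    QtSpeech::VoiceName chosen;
    foreach (const QtSpeech::VoiceName &v, QtSpeech::voices()) {
        qDebug() << v.id << v.name;
        if (v.name.contains("David"))   // pick a voice by (assumed) display name
            chosen = v;
    }

    QtSpeech speech(chosen, 0);          // an empty VoiceName falls back to the default voice
    speech.say("Hello from QtSpeech");   // say() is assumed; adjust to the real API
    return 0;
}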
bool SapiInterface::SetVoiceNative(CString voice)
{
    CComPtr<IEnumSpObjectTokens> cpEnum;
    CSpDynamicString* szDescription;
    CComPtr<ISpObjectToken> cpVoiceToken;  // the token is the voice
    CComPtr<ISpVoice> cpVoice;
    ULONG ulCount = 0;
    HRESULT hr;

    // This gets an enumeration of all voices on the system
    hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
    if (!SUCCEEDED(hr)) {
        return false; // enumeration of voices failed
    }

    hr = cpEnum->GetCount(&ulCount);
    szDescription = new CSpDynamicString[ulCount];

    UINT i = 0;
    while (SUCCEEDED(hr) && ulCount--) {
        cpVoiceToken.Release();
        if (SUCCEEDED(hr)) {
            hr = cpEnum->Next(1, &cpVoiceToken, NULL);
            hr = SpGetDescription(cpVoiceToken, &szDescription[i]);
        }
        if (SUCCEEDED(hr)) {
            // Compare the voice description against the requested name
            if (CString(szDescription[i]) == voice) {
                hr = pVoice->SetVoice(cpVoiceToken);
                delete [] szDescription;
                return true;
            }
        }
        i++;
    }

    // delete the voice list if one is not found and return false
    delete [] szDescription;
    return false;
}
void CSpeechSynthesizer::Initialize()
{
    // Language=C09;Language=809;Language=409;Language=411;Language=412;Language=804;Language=C04;Language=404;
    m_tokenMap = new std::map<std::string*, std::string*>();
    m_registryList = new std::map<std::string*, std::string*>();
    getVoicesInRegistry();

    // CoInitialize takes a reserved pointer that must be NULL.
    CoInitialize(NULL);

    // Enumerate voice tokens that speak in a female voice.
    HRESULT hr = SpEnumTokens(SPCAT_VOICES, L"", L"Gender=Female;", &cpIEnum);

    // Get the best matching token.
    if (SUCCEEDED(hr)) {
        hr = cpIEnum->Next(1, &cpToken, NULL);
    }
    // Create a voice and set its token to the one we just found.
    if (SUCCEEDED(hr)) {
        hr = cpVoice.CoCreateInstance(CLSID_SpVoice);
    }
    // Set the voice.
    if (SUCCEEDED(hr)) {
        hr = cpVoice->SetVoice(cpToken);
    }
    // Set the output to the default audio device.
    if (SUCCEEDED(hr)) {
        hr = cpVoice->SetOutput(NULL, TRUE);
    }
    m_initialized = true;
}
/* * Some notes about this function: * This function returns a list of available SAPI voices. It has been fixed and should be working correclty * on Windows 7 and all other versions of Windows. * * The solution to getting this function working was found here: * http://stackoverflow.com/questions/4336245/how-to-return-a-java-string-in-c-using-jni * * In addition to the above StackOverflow thread, it was necessary to change this function to return an std::string instance instead * of a character pointer, and to use stringstreams instead of CStrings. */ std::string SapiInterface::GetVoiceNative() { std::stringstream sstream; sstream << "<?xml version=\"1.0\"?>"; ISpVoice * pVoice = NULL; CComPtr<ISpObjectToken> cpVoiceToken; CComPtr<IEnumSpObjectTokens> cpEnum; CComPtr<ISpVoice> cpVoice; ULONG ulCount = 0; HRESULT hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void **)&pVoice); if(SUCCEEDED(hr)) { WCHAR **m_ppszTokenIds; USES_CONVERSION; CComPtr<IEnumSpObjectTokens> cpEnum; WCHAR** szDescription; ISpObjectToken *pToken = NULL; CComPtr<ISpObjectToken> cpVoiceToken; //the token is the voice CComPtr<ISpVoice> cpVoice; ULONG ulCount = 0; if(SUCCEEDED(hr)) { hr = cpVoice.CoCreateInstance( CLSID_SpVoice ); if(SUCCEEDED(hr)) { WCHAR *pszCurTokenId = NULL; ULONG ulIndex = 0, ulNumTokens = 0, ulCurToken = -1; hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum); if (hr == S_OK) { hr = cpEnum->GetCount( &ulNumTokens ); if (SUCCEEDED(hr) && ulNumTokens != 0) { szDescription = new WCHAR* [ulNumTokens]; m_ppszTokenIds = new WCHAR* [ulNumTokens]; ZeroMemory(m_ppszTokenIds, ulNumTokens * sizeof(WCHAR *)); UINT i =0; while (cpEnum->Next(1, &pToken, NULL) == S_OK) { //Don't care about return value in next line: hr = SpGetDescription(pToken, &szDescription[ulIndex]); ulIndex++; int utf8_length = WideCharToMultiByte( CP_UTF8, // Convert to UTF-8 0, // No special character conversions required // (UTF-16 and UTF-8 support the same characters) szDescription[i], // UTF-16 string to convert -1, // utf16 is NULL terminated (if not, use length) NULL, // Determining correct output buffer size 0, // Determining correct output buffer size NULL, // Must be NULL for CP_UTF8 NULL); // Must be NULL for CP_UTF8 if (utf8_length == 0) { return std::string("WideCharToMultibyte error."); } char *utf8_voice = new char[utf8_length]; utf8_length = WideCharToMultiByte( CP_UTF8, // Convert to UTF-8 0, // No special character conversions required // (UTF-16 and UTF-8 support the same characters) szDescription[i], // UTF-16 string to convert -1, // utf16 is NULL terminated (if not, use length) utf8_voice, // UTF-8 output buffer utf8_length, // UTF-8 output buffer size NULL, // Must be NULL for CP_UTF8 NULL); // Must be NULL for CP_UTF8 //strConcatenateXML += "<voice>" + szDescription[i] + "</voice>"; sstream << "<voice>" << utf8_voice << "</voice>"; pToken->Release(); pToken = NULL; i++; } delete [] szDescription; } else { //strConcatenateXML = "No voice found. (5)"; sstream << "No voice found. (5)"; } } else { //strConcatenateXML = "No voice found. (4)"; sstream << "No voice found. (4)"; } } else { //strConcatenateXML = "No voice found. (3)"; sstream << "No voice found. (3)"; } } else { //strConcatenateXML = "No voice found. (2)"; sstream << "No voice found. (2)"; } } else { //strConcatenateXML = "No voice found. (1)"; sstream << "No voice found. (1)"; } return sstream.str(); }
/****************************************************************************** * ManageEmployeesPaneProc * *-------------------------* * Description: * Handles messages specifically for the manage employees pane. * ******************************************************************************/ LRESULT ManageEmployeesPaneProc( HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam ) { static ULONG ulNumTokens; static ULONG ulCurToken; static WCHAR** ppszTokenIds; static CSpDynamicString* ppcDescriptionString; // This is string helper class in sphelper.h static UINT iCurEnum; // Indicates if we should list males, females, or both switch ( message ) { case WM_GOTOOFFICE: { // Set the right message handler and repaint g_fpCurrentPane = OfficePaneProc; //Cleanup our variables ManageEmployeesPaneCleanup( ppszTokenIds, ppcDescriptionString, ulNumTokens ); ppszTokenIds = NULL; ppcDescriptionString = NULL; ulNumTokens = 0; // Set the hear voice rule to inactive HRESULT hr = g_cpCmdGrammar->SetRuleIdState( VID_HearTheVoice, SPRS_INACTIVE ); _ASSERTE( SUCCEEDED( hr ) ); hr = g_cpCmdGrammar->SetRuleIdState( VID_OtherRules, SPRS_INACTIVE ); _ASSERTE( SUCCEEDED( hr ) ); hr = g_cpCmdGrammar->SetRuleIdState( DYN_TTSVOICERULE, SPRS_ACTIVE ); _ASSERTE( SUCCEEDED( hr ) ); PostMessage( hWnd, WM_INITPANE, NULL, NULL ); InvalidateRect( hWnd, NULL, TRUE ); return ( 1 ); } case WM_GOTOCOUNTER: { // Set the right message handler and repaint g_fpCurrentPane = CounterPaneProc; //Cleanup our variables ManageEmployeesPaneCleanup( ppszTokenIds, ppcDescriptionString, ulNumTokens ); ppszTokenIds = NULL; ppcDescriptionString = NULL; ulNumTokens = 0; // Set the hear voice rule to inactive HRESULT hr = g_cpCmdGrammar->SetRuleIdState( VID_HearTheVoice, SPRS_INACTIVE ); _ASSERTE( SUCCEEDED( hr ) ); hr = g_cpCmdGrammar->SetRuleIdState( VID_OtherRules, SPRS_INACTIVE ); _ASSERTE( SUCCEEDED( hr ) ); hr = g_cpCmdGrammar->SetRuleIdState( DYN_TTSVOICERULE, SPRS_ACTIVE ); _ASSERTE( SUCCEEDED( hr ) ); PostMessage( hWnd, WM_INITPANE, NULL, NULL ); InvalidateRect( hWnd, NULL, TRUE ); return ( 1 ); } case WM_PAINT: { // Do the actual UI paint ManageEmployeesPanePaint( hWnd, ulNumTokens, ppcDescriptionString, ulCurToken, iCurEnum ); return ( 1 ); } case WM_INITPANE: { ISpObjectToken *pToken = NULL; // Token interface pointer CComPtr<IEnumSpObjectTokens> cpEnum; // Pointer to token enumerator ULONG ulIndex = 0; ulCurToken = 0xffffffff; WCHAR *szRequiredAttributes = NULL; SPSTATEHANDLE hDynamicRuleHandle; // Handle to our dynamic rule // Set the required attributes field for the enum if we have special needs // based on our LPARAM in if ( 0 == lParam ) { szRequiredAttributes = L"Gender=Male"; } else if ( 1 == lParam ) { szRequiredAttributes = L"Gender=Female"; } // Get a token enumerator for tts voices available HRESULT hr = SpEnumTokens(SPCAT_VOICES, szRequiredAttributes, NULL, &cpEnum); if ( S_OK == hr ) { // Get the numbers of tokens found hr = cpEnum->GetCount( &ulNumTokens ); if ( SUCCEEDED( hr ) && 0 != ulNumTokens ) { // Create arrays we need for storing data ppcDescriptionString = new CSpDynamicString [ulNumTokens]; if ( NULL == ppcDescriptionString ) { break; } ppszTokenIds = new WCHAR* [ulNumTokens]; if ( NULL == ppszTokenIds ) { break; } ZeroMemory( ppszTokenIds, ulNumTokens*sizeof( WCHAR* ) ); // Get the next token in the enumeration // State is maintained in the enumerator while (cpEnum->Next(1, &pToken, NULL) == S_OK) { // Get a string which describes the token, in our case, the voice name hr = SpGetDescription( pToken, 
&ppcDescriptionString[ulIndex] ); _ASSERTE( SUCCEEDED( hr ) ); // Get the token id, for a low overhead way to retrieve the token later // without holding on to the object itself hr = pToken->GetId( &ppszTokenIds[ulIndex] ); _ASSERTE( SUCCEEDED( hr ) ); ulIndex++; // Release the token itself pToken->Release(); pToken = NULL; } } // if we've failed to properly initialize, then we should completely shut-down if ( S_OK != hr ) { if ( pToken ) { pToken->Release(); } ManageEmployeesPaneCleanup( ppszTokenIds, ppcDescriptionString, ulNumTokens ); ppszTokenIds = NULL; ppcDescriptionString = NULL; ulNumTokens = 0; } // Find out which token corresponds to our voice which is currently in use else { WCHAR *pszCurTokenId = NULL; // Get the token representing the current voice hr = g_cpVoice->GetVoice( &pToken ); if ( SUCCEEDED( hr ) ) { // Get the current token ID, and compare it against others to figure out // which description string is the one currently selected. hr = pToken->GetId( &pszCurTokenId ); if ( SUCCEEDED( hr ) ) { ulIndex = 0; while ( ulIndex < ulNumTokens && 0 != _wcsicmp( pszCurTokenId, ppszTokenIds[ulIndex] ) ) { ulIndex++; } // We found it, so set the current index to that of the current token if ( ulIndex < ulNumTokens ) { ulCurToken = ulIndex; } CoTaskMemFree( pszCurTokenId ); } pToken->Release(); } } } // Initially, we see both genders _ASSERTE( lParam >= 0 && lParam <= 2); iCurEnum = (UINT)lParam; // Create a dynamic rule containing the description strings of the voice tokens hr = g_cpCmdGrammar->GetRule(NULL, DYN_TTSVOICERULE, SPRAF_TopLevel | SPRAF_Active | SPRAF_Dynamic, TRUE, &hDynamicRuleHandle); if ( SUCCEEDED( hr ) ) { // Clear the rule first hr = g_cpCmdGrammar->ClearRule( hDynamicRuleHandle ); _ASSERTE( SUCCEEDED( hr ) ); // Commit the changes hr = g_cpCmdGrammar->Commit(0); _ASSERTE( SUCCEEDED( hr ) ); // Add description names as the word, ulIndex as id for ( ulIndex = 0; ulIndex < ulNumTokens; ulIndex++ ) { SPPROPERTYINFO prop; prop.pszName = L"Id"; prop.pszValue = L"Property"; prop.vValue.vt = VT_I4; prop.vValue.ulVal = ulIndex; hr = g_cpCmdGrammar->AddWordTransition( hDynamicRuleHandle, NULL, ppcDescriptionString[ulIndex], L" ", SPWT_LEXICAL, 1.0, &prop); _ASSERTE( SUCCEEDED( hr ) ); } // Commit the changes hr = g_cpCmdGrammar->Commit(0); _ASSERTE( SUCCEEDED( hr ) ); // Set the dynamic rules to active hr = g_cpCmdGrammar->SetRuleIdState( DYN_TTSVOICERULE, SPRS_ACTIVE ); _ASSERTE( SUCCEEDED( hr ) ); } // Set the hear voice rule to active hr = g_cpCmdGrammar->SetRuleIdState( VID_HearTheVoice, SPRS_ACTIVE ); _ASSERTE( SUCCEEDED( hr ) ); hr = g_cpCmdGrammar->SetRuleIdState( VID_OtherRules, SPRS_ACTIVE ); _ASSERTE( SUCCEEDED( hr ) ); InvalidateRect( hWnd, NULL, TRUE ); return ( 1 ); } case WM_DESTROY: // Windows is closing down, so we should cleanup ManageEmployeesPaneCleanup( ppszTokenIds, ppcDescriptionString, ulNumTokens ); ppszTokenIds = NULL; ppcDescriptionString = NULL; ulNumTokens = 0; return ( 1 ); case WM_HEARTHEVOICE: // Set the voice to play LoadString( g_hInst, IDS_VOICESPEAK, g_szCounterDisplay, MAX_LOADSTRING ); g_cpVoice->Speak( CT2W(g_szCounterDisplay), SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL ); return ( 1 ); case WM_MISCCOMMAND: { // Find out the offset from the first property we're interested in, so we can verify that // it's within range. 
UINT iSelection = (UINT)(lParam - VID_MalesOnly); if ( 2 >= iSelection ) { // If we have a new listing criteria, we basically shutdown the pane and start it again if ( iSelection != iCurEnum ) { HRESULT hr = g_cpCmdGrammar->SetRuleIdState( VID_HearTheVoice, SPRS_INACTIVE ); _ASSERTE( SUCCEEDED( hr ) ); hr = g_cpCmdGrammar->SetRuleIdState( VID_OtherRules, SPRS_INACTIVE ); _ASSERTE( SUCCEEDED( hr ) ); ManageEmployeesPaneCleanup( ppszTokenIds, ppcDescriptionString, ulNumTokens ); ppszTokenIds = NULL; ppcDescriptionString = NULL; ulNumTokens = 0; PostMessage( hWnd, WM_INITPANE, 0, (LPARAM) iSelection ); } } return ( 1 ); } case WM_TTSVOICESEL: { // If we are out of range, it is a programming error _ASSERTE( 0 <= lParam && ulNumTokens > (ULONG) lParam ); // The returned Id is an index into our tokenId table, so create a token from the id CComPtr< ISpObjectToken > pToken; HRESULT hr = SpGetTokenFromId( ppszTokenIds[lParam], &pToken, FALSE); if ( SUCCEEDED( hr ) ) { // Set our current voice from the returned token hr = g_cpVoice->SetVoice( pToken ); _ASSERTE( SUCCEEDED( hr ) ); // Change our current voice index ulCurToken = (UINT)lParam; } InvalidateRect( hWnd, NULL, TRUE ); return ( 1 ); } } return ( 0 ); }
/****************************************************************************** * ManageEmployeesPaneProc * *-------------------------* * Description: * Handles messages specifically for the manage employees pane. * ******************************************************************************/ LRESULT ManageEmployeesPaneProc( HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam ) { USES_CONVERSION; static ULONG ulNumTokens; static ULONG ulCurToken; static WCHAR** ppszTokenIds; static CSpDynamicString* ppcDesciptionString; // This is string helper class in sphelper.h switch ( message ) { case WM_GOTOOFFICE: { // Set the right message handler and repaint g_fpCurrentPane = OfficePaneProc; //Cleanup our variables ManageEmployeesPaneCleanup( ppszTokenIds, ppcDesciptionString, ulNumTokens ); ppszTokenIds = NULL; ppcDesciptionString = NULL; ulNumTokens = 0; // Set the hear voice rule to inactive HRESULT hr = g_cpCmdGrammar->SetRuleIdState( VID_HearTheVoice, SPRS_INACTIVE ); _ASSERTE( SUCCEEDED( hr ) ); PostMessage( hWnd, WM_INITPANE, NULL, NULL ); InvalidateRect( hWnd, NULL, TRUE ); return ( 1 ); } case WM_GOTOCOUNTER: { // Set the right message handler and repaint g_fpCurrentPane = CounterPaneProc; //Cleanup our variables ManageEmployeesPaneCleanup( ppszTokenIds, ppcDesciptionString, ulNumTokens ); ppszTokenIds = NULL; ppcDesciptionString = NULL; ulNumTokens = 0; // Set the hear voice rule to inactive HRESULT hr = g_cpCmdGrammar->SetRuleIdState( VID_HearTheVoice, SPRS_INACTIVE ); _ASSERTE( SUCCEEDED( hr ) ); PostMessage( hWnd, WM_INITPANE, NULL, NULL ); InvalidateRect( hWnd, NULL, TRUE ); return ( 1 ); } case WM_PAINT: { // Do the actual UI paint ManageEmployeesPanePaint( hWnd, ulNumTokens, ppcDesciptionString, ulCurToken ); return ( 1 ); } case WM_INITPANE: { ISpObjectToken *pToken = NULL; // Token interface pointer CComPtr<IEnumSpObjectTokens> cpEnum; // Pointer to token enumerator ULONG ulIndex = 0; ulCurToken = 0xffffffff; // Get a token enumerator for tts voices available HRESULT hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum); if ( S_OK == hr ) { // Get the numbers of tokens found hr = cpEnum->GetCount( &ulNumTokens ); if ( SUCCEEDED( hr ) && 0 != ulNumTokens ) { // Create arrays we need for storing data ppcDesciptionString = new CSpDynamicString [ulNumTokens]; if ( NULL == ppcDesciptionString ) { hr = E_OUTOFMEMORY; break; } ppszTokenIds = new WCHAR* [ulNumTokens]; if ( NULL == ppszTokenIds ) { hr = E_OUTOFMEMORY; break; } ZeroMemory( ppszTokenIds, ulNumTokens*sizeof( WCHAR* ) ); // Get the next token in the enumeration // State is maintained in the enumerator while (cpEnum->Next(1, &pToken, NULL) == S_OK) { // Get a string which describes the token, in our case, the voice name hr = SpGetDescription( pToken, &ppcDesciptionString[ulIndex] ); _ASSERTE( SUCCEEDED( hr ) ); // Get the token id, for a low overhead way to retrieve the token later // without holding on to the object itself hr = pToken->GetId( &ppszTokenIds[ulIndex] ); _ASSERTE( SUCCEEDED( hr ) ); ulIndex++; // Release the token itself pToken->Release(); pToken = NULL; } } // if we've failed to properly initialize, then we should completely shut-down if ( S_OK != hr ) { if ( pToken ) { pToken->Release(); } ManageEmployeesPaneCleanup( ppszTokenIds, ppcDesciptionString, ulNumTokens ); ppszTokenIds = NULL; ppcDesciptionString = NULL; ulNumTokens = 0; } // Find out which token corresponds to our voice which is currently in use else { WCHAR *pszCurTokenId = NULL; // Get the token representing the current voice HRESULT hr = 
g_cpVoice->GetVoice( &pToken ); if ( SUCCEEDED( hr ) ) { // Get the current token ID, and compare it against others to figure out // which desciption string is the one currently selected. hr = pToken->GetId( &pszCurTokenId ); if ( SUCCEEDED( hr ) ) { ulIndex = 0; while ( ulIndex < ulNumTokens && 0 != _wcsicmp( pszCurTokenId, ppszTokenIds[ulIndex] ) ) { ulIndex++; } // We found it, so set the current index to that of the current token if ( ulIndex < ulNumTokens ) { ulCurToken = ulIndex; } CoTaskMemFree( pszCurTokenId ); } pToken->Release(); } } } // Set the hear voice rule to active hr = g_cpCmdGrammar->SetRuleIdState( VID_HearTheVoice, SPRS_ACTIVE ); _ASSERTE( SUCCEEDED( hr ) ); return ( 1 ); } case WM_DESTROY: // Windows is closing down, so we should cleanup ManageEmployeesPaneCleanup( ppszTokenIds, ppcDesciptionString, ulNumTokens ); return ( 1 ); case WM_HEARTHEVOICE: // Set the voice to play LoadString( g_hInst, IDS_VOICESPEAK, g_szCounterDisplay, MAX_LOADSTRING ); g_cpVoice->Speak( T2W(g_szCounterDisplay), SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL ); } return ( 0 ); }
static void configure() { configure_changed = false; GtkWidget *window = gtk_dialog_new_with_buttons(_("SAPI TTS configuration"), GTK_WINDOW(plugin_info->pluginwin), GTK_DIALOG_MODAL, GTK_STOCK_OK, GTK_RESPONSE_ACCEPT, NULL); GtkWidget *vbox = gtk_vbox_new(false, 5); gtk_container_set_border_width(GTK_CONTAINER(vbox),5); gtk_container_add (GTK_CONTAINER (GTK_DIALOG(window)->vbox), vbox); GtkWidget *frame = gtk_frame_new(_("TTS voice engine")); gtk_box_pack_start(GTK_BOX(vbox), frame, false, false, 3); GtkWidget *table = gtk_table_new(2, 3, false); gtk_container_set_border_width(GTK_CONTAINER(table),8); gtk_container_add (GTK_CONTAINER (frame), table); GtkWidget *label = gtk_label_new(_("Voice :")); gtk_misc_set_alignment (GTK_MISC (label), 0, .5); gtk_table_attach(GTK_TABLE(table), label, 0, 1, 0, 1, GTK_FILL, (GtkAttachOptions)0, 5, 0); GtkWidget *combobox = gtk_combo_box_new_text(); gtk_table_attach(GTK_TABLE(table), combobox, 1, 2, 0, 1, GtkAttachOptions(GTK_FILL | GTK_EXPAND), (GtkAttachOptions)0, 5, 0); ULONG ulNumTokens; WCHAR** ppszTokenIds = NULL; CComPtr<IEnumSpObjectTokens> cpEnum; HRESULT hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum); if ( S_OK == hr ) { hr = cpEnum->GetCount( &ulNumTokens ); if ( SUCCEEDED( hr ) && 0 != ulNumTokens ) { WCHAR *pszCurTokenId = NULL; ISpObjectToken *pToken; HRESULT hr = pVoice->GetVoice( &pToken ); if ( SUCCEEDED( hr ) ) { pToken->GetId( &pszCurTokenId ); pToken->Release(); } if (pszCurTokenId) { ppszTokenIds = new WCHAR* [ulNumTokens]; ZeroMemory( ppszTokenIds, ulNumTokens*sizeof( WCHAR* ) ); LONG ulCurToken = -1; ULONG ulIndex = 0; while (cpEnum->Next(1, &pToken, NULL) == S_OK) { WCHAR *description; SpGetDescription( pToken, &description); DWORD dwNum = WideCharToMultiByte(CP_UTF8,NULL,description,-1,NULL,0,NULL,FALSE); char *text = new char[dwNum]; WideCharToMultiByte (CP_UTF8,NULL,description,-1,text,dwNum,NULL,FALSE); gtk_combo_box_append_text(GTK_COMBO_BOX(combobox), text); delete []text; CoTaskMemFree(description); pToken->GetId( &ppszTokenIds[ulIndex]); if (ulCurToken == -1 && _wcsicmp( pszCurTokenId, ppszTokenIds[ulIndex]) == 0) { ulCurToken = ulIndex; } ulIndex++; pToken->Release(); } CoTaskMemFree( pszCurTokenId ); if (ulCurToken != -1) gtk_combo_box_set_active(GTK_COMBO_BOX(combobox), ulCurToken); } } } g_signal_connect (G_OBJECT (combobox), "changed", G_CALLBACK (on_voice_combobox_changed), ppszTokenIds); label = gtk_label_new(_("Volume :")); gtk_misc_set_alignment (GTK_MISC (label), 0, .5); gtk_table_attach(GTK_TABLE(table), label, 0, 1, 1, 2, GTK_FILL, (GtkAttachOptions)0, 5, 0); GtkWidget *volume_hscale = gtk_hscale_new_with_range(0, 100, 1); gtk_table_attach(GTK_TABLE(table), volume_hscale, 1, 2, 1, 2, GtkAttachOptions(GTK_FILL | GTK_EXPAND), (GtkAttachOptions)0, 5, 0); USHORT volume; pVoice->GetVolume(&volume); gtk_range_set_value(GTK_RANGE(volume_hscale), volume); g_signal_connect(G_OBJECT(volume_hscale),"value-changed", G_CALLBACK(on_volume_hscale_value_changed), NULL); label = gtk_label_new(_("Rate :")); gtk_misc_set_alignment (GTK_MISC (label), 0, .5); gtk_table_attach(GTK_TABLE(table), label, 0, 1, 2, 3, GTK_FILL, (GtkAttachOptions)0, 5, 0); GtkWidget *rate_hscale = gtk_hscale_new_with_range(-10, 10, 1); gtk_table_attach(GTK_TABLE(table), rate_hscale, 1, 2, 2, 3, GtkAttachOptions(GTK_FILL | GTK_EXPAND), (GtkAttachOptions)0, 5, 0); long rate; pVoice->GetRate(&rate); gtk_range_set_value(GTK_RANGE(rate_hscale), rate); g_signal_connect(G_OBJECT(rate_hscale),"value-changed", G_CALLBACK(on_rate_hscale_value_changed), NULL); 
GtkWidget *vbox1 = gtk_vbox_new(false, 5); gtk_box_pack_start(GTK_BOX(vbox), vbox1, false, false, 10); label = gtk_label_new(_("Input the test text:")); gtk_misc_set_alignment (GTK_MISC (label), 0, .5); gtk_box_pack_start(GTK_BOX(vbox1), label, false, false, 0); GtkWidget *hbox = gtk_hbox_new(false, 5); gtk_box_pack_start(GTK_BOX(vbox1), hbox, false, false, 0); GtkWidget *entry = gtk_entry_new(); gtk_entry_set_text(GTK_ENTRY(entry), "This is the test text"); gtk_box_pack_start(GTK_BOX(hbox), entry, true, true, 0); GtkWidget *button = gtk_button_new_with_label(_("Test")); gtk_box_pack_start(GTK_BOX(hbox), button, false, false, 0); g_signal_connect(G_OBJECT(button),"clicked", G_CALLBACK(on_test_tts_button_clicked), GTK_ENTRY(entry)); gtk_widget_show_all(vbox); gtk_dialog_run(GTK_DIALOG(window)); if (configure_changed) { char *voice = NULL; ISpObjectToken *pToken; HRESULT hr = pVoice->GetVoice( &pToken ); if ( SUCCEEDED( hr ) ) { WCHAR *id; pToken->GetId( &id ); DWORD dwNum = WideCharToMultiByte(CP_UTF8,NULL,id,-1,NULL,0,NULL,FALSE); voice = new char[dwNum]; WideCharToMultiByte (CP_UTF8,NULL,id,-1,voice,dwNum,NULL,FALSE); CoTaskMemFree(id); pToken->Release(); } if (voice) { gint volume = (gint)gtk_range_get_value(GTK_RANGE(volume_hscale)); gint rate = (gint)gtk_range_get_value(GTK_RANGE(rate_hscale)); gchar *data = g_strdup_printf("[sapi_tts]\nvoice=%s\nvolume=%d\nrate=%d\n", voice, volume, rate); std::string res = get_cfg_filename(); g_file_set_contents(res.c_str(), data, -1, NULL); g_free(data); delete []voice; } } ULONG ulIndex; if ( ppszTokenIds ) { for ( ulIndex = 0; ulIndex < ulNumTokens; ulIndex++ ) { if ( NULL != ppszTokenIds[ulIndex] ) { CoTaskMemFree( ppszTokenIds[ulIndex] ); } } delete [] ppszTokenIds; } gtk_widget_destroy (window); }
void Sound::test() { ISpVoice * pVoice = NULL; ISpObjectToken* pVoiceToken=nullptr; IEnumSpObjectTokens* pEnum; ULONG ulCount = 0; if (FAILED(::CoInitialize(NULL))) { return; } HRESULT hr = S_OK; // Find the best matching installed en-us recognizer. CComPtr<ISpObjectToken> cpRecognizerToken; if (SUCCEEDED(hr)) { hr = SpFindBestToken(SPCAT_RECOGNIZERS, L"language=409", NULL, &cpRecognizerToken); } // Create the in-process recognizer and immediately set its state to inactive. CComPtr<ISpRecognizer> cpRecognizer; if (SUCCEEDED(hr)) { hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer); } if (SUCCEEDED(hr)) { hr = cpRecognizer->SetRecognizer(cpRecognizerToken); } if (SUCCEEDED(hr)) { hr = cpRecognizer->SetRecoState(SPRST_INACTIVE); } // Create a new recognition context from the recognizer. CComPtr<ISpRecoContext> cpContext; if (SUCCEEDED(hr)) { hr = cpRecognizer->CreateRecoContext(&cpContext); } // Subscribe to the speech recognition event and end stream event. if (SUCCEEDED(hr)) { ULONGLONG ullEventInterest = SPFEI(SPEI_RECOGNITION); hr = cpContext->SetInterest(ullEventInterest, ullEventInterest); } // Establish a Win32 event to signal when speech events are available. HANDLE hSpeechNotifyEvent = INVALID_HANDLE_VALUE; if (SUCCEEDED(hr)) { hr = cpContext->SetNotifyWin32Event(); } if (SUCCEEDED(hr)) { hSpeechNotifyEvent = cpContext->GetNotifyEventHandle(); if (INVALID_HANDLE_VALUE == hSpeechNotifyEvent) { // Notification handle unsupported. hr = E_NOINTERFACE; } } // Initialize an audio object to use the default audio input of the system and set the recognizer to use it. CComPtr<ISpAudio> cpAudioIn; if (SUCCEEDED(hr)) { hr = cpAudioIn.CoCreateInstance(CLSID_SpMMAudioIn); } if (SUCCEEDED(hr)) { hr = cpRecognizer->SetInput(cpAudioIn, TRUE); } // Populate a WAVEFORMATEX struct with our desired output audio format. information. WAVEFORMATEX* pWfexCoMemRetainedAudioFormat = NULL; GUID guidRetainedAudioFormat = GUID_NULL; if (SUCCEEDED(hr)) { hr = SpConvertStreamFormatEnum(SPSF_16kHz16BitMono, &guidRetainedAudioFormat, &pWfexCoMemRetainedAudioFormat); } // Instruct the recognizer to retain the audio from its recognition results. if (SUCCEEDED(hr)) { hr = cpContext->SetAudioOptions(SPAO_RETAIN_AUDIO, &guidRetainedAudioFormat, pWfexCoMemRetainedAudioFormat); } if (NULL != pWfexCoMemRetainedAudioFormat) { CoTaskMemFree(pWfexCoMemRetainedAudioFormat); } // Create a new grammar and load an SRGS grammar from file. CComPtr<ISpRecoGrammar> cpGrammar; if (SUCCEEDED(hr)) { hr = cpContext->CreateGrammar(0, &cpGrammar); } if (SUCCEEDED(hr)) { hr = cpGrammar->LoadCmdFromFile(L"grammar.grxml", SPLO_STATIC); } // Set all top-level rules in the new grammar to the active state. if (SUCCEEDED(hr)) { hr = cpGrammar->SetRuleState(NULL, NULL, SPRS_ACTIVE); } // Set the recognizer state to active to begin recognition. if (SUCCEEDED(hr)) { hr = cpRecognizer->SetRecoState(SPRST_ACTIVE_ALWAYS); } // Establish a separate Win32 event to signal the event loop exit. HANDLE hExitEvent = CreateEventW(NULL, FALSE, FALSE, NULL); // Collect the events listened for to pump the speech event loop. HANDLE rghEvents[] = { hSpeechNotifyEvent, hExitEvent }; // Speech recognition event loop. BOOL fContinue = TRUE; while (fContinue && SUCCEEDED(hr)) { // Wait for either a speech event or an exit event, with a 15 second timeout. 
DWORD dwMessage = WaitForMultipleObjects(sp_countof(rghEvents), rghEvents, FALSE, 15000); switch (dwMessage) { // With the WaitForMultipleObjects call above, WAIT_OBJECT_0 is a speech event from hSpeechNotifyEvent. case WAIT_OBJECT_0: { // Sequentially grab the available speech events from the speech event queue. CSpEvent spevent; while (S_OK == spevent.GetFrom(cpContext)) { switch (spevent.eEventId) { case SPEI_RECOGNITION: { // Retrieve the recognition result and output the text of that result. ISpRecoResult* pResult = spevent.RecoResult(); LPWSTR pszCoMemResultText = NULL; hr = pResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &pszCoMemResultText, NULL); if (SUCCEEDED(hr)) { wprintf(L"Recognition event received, text=\"%s\"\r\n", pszCoMemResultText); } // Also retrieve the retained audio we requested. CComPtr<ISpStreamFormat> cpRetainedAudio; if (SUCCEEDED(hr)) { hr = pResult->GetAudio(0, 0, &cpRetainedAudio); } // To demonstrate, we'll speak the retained audio back using ISpVoice. CComPtr<ISpVoice> cpVoice; if (SUCCEEDED(hr)) { hr = cpVoice.CoCreateInstance(CLSID_SpVoice); } if (SUCCEEDED(hr)) { hr = cpVoice->SpeakStream(cpRetainedAudio, SPF_DEFAULT, 0); } if (NULL != pszCoMemResultText) { CoTaskMemFree(pszCoMemResultText); } break; } } } break; } case WAIT_OBJECT_0 + 1: case WAIT_TIMEOUT: { // Exit event or timeout; discontinue the speech loop. fContinue = FALSE; //break; } } } CoUninitialize(); CComPtr <ISpVoice> cpVoice; CComPtr <ISpStream> cpStream; CSpStreamFormat cAudioFmt; //Create a SAPI Voice hr = cpVoice.CoCreateInstance(CLSID_SpVoice); //Set the audio format if (SUCCEEDED(hr)) { hr = cAudioFmt.AssignFormat(SPSF_22kHz16BitMono); } //Call SPBindToFile, a SAPI helper method, to bind the audio stream to the file if (SUCCEEDED(hr)) { hr = SPBindToFile(L"c:\\ttstemp.wav", SPFM_CREATE_ALWAYS, &cpStream, &cAudioFmt.FormatId(), cAudioFmt.WaveFormatExPtr()); } //set the output to cpStream so that the output audio data will be stored in cpStream if (SUCCEEDED(hr)) { hr = cpVoice->SetOutput(cpStream, TRUE); } //Speak the text "hello world" synchronously if (SUCCEEDED(hr)) { hr = cpVoice->Speak(L"Hello World", SPF_DEFAULT, NULL); } //close the stream if (SUCCEEDED(hr)) { hr = cpStream->Close(); } //Release the stream and voice object cpStream.Release(); cpVoice.Release(); CComPtr<ISpGrammarBuilder> cpGrammarBuilder; SPSTATEHANDLE hStateTravel; // Create (if rule does not already exist) // top-level Rule, defaulting to Active. hr = cpGrammarBuilder->GetRule(L"Travel", 0, SPRAF_TopLevel | SPRAF_Active, TRUE, &hStateTravel); // Approach 1: List all possible phrases. // This is the most intuitive approach, and it does not sacrifice efficiency // because the grammar builder will merge shared sub-phrases when possible. // There is only one root state, hStateTravel, and the terminal NULL state, // and there are six unique transitions between root state and NULL state. /* XML Approximation: <rule id="Travel"> <item> fly to Seattle </item> <item> fly to New York </item> <item> fly to Washington DC </item> <item> drive to Seattle </item> <item> drive to New York </item> <item> drive to Washington DC </item> </rule> */ // Create set of peer phrases, each containing complete phrase. 
// Note: the word delimiter is set as " ", so that the text we // attach to the transition can be multiple words (for example, // "fly to Seattle" is implicitly "fly" + "to" + "Seattle"): if (SUCCEEDED(hr)) { hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"fly to Seattle", L" ", SPWT_LEXICAL, 1, NULL); } if (SUCCEEDED(hr)) { hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"fly to New York", L" ", SPWT_LEXICAL, 1, NULL); } if (SUCCEEDED(hr)) { hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"fly to Washington DC", L" ", SPWT_LEXICAL, 1, NULL); } if (SUCCEEDED(hr)) { hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"drive to Seattle", L" ", SPWT_LEXICAL, 1, NULL); } if (SUCCEEDED(hr)) { hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"drive to New York", L" ", SPWT_LEXICAL, 1, NULL); } if (SUCCEEDED(hr)) { hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"drive to Washington DC", L" ", SPWT_LEXICAL, 1, NULL); } // Find the best matching installed en-US recognizer. //CComPtr<ISpObjectToken> cpRecognizerToken; if (SUCCEEDED(hr)) { hr = SpFindBestToken(SPCAT_RECOGNIZERS, L"language=409", NULL, &cpRecognizerToken); } // Create the in-process recognizer and immediately set its state to inactive. //CComPtr<ISpRecognizer> cpRecognizer; if (SUCCEEDED(hr)) { hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer); } if (SUCCEEDED(hr)) { hr = cpRecognizer->SetRecognizer(cpRecognizerToken); } if (SUCCEEDED(hr)) { hr = cpRecognizer->SetRecoState(SPRST_INACTIVE); } // Create a new recognition context from the recognizer. //CComPtr<ISpRecoContext> cpContext; if (SUCCEEDED(hr)) { hr = cpRecognizer->CreateRecoContext(&cpContext); } // Subscribe to the speech recognition event and end stream event. if (SUCCEEDED(hr)) { ULONGLONG ullEventInterest = SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_END_SR_STREAM); hr = cpContext->SetInterest(ullEventInterest, ullEventInterest); } // Establish a Win32 event to signal when speech events are available. //HANDLE hSpeechNotifyEvent = INVALID_HANDLE_VALUE; if (SUCCEEDED(hr)) { hr = cpContext->SetNotifyWin32Event(); } if (SUCCEEDED(hr)) { hr = cpContext->SetNotifyWin32Event(); } if (SUCCEEDED(hr)) { hSpeechNotifyEvent = cpContext->GetNotifyEventHandle(); if (INVALID_HANDLE_VALUE == hSpeechNotifyEvent) { // Notification handle unsupported //hr = SPERR_UNITIALIZED; } } // Set up an audio input stream using a .wav file and set the recognizer's input. CComPtr<ISpStream> cpInputStream; if (SUCCEEDED(hr)) { hr = SPBindToFile(L"Test.wav", SPFM_OPEN_READONLY, &cpInputStream); } if (SUCCEEDED(hr)) { hr = cpRecognizer->SetInput(cpInputStream, TRUE); } // Create a new grammar and load an SRGS grammar from file. //CComPtr<ISpRecoGrammar> cpGrammar; if (SUCCEEDED(hr)) { hr = cpContext->CreateGrammar(0, &cpGrammar); } if (SUCCEEDED(hr)) { hr = cpGrammar->LoadCmdFromFile(L"grammar.grxml", SPLO_STATIC); } // Set all top-level rules in the new grammar to the active state. if (SUCCEEDED(hr)) { hr = cpGrammar->SetRuleState(NULL, NULL, SPRS_ACTIVE); } // Finally, set the recognizer state to active to begin recognition. if (SUCCEEDED(hr)) { hr = cpRecognizer->SetRecoState(SPRST_ACTIVE_ALWAYS); } hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void **)&pVoice); if (SUCCEEDED(hr)) { hr = SpEnumTokens(SPCAT_VOICES, L"Gender=Female", NULL, &pEnum); if (SUCCEEDED(hr)) { // Get the number of voices. 
hr = pEnum->GetCount(&ulCount); } // Obtain a list of available voice tokens, set // the voice to the token, and call Speak. while (SUCCEEDED(hr) && ulCount--) { if (pVoiceToken != nullptr) { pVoiceToken->Release(); } if (SUCCEEDED(hr)) { hr = pEnum->Next(1, &pVoiceToken, NULL); } if (SUCCEEDED(hr)) { hr = pVoice->SetVoice(pVoiceToken); } if (SUCCEEDED(hr)) { wchar_t* start = L"<?xml version=\"1.0\" encoding=\"ISO - 8859 - 1\"?><speak version = \"1.0\" xmlns = \"http://www.w3.org/2001/10/synthesis\" xml:lang = \"en-US\">"; wchar_t* end = L"</speak>"; const wchar_t *xml = L"<voice required = \"Gender=Male\"> hi! <prosody pitch=\"fast\"> This is low pitch. </prosody><prosody volume=\"x - loud\"> This is extra loud volume. </prosody>"; wstring s = start; s += xml; s += end; hr = pVoice->Speak(xml, SPF_IS_XML| SPF_ASYNC, 0); //hr = pVoice->Speak(L"How are you?", SPF_DEFAULT, NULL); } } /* if (SUCCEEDED(hr)) { hr = pEnum->Next(1, &pVoiceToken, NULL); if (SUCCEEDED(hr)) { hr = pVoice->SetVoice(pVoiceToken); // Set the output to the default audio device. if (SUCCEEDED(hr)) { hr = pVoice->SetOutput(NULL, TRUE); if (SUCCEEDED(hr)) { hr = pVoice->Speak(L"Hello, world!", SPF_DEFAULT, 0); } } } } */ pVoice->Release(); } ::CoUninitialize(); }
static int Create (vlc_object_t *p_this)
{
    filter_t *p_filter = (filter_t *)p_this;
    filter_sys_t *p_sys;
    HRESULT hr;

    p_filter->p_sys = p_sys = (filter_sys_t*) malloc(sizeof(filter_sys_t));
    if (!p_sys)
        return VLC_ENOMEM;

    if (TryEnterMTA(p_this))
        goto error;

    p_sys->cpVoice = nullptr;
    hr = CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_INPROC_SERVER,
                          IID_ISpVoice, (void**) &p_sys->cpVoice);
    if (SUCCEEDED(hr)) {
        ISpObjectToken* cpVoiceToken = nullptr;
        IEnumSpObjectTokens* cpEnum = nullptr;
        ULONG ulCount = 0;

        hr = SpEnumTokens(SPCAT_VOICES, nullptr, nullptr, &cpEnum);
        if (SUCCEEDED(hr)) {
            // Get the number of voices.
            hr = cpEnum->GetCount(&ulCount);
            if (SUCCEEDED(hr)) {
                int voiceIndex = var_InheritInteger(p_this, "sapi-voice");
                if (voiceIndex > -1) {
                    if ((ULONG)voiceIndex < ulCount) {
                        hr = cpEnum->Item(voiceIndex, &cpVoiceToken);
                        if (SUCCEEDED(hr)) {
                            hr = p_sys->cpVoice->SetVoice(cpVoiceToken);
                            if (SUCCEEDED(hr)) {
                                msg_Dbg(p_this, "Selected voice %d", voiceIndex);
                            } else {
                                msg_Err(p_this, "Failed to set voice %d", voiceIndex);
                            }
                            cpVoiceToken->Release();
                            cpVoiceToken = nullptr;
                        }
                    } else {
                        msg_Err(p_this, "Voice index exceeds available count");
                    }
                }
            }
            cpEnum->Release();
            cpEnum = nullptr;
        }

        if (SUCCEEDED(hr)) {
            hr = p_sys->cpVoice->SetOutput(nullptr, TRUE);
        }
    } else {
        msg_Err(p_filter, "Could not create SpVoice");
    }

    LeaveMTA();

    p_filter->pf_render_text = RenderText;
    return VLC_SUCCESS;

error:
    free(p_sys);
    return VLC_EGENERIC;
}