/**
 * Lists the SAPI voice tokens registered under SPCAT_VOICES.
 *
 * @return One VoiceName {id, name} per token, in enumeration order.
 *
 * Every SAPI call is wrapped in SysCall (defined elsewhere) with LogicError
 * as the error type; presumably that raises LogicError on a failed HRESULT.
 */
QtSpeech::VoiceNames QtSpeech::voices()
{
    VoiceNames vs;
    ULONG count = 0;
    CComPtr<IEnumSpObjectTokens> voices;

    CoInitialize(NULL);
    SysCall( SpEnumTokens(SPCAT_VOICES, NULL, NULL, &voices), LogicError);
    SysCall( voices->GetCount(&count), LogicError);

    for (ULONG i = 0; i < count; ++i) {
        CComPtr<ISpObjectToken> voice;
        SysCall( voices->Next( 1, &voice, NULL ), LogicError);

        // Both strings are allocated by SAPI with the COM task allocator.
        // Copy each one into a QString and free it straight away, so that a
        // failure in the following call cannot leak it.
        WCHAR * w_name = 0L;
        SysCall( SpGetDescription(voice, &w_name), LogicError);
        const QString name = QString::fromWCharArray(w_name);
        CoTaskMemFree(w_name);

        WCHAR * w_id = 0L;
        SysCall( voice->GetId(&w_id), LogicError);
        const QString id = QString::fromWCharArray(w_id);
        CoTaskMemFree(w_id);

        VoiceName n = { id, name };
        vs << n;
        // 'voice' is released by CComPtr at the end of each iteration.
    }
    return vs;
}
Example #2
0
//------------------------------------------------------------------------------
bool SpeechApi51::setVoice(const std::wstring &voice)
{
	m_voice = voice;

	if (!isLoaded())
	{
		return true;
	}

	// get a voice enumerator
	CComPtr<IEnumSpObjectTokens> cpEnum;
	if (FAILED(SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum)))
	{
		return false;
	}

	// iterate through the list till we find a matching voice
	ISpObjectToken *voice_token;
	while (S_OK == cpEnum->Next(1, &voice_token, NULL))
	{
		CSpDynamicString voice_str;

		if (SUCCEEDED(SpGetDescription(voice_token, &voice_str))
			&& (voice == voice_str.Copy()))
		{
			m_sapi->SetVoice(voice_token);
			return true;
		}
	}

	return false;
}
Example #3
0
//------------------------------------------------------------------------------
std::vector<std::wstring> SpeechApi51::getVoices() const
{
	std::vector<std::wstring> ret;
	
	CoInitialize(NULL);

	// get a voice enumerator
	CComPtr<IEnumSpObjectTokens> cpEnum;
	if (S_OK != SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum))
	{
		return ret;
	}

	// iterate through the voices and add them to the string vector
	ISpObjectToken *voice_token;
	while (S_OK == cpEnum->Next(1, &voice_token, NULL))
	{
		CSpDynamicString voice_str;

		if (SUCCEEDED(SpGetDescription(voice_token, &voice_str)))
		{
			ret.push_back(voice_str.Copy());
		}
	}

	return ret;
}
Example #4
0
int main(int argc, char* argv[])
{
    //Parse de parameters. Similar way to acapelaCmd
    std::string voice = "iCub_eng";
    if (argc>1)
        voice = argv[1];
    std::cout << "Voice is: " << voice << std::endl;
    std::cout << "TODO : select the right token from this option." << voice << std::endl;

    std::string textInput;
    std::getline(std::cin, textInput);
    std::cout << "Text is: " << textInput << std::endl;

    if (::CoInitializeEx(NULL, COINIT_MULTITHREADED) == S_OK)
    {
        HRESULT hr = S_OK;
        CComPtr<IEnumSpObjectTokens> cpIEnum;
        CComPtr<ISpObjectToken> cpToken;
        CComPtr<ISpVoice> cpVoice;

        // Enumerate voice tokens that speak US English in a female voice.
        hr = SpEnumTokens(SPCAT_VOICES, L"Language=409", L"Gender=Female;", &cpIEnum);

        // Get the best matching token.
        if (SUCCEEDED(hr))
        {
            hr = cpIEnum->Next(1, &cpToken , NULL);
        }

        // Create a voice and set its token to the one we just found.
        if (SUCCEEDED(hr))
        {
            hr = cpVoice.CoCreateInstance(CLSID_SpVoice);
        }

        // Set the voice.
        if (SUCCEEDED(hr))
        {
            hr = cpVoice->SetVoice(cpToken);
        }

        // Set the output to the default audio device.
        if (SUCCEEDED(hr))
        {
            hr = cpVoice->SetOutput(NULL, TRUE);
        }

        // Speak a string directly.
        if (SUCCEEDED(hr))
        {
            hr = cpVoice->Speak(s2ws(textInput).c_str(), NULL, NULL);
        }
    }
    ::CoUninitialize();

    return 0;
}
// Speech worker: creates a SAPI voice bound to the first enumerated audio
// output device, then loops forever, speaking m_pStrToSpeak each time
// m_isHandReinitial is set.
//
// Returns FALSE if COM or SAPI setup fails. Once setup succeeds the loop never
// exits, so the trailing TRUE is only a formal return value.
DWORD WINAPI HandRaisExcer::Txt2SpeechThread()
{
	if (FAILED(::CoInitialize(NULL)))
		return FALSE;

	HRESULT hr = S_OK;
	{
		// Inner scope: the COM smart pointers must release their interfaces
		// before CoUninitialize() runs below.
		CComPtr<ISpObjectToken>        cpAudioOutToken;
		CComPtr<IEnumSpObjectTokens>   cpEnum;
		CComPtr<ISpVoice>              cpVoice;
		ULONG                          ulCount = 0;

		// Create the SAPI voice.
		hr = cpVoice.CoCreateInstance(CLSID_SpVoice);

		if (SUCCEEDED (hr))
		{
			// Enumerate the available audio output devices.
			hr = SpEnumTokens( SPCAT_AUDIOOUT, NULL, NULL, &cpEnum);
		}

		if (SUCCEEDED (hr))
		{
			// Get the number of audio output devices.
			hr = cpEnum->GetCount( &ulCount);
		}

		if (SUCCEEDED (hr))
		{
			// Take the first device the enumerator yields.
			hr = cpEnum->Next( 1, &cpAudioOutToken, NULL );
		}

		if (SUCCEEDED (hr))
		{
			hr = cpVoice->SetOutput( cpAudioOutToken, TRUE );
		}

		if (SUCCEEDED (hr))
		{
			cpVoice->SetRate(-1);
			while (TRUE)
			{
				if (m_isHandReinitial)
				{
					m_isHandReinitial = FALSE;
					Sleep(100);   // Give the producer time to refresh m_pStrToSpeak
					cpVoice->Speak(m_pStrToSpeak, SPF_DEFAULT, NULL);
				}
				else
				{
					// Yield while idle instead of spinning a core at 100%.
					Sleep(10);
				}
			}
		}
	}

	::CoUninitialize();

	if (FAILED(hr)) return FALSE;

	return TRUE;
}
Example #6
0
// Initializes COM, creates the SAPI voice (m_ispVoice) and fills
// m_ispObjectTokens with one token per installed voice.
//
// Return value: TRUE on success, FALSE on failure, with a description stored
// in m_lastErrorMessage. NOTE: although the declared type is HRESULT, the
// values are BOOL-style. FALSE equals S_OK numerically, so callers must test
// the result as a boolean and must not use SUCCEEDED()/FAILED() on it. The
// convention is kept unchanged here so existing callers keep working.
HRESULT TTSLib::Initialize()
{
	HRESULT hr = S_OK;
	CComPtr<IEnumSpObjectTokens>   cpEnum;
	ULONG                          ulCount = 0;

	//Init SAPI
	if (FAILED(::CoInitialize(NULL)))
	{
		m_lastErrorMessage = L"CoInitialize failed!";
		return FALSE;
	}

	// Create the SAPI voice.
	hr = m_ispVoice.CoCreateInstance(CLSID_SpVoice);

	if (FAILED(hr))
	{
		m_lastErrorMessage = L"CoCreateInstance failed!";
		return FALSE;
	}

	// Enumerate the available voices.
	hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);

	if (FAILED(hr))
	{
		m_lastErrorMessage = L"SpEnumTokens failed!";
		return FALSE;
	}

	//Get count
	hr = cpEnum->GetCount(&ulCount);
	if (FAILED(hr))
	{
		m_lastErrorMessage = L"GetCount cpEnum failed!";
		return FALSE;
	}

	//Get available object token voices in system
	m_ispObjectTokens.resize(ulCount);
	for (size_t i = 0; i < ulCount; ++i)
	{
		// Anything other than S_OK means this slot did not receive a token.
		// Report it rather than leaving an empty entry behind a "success".
		hr = cpEnum->Next(1, &m_ispObjectTokens[i], NULL);
		if (S_OK != hr)
		{
			m_lastErrorMessage = L"Next cpEnum failed!";
			return FALSE;
		}
	}

	return TRUE;
}
// Logs every token of the given SAPI category: a 1-based index and the
// token's description, followed by whatever EnumDataKey prints for it.
// Stops at the first enumeration error; a token whose description cannot be
// read is reported and skipped.
void DumpCategory(LPCWSTR category) {
    IEnumSpObjectTokens *tokenEnum = nullptr;
    HRESULT hr = SpEnumTokens(category, nullptr, nullptr, &tokenEnum);
    if (SPERR_NOT_FOUND == hr) {
        LOG(L"  None found.");
        return;
    }
    if (FAILED(hr)) {
        ERR(L"SpEnumTokens failed: hr = 0x%08x", hr);
        return;
    }
    ReleaseOnExit releaseEnum(tokenEnum);

    ULONG tokenCount = 0;
    hr = tokenEnum->GetCount(&tokenCount);
    if (FAILED(hr)) {
        ERR(L"IEnumSpObjectTokens::GetCount failed: hr = 0x%08x", hr);
        return;
    }

    for (ULONG index = 0; index < tokenCount; ++index) {
        ISpObjectToken *current = nullptr;
        hr = tokenEnum->Next(1, &current, nullptr);
        if (FAILED(hr)) {
            ERR(L"IEnumSpObjectTokens::Next failed: hr = 0x%08x", hr);
            return;
        }
        ReleaseOnExit releaseToken(current);

        LPWSTR text = nullptr;
        hr = SpGetDescription(current, &text);
        if (SUCCEEDED(hr)) {
            // The description guard is freed before the token guard, as the
            // declaration order requires.
            CoTaskMemFreeOnExit freeText(text);

            LOG(L"  #%u: %s", index + 1, text);

            EnumDataKey(2, current);
        } else {
            ERR(L"SpGetDescription failed: hr = 0x%08x", hr);
        }
    }
}
Example #8
0
// Selects the voice at zero-based position 'voice' in the SAPI voice
// enumeration and applies it to Voice_device.
//
// Returns true when SetVoice succeeds for that position; false if enumeration
// fails or 'voice' is outside the list. Non-Windows builds are a stub that
// returns true.
bool speech_set_voice(int voice)
{
#ifdef _WIN32
	// Enumerate the available voices
	CComPtr<IEnumSpObjectTokens> voice_enum;
	if (FAILED(SpEnumTokens(SPCAT_VOICES, NULL, NULL, &voice_enum)))
	{
		return false;
	}

	ULONG remaining = 0;
	if (FAILED(voice_enum->GetCount(&remaining)))
	{
		return false;
	}

	// Walk the tokens in order until the requested position is reached.
	CComPtr<ISpObjectToken> token;
	for (int index = 0; remaining > 0; --remaining, ++index)
	{
		token.Release();

		if (FAILED(voice_enum->Next(1, &token, NULL)))
		{
			return false;
		}

		if (index == voice)
		{
			return SUCCEEDED(Voice_device->SetVoice(token));
		}
	}
	return false;
#else
	STUB_FUNCTION;

	return true;
#endif
}
// Builds the widget UI and, on Windows, fills the m_voice combo box with the
// description of every installed SAPI voice. It then wires the voice selector
// and the speak button to their slots.
TTSWidget::TTSWidget(QWidget* parent /*= 0*/, Qt::WFlags flags /*= 0*/) : QWidget(parent, flags)
{
	setupUi(this);

#ifdef _WIN32
	// Remember whether this call initialized COM, so that CoUninitialize()
	// below only balances a successful CoInitialize().
	const HRESULT hrInit = CoInitialize(NULL);
	{
		// Inner scope: all COM smart pointers are released before COM is
		// uninitialized.
		CComPtr<IEnumSpObjectTokens>        cpEnum;
		CComPtr<ISpVoice>                   cpVoice;
		ULONG                               ulCount = 0;

		// Create the SAPI voice
		HRESULT hr = cpVoice.CoCreateInstance( CLSID_SpVoice );
		//Enumerate the available voices
		if(SUCCEEDED(hr))
			hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
		//Get the number of voices
		if(SUCCEEDED(hr))
			hr = cpEnum->GetCount(&ulCount);
		// Obtain a list of available voice tokens
		while (SUCCEEDED(hr) && ulCount-- )
		{
			CComPtr<ISpObjectToken> cpVoiceToken;
			hr = cpEnum->Next( 1, &cpVoiceToken, NULL );
			if (S_OK != hr)
				break;	// no token was produced, so there is nothing to describe

			WCHAR* voiceName = 0;
			if (SUCCEEDED(SpGetDescription(cpVoiceToken, &voiceName)))
			{
				m_voice->addItem(QString::fromUtf16((const ushort*) voiceName));
				CoTaskMemFree(voiceName);
			}
		}
	}
	if (SUCCEEDED(hrInit))
		CoUninitialize();
#endif

	connect(m_voice, SIGNAL(currentIndexChanged(int)), this, SLOT(voiceChanged(int)));
	connect(m_speakButton, SIGNAL(clicked()), this, SLOT(speak()));
}
/**
 * Creates a speech object bound to a SAPI voice.
 *
 * @param n       Voice to use. If n.id is empty, the voice object's current
 *                voice is queried and n is filled in from it; otherwise the
 *                installed voice whose id equals n.id is selected.
 * @param parent  Passed through to QObject.
 *
 * Throws InitError if no voice id is available after the lookup. Each SAPI
 * call goes through SysCall (defined elsewhere) with InitError as the error
 * type; presumably that raises InitError on a failed HRESULT.
 */
QtSpeech::QtSpeech(VoiceName n, QObject * parent)
    :QObject(parent), d(new Private)
{
    CoInitialize(NULL);
    SysCall( d->voice.CoCreateInstance( CLSID_SpVoice ), InitError);

    if (n.id.isEmpty()) {
        CComPtr<ISpObjectToken> voice;
        SysCall( d->voice->GetVoice(&voice), InitError);

        // SAPI allocates these strings with the COM task allocator. Copy and
        // free each one immediately so a failure in the next call cannot
        // leak it.
        WCHAR * w_name = 0L;
        SysCall( SpGetDescription(voice, &w_name), InitError);
        n.name = QString::fromWCharArray(w_name);
        CoTaskMemFree(w_name);

        WCHAR * w_id = 0L;
        SysCall( voice->GetId(&w_id), InitError);
        n.id = QString::fromWCharArray(w_id);
        CoTaskMemFree(w_id);
    }
    else {
        ULONG count = 0;
        CComPtr<IEnumSpObjectTokens> voices;
        SysCall( SpEnumTokens(SPCAT_VOICES, NULL, NULL, &voices), InitError);
        SysCall( voices->GetCount(&count), InitError);
        for (ULONG i = 0; i < count; ++i) {
            CComPtr<ISpObjectToken> voice;
            SysCall( voices->Next( 1, &voice, NULL ), InitError);

            WCHAR * w_id = 0L;
            SysCall( voice->GetId(&w_id), InitError);
            const QString id = QString::fromWCharArray(w_id);
            CoTaskMemFree(w_id);

            if (id == n.id) {
                d->voice->SetVoice(voice);
                break;  // the requested voice is set; stop searching
            }
        }
    }

    if (n.id.isEmpty())
        throw InitError(Where+"No default voice in system");

    d->name = n;
    d->ptrs << this;
}
Example #11
0
// Selects the installed SAPI voice whose description equals 'voice'.
//
// Returns true only if a matching voice was found and SetVoice succeeded.
// Returns false if enumeration fails, a description cannot be read, no voice
// matches, or SetVoice fails.
//
// 'hr' and 'pVoice' are not declared in this function; they are presumably
// members or globals declared elsewhere.
bool SapiInterface::SetVoiceNative(CString voice)
{
	CComPtr<IEnumSpObjectTokens>    cpEnum;

	//This gets an enumeration of all voices on the system
	hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
	if (FAILED(hr))
	{
		return false; //enumeration of voices failed
	}

	for (;;)
	{
		CComPtr<ISpObjectToken> cpVoiceToken; //the token is the voice

		// Check Next() on its own. Its result used to be overwritten by the
		// SpGetDescription call that followed.
		hr = cpEnum->Next(1, &cpVoiceToken, NULL);
		if (S_OK != hr)
		{
			break; //end of list or enumeration error
		}

		// One description at a time is enough, so no heap array is needed.
		CSpDynamicString description;
		hr = SpGetDescription(cpVoiceToken, &description);
		if (FAILED(hr))
		{
			break;
		}

		if (CString(description) == voice)
		{
			hr = pVoice->SetVoice(cpVoiceToken);
			return SUCCEEDED(hr);
		}
	}

	//no voice with the requested description was found
	return false;
}
Example #12
0
// Allocates the token and registry maps, reads the registry voice list,
// initializes COM, and sets up cpVoice with the best matching female voice on
// the default audio output.
//
// NOTE(review): m_initialized is set to true even when one of the SAPI steps
// fails, and the two maps are allocated afresh on every call. Confirm that
// callers rely on this before changing it.
void CSpeechSynthesizer::Initialize()
{
	// Language=C09;Language=809;Language=409;Language=411;Language=412;Language=804;Language=C04;Language=404;
	m_tokenMap = new std::map<std::string*, std::string*>();

	m_registryList = new std::map<std::string*, std::string*>();
	getVoicesInRegistry();

	// The single argument of CoInitialize is reserved and must be NULL.
	// Passing the enumerator smart pointer only worked while it was empty.
	CoInitialize(NULL);

	// Enumerate voice tokens, preferring a female voice (no language
	// restriction is requested).
	HRESULT hr = SpEnumTokens(SPCAT_VOICES, L"", L"Gender=Female;", &cpIEnum);

	// Get the best matching token.
	if (SUCCEEDED(hr))
	{
		hr = cpIEnum->Next(1, &cpToken, NULL);
	}

	// Create a voice and set its token to the one we just found.
	if (SUCCEEDED(hr))
	{
		hr = cpVoice.CoCreateInstance(CLSID_SpVoice);
	}

	// Set the voice.
	if (SUCCEEDED(hr))
	{
		hr = cpVoice->SetVoice(cpToken);
	}

	// Set the output to the default audio device.
	if (SUCCEEDED(hr))
	{
		hr = cpVoice->SetOutput(NULL, TRUE);
	}

	m_initialized = true;
}
Example #13
0
/*
 * Some notes about this function:
 * This function returns a list of available SAPI voices. It has been fixed and should be working correclty
 * on Windows 7 and all other versions of Windows.
 *
 * The solution to getting this function working was found here:
 * http://stackoverflow.com/questions/4336245/how-to-return-a-java-string-in-c-using-jni
 *
 * In addition to the above StackOverflow thread, it was necessary to change this function to return an std::string instance instead
 * of a character pointer, and to use stringstreams instead of CStrings.
 */
std::string SapiInterface::GetVoiceNative()
{
	
	std::stringstream sstream;
	sstream << "<?xml version=\"1.0\"?>";

	ISpVoice * pVoice = NULL;	
	CComPtr<ISpObjectToken>        cpVoiceToken;
	CComPtr<IEnumSpObjectTokens>   cpEnum;
	CComPtr<ISpVoice>              cpVoice;
	ULONG                          ulCount = 0;

	HRESULT hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void **)&pVoice);
	if(SUCCEEDED(hr))
	{
		WCHAR   **m_ppszTokenIds;
		USES_CONVERSION;
		CComPtr<IEnumSpObjectTokens>    cpEnum;
		WCHAR**							szDescription;
		ISpObjectToken                  *pToken = NULL;
		CComPtr<ISpObjectToken>         cpVoiceToken; //the token is the voice
		CComPtr<ISpVoice>               cpVoice;
		ULONG                           ulCount = 0;

		if(SUCCEEDED(hr))
		{
			hr = cpVoice.CoCreateInstance( CLSID_SpVoice );
			if(SUCCEEDED(hr))
			{
				WCHAR *pszCurTokenId = NULL;
				ULONG ulIndex = 0, ulNumTokens = 0, ulCurToken = -1;

				hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);

				if (hr == S_OK)
				{
					hr = cpEnum->GetCount( &ulNumTokens );

					if (SUCCEEDED(hr) && ulNumTokens != 0)
					{
						szDescription = new WCHAR* [ulNumTokens];
						m_ppszTokenIds = new WCHAR* [ulNumTokens];

						ZeroMemory(m_ppszTokenIds, ulNumTokens * sizeof(WCHAR *));

						UINT i =0;
						while (cpEnum->Next(1, &pToken, NULL) == S_OK)
						{
							//Don't care about return value in next line:
							hr = SpGetDescription(pToken, &szDescription[ulIndex]);
							ulIndex++;

							int utf8_length = WideCharToMultiByte(
								  CP_UTF8,           // Convert to UTF-8
								  0,                 // No special character conversions required 
													 // (UTF-16 and UTF-8 support the same characters)
								  szDescription[i],             // UTF-16 string to convert
								  -1,                // utf16 is NULL terminated (if not, use length)
								  NULL,              // Determining correct output buffer size
								  0,                 // Determining correct output buffer size
								  NULL,              // Must be NULL for CP_UTF8
								  NULL);             // Must be NULL for CP_UTF8

							if (utf8_length == 0) {
								return std::string("WideCharToMultibyte error.");
							}
							
							char *utf8_voice = new char[utf8_length];
							utf8_length = WideCharToMultiByte(
							  CP_UTF8,           // Convert to UTF-8
							  0,                 // No special character conversions required 
												 // (UTF-16 and UTF-8 support the same characters)
							  szDescription[i],             // UTF-16 string to convert
							  -1,                // utf16 is NULL terminated (if not, use length)
							  utf8_voice,              // UTF-8 output buffer
							  utf8_length,       // UTF-8 output buffer size
							  NULL,              // Must be NULL for CP_UTF8
							  NULL);             // Must be NULL for CP_UTF8
							//strConcatenateXML += "<voice>" + szDescription[i] + "</voice>";

							sstream << "<voice>" << utf8_voice << "</voice>";
							pToken->Release();
							pToken = NULL;
							i++;
						}

						delete [] szDescription;
					}
					else
						{
						//strConcatenateXML = "No voice found. (5)";
						sstream << "No voice found. (5)";
					}
				}
				else
				{
					//strConcatenateXML = "No voice found. (4)";
					sstream << "No voice found. (4)";
				}
			}
			else
			{
				//strConcatenateXML = "No voice found. (3)";
				sstream << "No voice found. (3)";
			}
		}
		else
		{
			//strConcatenateXML = "No voice found. (2)";
			sstream << "No voice found. (2)";
		}
	}
	else
	{
		//strConcatenateXML = "No voice found. (1)";
		sstream << "No voice found. (1)";
	}

	return sstream.str();
}
/******************************************************************************
* ManageEmployeesPaneProc *
*-------------------------*
*   Description:
*       Handles messages specifically for the manage employees pane.
*
*       The pane keeps its state in function-local statics: the number of
*       voice tokens found, the index of the token matching the current voice
*       (0xffffffff when none matches), one token id and one description
*       string per token, and the listing criteria last requested (iCurEnum).
*
*   Parameters:
*       hWnd    - window that is invalidated and that receives follow-up
*                 messages
*       message - pane message to handle
*       wParam  - not used by this handler
*       lParam  - WM_INITPANE:    0 requires "Gender=Male", 1 requires
*                                 "Gender=Female", any other value applies no
*                                 required attributes
*                 WM_MISCCOMMAND: command id, taken relative to VID_MalesOnly
*                 WM_TTSVOICESEL: index into the token id table
*
*   Return:
*       1 when the message was handled, 0 otherwise.
*
******************************************************************************/
LRESULT ManageEmployeesPaneProc( HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam )
{
    static ULONG ulNumTokens;
    static ULONG ulCurToken;
    static WCHAR**  ppszTokenIds;
    static CSpDynamicString*  ppcDescriptionString;     // This is string helper class in sphelper.h
    static UINT iCurEnum;       // Indicates if we should list males, females, or both

    switch ( message )
    {
        case WM_GOTOOFFICE:
        {
            // Set the right message handler and repaint
            g_fpCurrentPane = OfficePaneProc;
            //Cleanup our variables
            ManageEmployeesPaneCleanup( ppszTokenIds, ppcDescriptionString, ulNumTokens );
            ppszTokenIds = NULL;
            ppcDescriptionString = NULL;
            ulNumTokens = 0;

            // Set the hear voice rule to inactive
            HRESULT hr = g_cpCmdGrammar->SetRuleIdState( VID_HearTheVoice, SPRS_INACTIVE );
            _ASSERTE( SUCCEEDED( hr ) );
            hr = g_cpCmdGrammar->SetRuleIdState( VID_OtherRules, SPRS_INACTIVE );
            _ASSERTE( SUCCEEDED( hr ) );
            hr = g_cpCmdGrammar->SetRuleIdState( DYN_TTSVOICERULE, SPRS_ACTIVE );
            _ASSERTE( SUCCEEDED( hr ) );

            PostMessage( hWnd, WM_INITPANE, NULL, NULL );
            InvalidateRect( hWnd, NULL, TRUE );
            return ( 1 );
        }

        case WM_GOTOCOUNTER:
        {
            // Set the right message handler and repaint
            g_fpCurrentPane = CounterPaneProc;
            //Cleanup our variables
            ManageEmployeesPaneCleanup( ppszTokenIds, ppcDescriptionString, ulNumTokens );
            ppszTokenIds = NULL;
            ppcDescriptionString = NULL;
            ulNumTokens = 0;

            // Set the hear voice rule to inactive
            HRESULT hr = g_cpCmdGrammar->SetRuleIdState( VID_HearTheVoice, SPRS_INACTIVE );
            _ASSERTE( SUCCEEDED( hr ) );
            hr = g_cpCmdGrammar->SetRuleIdState( VID_OtherRules, SPRS_INACTIVE );
            _ASSERTE( SUCCEEDED( hr ) );
            hr = g_cpCmdGrammar->SetRuleIdState( DYN_TTSVOICERULE, SPRS_ACTIVE );
            _ASSERTE( SUCCEEDED( hr ) );

            PostMessage( hWnd, WM_INITPANE, NULL, NULL );
            InvalidateRect( hWnd, NULL, TRUE );
            return ( 1 );
        }
        case WM_PAINT:
        {
            // Do the actual UI paint
            ManageEmployeesPanePaint( hWnd, ulNumTokens, ppcDescriptionString, ulCurToken, iCurEnum );
            return ( 1 );
        }

        case WM_INITPANE:
        {
            ISpObjectToken                  *pToken = NULL;  // Token interface pointer
            CComPtr<IEnumSpObjectTokens>    cpEnum;          // Pointer to token enumerator
            ULONG                           ulIndex = 0;
            ulCurToken = 0xffffffff;
            // Points at string literals only, hence const.
            const WCHAR *szRequiredAttributes = NULL;
            SPSTATEHANDLE                   hDynamicRuleHandle;  // Handle to our dynamic rule

            // Set the required attributes field for the enum if we have special needs
            // based on our LPARAM in
            if ( 0 == lParam )
            {
                szRequiredAttributes = L"Gender=Male";
            }
            else if ( 1 == lParam )
            {
                szRequiredAttributes = L"Gender=Female";
            }

            // Get a token enumerator for tts voices available
            HRESULT hr = SpEnumTokens(SPCAT_VOICES, szRequiredAttributes, NULL, &cpEnum);
            if ( S_OK == hr )
            {
                // Get the numbers of tokens found
                hr = cpEnum->GetCount( &ulNumTokens );

                if ( SUCCEEDED( hr ) && 0 != ulNumTokens )
                {
                    // Create arrays we need for storing data
                    ppcDescriptionString = new CSpDynamicString [ulNumTokens];
                    if ( NULL == ppcDescriptionString )
                    {
                        break;
                    }

                    ppszTokenIds = new WCHAR* [ulNumTokens];
                    if ( NULL == ppszTokenIds )
                    {
                        break;
                    }
                    ZeroMemory( ppszTokenIds, ulNumTokens*sizeof( WCHAR* ) );

                    // Get the next token in the enumeration
                    // State is maintained in the enumerator.
                    // The index is checked first, so the arrays are never
                    // written past their end and no token is fetched that
                    // could not be stored.
                    while ( ulIndex < ulNumTokens && cpEnum->Next(1, &pToken, NULL) == S_OK )
                    {
                        // Get a string which describes the token, in our case, the voice name
                        hr = SpGetDescription( pToken, &ppcDescriptionString[ulIndex] );
                        _ASSERTE( SUCCEEDED( hr ) );

                        // Get the token id, for a low overhead way to retrieve the token later
                        // without holding on to the object itself
                        hr = pToken->GetId( &ppszTokenIds[ulIndex] );
                        _ASSERTE( SUCCEEDED( hr ) );

                        ulIndex++;

                        // Release the token itself
                        pToken->Release();
                        pToken = NULL;
                    }
                }

                // if we've failed to properly initialize, then we should completely shut-down
                if ( S_OK != hr )
                {
                    if ( pToken )
                    {
                        pToken->Release();
                    }
                    ManageEmployeesPaneCleanup( ppszTokenIds, ppcDescriptionString, ulNumTokens );

                    ppszTokenIds = NULL;
                    ppcDescriptionString = NULL;
                    ulNumTokens = 0;
                }
                // Find out which token corresponds to our voice which is currently in use
                else
                {
                    WCHAR *pszCurTokenId = NULL;

                    // Get the token representing the current voice
                    hr = g_cpVoice->GetVoice( &pToken );
                    if ( SUCCEEDED( hr ) )
                    {
                        // Get the current token ID, and compare it against others to figure out
                        // which description string is the one currently selected.
                        hr = pToken->GetId( &pszCurTokenId );
                        if ( SUCCEEDED( hr ) )
                        {
                            // A slot can still be NULL if the enumerator
                            // yielded fewer tokens than it counted. Skip such
                            // slots rather than pass NULL to _wcsicmp.
                            ulIndex = 0;
                            while ( ulIndex < ulNumTokens &&
                                    ( NULL == ppszTokenIds[ulIndex] ||
                                      0 != _wcsicmp( pszCurTokenId, ppszTokenIds[ulIndex] ) ) )
                            {
                                ulIndex++;
                            }

                            // We found it, so set the current index to that of the current token
                            if ( ulIndex < ulNumTokens )
                            {
                                ulCurToken = ulIndex;
                            }

                            CoTaskMemFree( pszCurTokenId );
                        }

                        pToken->Release();

                    }

                }

            }

            // Initially, we see both genders
            _ASSERTE( lParam >= 0 && lParam <= 2);
            iCurEnum = (UINT)lParam;

            // Create a dynamic rule containing the description strings of the voice tokens
            hr = g_cpCmdGrammar->GetRule(NULL, DYN_TTSVOICERULE, SPRAF_TopLevel | SPRAF_Active | SPRAF_Dynamic, TRUE, &hDynamicRuleHandle);
            if ( SUCCEEDED( hr ) )
            {
                // Clear the rule first
                hr = g_cpCmdGrammar->ClearRule( hDynamicRuleHandle );
                _ASSERTE( SUCCEEDED( hr ) );

                // Commit the changes
                hr = g_cpCmdGrammar->Commit(0);
                _ASSERTE( SUCCEEDED( hr ) );

                // Add description names as the word, ulIndex as id
                for ( ulIndex = 0; ulIndex < ulNumTokens; ulIndex++ )
                {
                    SPPROPERTYINFO prop;
                    // Zero the whole structure first, so that no field
                    // (ulId, the rest of the VARIANT) is passed uninitialized.
                    ZeroMemory( &prop, sizeof( prop ) );
                    prop.pszName = L"Id";
                    prop.pszValue = L"Property";
                    prop.vValue.vt = VT_I4;
                    prop.vValue.ulVal = ulIndex;
                    hr = g_cpCmdGrammar->AddWordTransition( hDynamicRuleHandle, NULL, ppcDescriptionString[ulIndex], L" ",
                                                           SPWT_LEXICAL, 1.0, &prop);
                    _ASSERTE( SUCCEEDED( hr ) );
                }

                // Commit the changes
                hr = g_cpCmdGrammar->Commit(0);
                _ASSERTE( SUCCEEDED( hr ) );

                // Set the dynamic rules to active
                hr = g_cpCmdGrammar->SetRuleIdState( DYN_TTSVOICERULE, SPRS_ACTIVE );
                _ASSERTE( SUCCEEDED( hr ) );
            }

            // Set the hear voice rule to active
            hr = g_cpCmdGrammar->SetRuleIdState( VID_HearTheVoice, SPRS_ACTIVE );
            _ASSERTE( SUCCEEDED( hr ) );
            hr = g_cpCmdGrammar->SetRuleIdState( VID_OtherRules, SPRS_ACTIVE );
            _ASSERTE( SUCCEEDED( hr ) );

            InvalidateRect( hWnd, NULL, TRUE );
            return ( 1 );
        }

        case WM_DESTROY:
            // Windows is closing down, so we should cleanup
            ManageEmployeesPaneCleanup( ppszTokenIds, ppcDescriptionString, ulNumTokens );
            ppszTokenIds = NULL;
            ppcDescriptionString = NULL;
            ulNumTokens = 0;
            return ( 1 );

        case WM_HEARTHEVOICE:
            // Set the voice to play
            LoadString( g_hInst, IDS_VOICESPEAK, g_szCounterDisplay, MAX_LOADSTRING );
            g_cpVoice->Speak( CT2W(g_szCounterDisplay), SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL );
            return ( 1 );

        case WM_MISCCOMMAND:
        {
            // Find out the offset from the first property we're interested in, so we can verify that
            // it's within range.
            UINT iSelection = (UINT)(lParam - VID_MalesOnly);
            if ( 2 >= iSelection )
            {
                // If we have a new listing criteria, we basically shutdown the pane and start it again
                if ( iSelection != iCurEnum )
                {
                    HRESULT hr = g_cpCmdGrammar->SetRuleIdState( VID_HearTheVoice, SPRS_INACTIVE );
                    _ASSERTE( SUCCEEDED( hr ) );
                    hr = g_cpCmdGrammar->SetRuleIdState( VID_OtherRules, SPRS_INACTIVE );
                    _ASSERTE( SUCCEEDED( hr ) );

                    ManageEmployeesPaneCleanup( ppszTokenIds, ppcDescriptionString, ulNumTokens );
                    ppszTokenIds = NULL;
                    ppcDescriptionString = NULL;
                    ulNumTokens = 0;

                    PostMessage( hWnd, WM_INITPANE, 0, (LPARAM) iSelection );
                }
            }
            return ( 1 );
        }

        case WM_TTSVOICESEL:
        {
            // If we are out of range, it is a programming error
            _ASSERTE( 0 <= lParam && ulNumTokens > (ULONG) lParam );

            // The assertion is a debug-only check, so also guard the table
            // access at run time: the table must exist, the index must be in
            // range, and the slot must hold an id.
            if ( NULL != ppszTokenIds &&
                 0 <= lParam && (ULONG_PTR) lParam < (ULONG_PTR) ulNumTokens &&
                 NULL != ppszTokenIds[lParam] )
            {
                // The returned Id is an index into our tokenId table, so create a token from the id
                CComPtr< ISpObjectToken >   pToken;
                HRESULT hr = SpGetTokenFromId( ppszTokenIds[lParam], &pToken, FALSE);
                if ( SUCCEEDED( hr ) )
                {
                    // Set our current voice from the returned token
                    hr = g_cpVoice->SetVoice( pToken );
                    _ASSERTE( SUCCEEDED( hr ) );

                    // Change our current voice index
                    ulCurToken = (UINT)lParam;
                }
            }

            InvalidateRect( hWnd, NULL, TRUE );
            return ( 1 );
        }

    }
    return ( 0 );
}
Example #15
0
/******************************************************************************
* ManageEmployeesPaneProc *
*-------------------------*
*   Description:
*       Handles messages specifically for the manage employees pane.
*
******************************************************************************/
LRESULT ManageEmployeesPaneProc( HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam )
{
    USES_CONVERSION;
    
    static ULONG ulNumTokens;
    static ULONG ulCurToken;
    static WCHAR**  ppszTokenIds;
    static CSpDynamicString*  ppcDesciptionString;     // This is string helper class in sphelper.h
    
    switch ( message )
    {
        case WM_GOTOOFFICE:
        {
            // Set the right message handler and repaint
            g_fpCurrentPane = OfficePaneProc;
            //Cleanup our variables
            ManageEmployeesPaneCleanup( ppszTokenIds, ppcDesciptionString, ulNumTokens );            
            ppszTokenIds = NULL;
            ppcDesciptionString = NULL;
            ulNumTokens = 0;

            // Set the hear voice rule to inactive
            HRESULT hr = g_cpCmdGrammar->SetRuleIdState( VID_HearTheVoice, SPRS_INACTIVE );            
            _ASSERTE( SUCCEEDED( hr ) );

            PostMessage( hWnd, WM_INITPANE, NULL, NULL );
            InvalidateRect( hWnd, NULL, TRUE );
            return ( 1 );
        }

        case WM_GOTOCOUNTER:
        {
            // Set the right message handler and repaint
            g_fpCurrentPane = CounterPaneProc;
            //Cleanup our variables
            ManageEmployeesPaneCleanup( ppszTokenIds, ppcDesciptionString, ulNumTokens );            
            ppszTokenIds = NULL;
            ppcDesciptionString = NULL;
            ulNumTokens = 0;

            // Set the hear voice rule to inactive
            HRESULT hr = g_cpCmdGrammar->SetRuleIdState( VID_HearTheVoice, SPRS_INACTIVE );            
            _ASSERTE( SUCCEEDED( hr ) );

            PostMessage( hWnd, WM_INITPANE, NULL, NULL );
            InvalidateRect( hWnd, NULL, TRUE );
            return ( 1 );
        }
        case WM_PAINT:
        {
            // Do the actual UI paint
            ManageEmployeesPanePaint( hWnd, ulNumTokens, ppcDesciptionString, ulCurToken );
            return ( 1 );
        }

        case WM_INITPANE:
        {
            ISpObjectToken                  *pToken = NULL;  // Token interface pointer
            CComPtr<IEnumSpObjectTokens>    cpEnum;          // Pointer to token enumerator
            ULONG                           ulIndex = 0;
            ulCurToken = 0xffffffff;

            // Get a token enumerator for tts voices available
            HRESULT hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
            if ( S_OK == hr )
            {
                // Get the numbers of tokens found
                hr = cpEnum->GetCount( &ulNumTokens );

                if ( SUCCEEDED( hr ) && 0 != ulNumTokens )
                {
                    // Create arrays we need for storing data
                    ppcDesciptionString = new CSpDynamicString [ulNumTokens];
                    if ( NULL == ppcDesciptionString )
                    {
                        hr = E_OUTOFMEMORY;
                        break;
                    }

                    ppszTokenIds = new WCHAR* [ulNumTokens];
                    if ( NULL == ppszTokenIds )
                    {
                        hr = E_OUTOFMEMORY;
                        break;
                    }
                    ZeroMemory( ppszTokenIds, ulNumTokens*sizeof( WCHAR* ) );                    
                    
                    // Get the next token in the enumeration
                    // State is maintained in the enumerator
                    while (cpEnum->Next(1, &pToken, NULL) == S_OK)
                    {
                        // Get a string which describes the token, in our case, the voice name
                        hr = SpGetDescription( pToken, &ppcDesciptionString[ulIndex] );
                        _ASSERTE( SUCCEEDED( hr ) );
                        
                        // Get the token id, for a low overhead way to retrieve the token later
                        // without holding on to the object itself
                        hr = pToken->GetId( &ppszTokenIds[ulIndex] );
                        _ASSERTE( SUCCEEDED( hr ) );
                        
                        ulIndex++;
                        
                        // Release the token itself
                        pToken->Release();
                        pToken = NULL;
                    }                   
                }
                
                // if we've failed to properly initialize, then we should completely shut-down
                if ( S_OK != hr )
                {
                    if ( pToken )
                    {
                        pToken->Release();
                    }
                    ManageEmployeesPaneCleanup( ppszTokenIds, ppcDesciptionString, ulNumTokens );

                    ppszTokenIds = NULL;
                    ppcDesciptionString = NULL;
                    ulNumTokens = 0;
                }
                // Find out which token corresponds to our voice which is currently in use
                else
                {
                    WCHAR *pszCurTokenId = NULL;

                    // Get the token representing the current voice
                    HRESULT hr = g_cpVoice->GetVoice( &pToken );
                    if ( SUCCEEDED( hr ) )
                    {
                        // Get the current token ID, and compare it against others to figure out
                        // which desciption string is the one currently selected.
                        hr = pToken->GetId( &pszCurTokenId );
                        if ( SUCCEEDED( hr ) )
                        {
                            ulIndex = 0;
                            while ( ulIndex < ulNumTokens && 
                                    0 != _wcsicmp( pszCurTokenId, ppszTokenIds[ulIndex] ) )
                            {
                                ulIndex++;
                            }

                            // We found it, so set the current index to that of the current token
                            if ( ulIndex < ulNumTokens )
                            {
                                ulCurToken = ulIndex;
                            }

                            CoTaskMemFree( pszCurTokenId );
                        }

                        pToken->Release();

                    }                                       

                }
            
            }

            // Set the hear voice rule to active
            hr = g_cpCmdGrammar->SetRuleIdState( VID_HearTheVoice, SPRS_ACTIVE );            
            _ASSERTE( SUCCEEDED( hr ) );

            return ( 1 );
        }

        case WM_DESTROY:
            // Windows is closing down, so we should cleanup
            ManageEmployeesPaneCleanup( ppszTokenIds, ppcDesciptionString, ulNumTokens );
            return ( 1 );

        case WM_HEARTHEVOICE:
            // Set the voice to play
            LoadString( g_hInst, IDS_VOICESPEAK, g_szCounterDisplay, MAX_LOADSTRING );
            g_cpVoice->Speak( T2W(g_szCounterDisplay), SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL );
    }
    return ( 0 );
}
/**
 * Show the modal "SAPI TTS configuration" dialog.
 *
 * The dialog offers a voice combo box (filled from the SPCAT_VOICES token
 * category), volume and rate sliders initialised from pVoice, and a test
 * entry/button. After the dialog returns, if configure_changed was set by one
 * of the signal handlers, the current voice id, volume and rate are written to
 * the file named by get_cfg_filename() as a "[sapi_tts]" section.
 *
 * The array of token ids is handed to on_voice_combobox_changed as user data,
 * so it is kept alive until the dialog has finished running and is freed
 * before the window is destroyed.
 */
static void configure()
{
	configure_changed = false;
	GtkWidget *window = gtk_dialog_new_with_buttons(_("SAPI TTS configuration"), GTK_WINDOW(plugin_info->pluginwin), GTK_DIALOG_MODAL, GTK_STOCK_OK, GTK_RESPONSE_ACCEPT, NULL);
	GtkWidget *vbox = gtk_vbox_new(false, 5);
	gtk_container_set_border_width(GTK_CONTAINER(vbox),5);
	gtk_container_add (GTK_CONTAINER (GTK_DIALOG(window)->vbox), vbox);
	GtkWidget *frame = gtk_frame_new(_("TTS voice engine"));
	gtk_box_pack_start(GTK_BOX(vbox), frame, false, false, 3);
	GtkWidget *table = gtk_table_new(2, 3, false);
	gtk_container_set_border_width(GTK_CONTAINER(table),8);
	gtk_container_add (GTK_CONTAINER (frame), table);
	GtkWidget *label = gtk_label_new(_("Voice :"));
	gtk_misc_set_alignment (GTK_MISC (label), 0, .5);
	gtk_table_attach(GTK_TABLE(table), label, 0, 1, 0, 1, GTK_FILL, (GtkAttachOptions)0, 5, 0);
	GtkWidget *combobox = gtk_combo_box_new_text();
	gtk_table_attach(GTK_TABLE(table), combobox, 1, 2, 0, 1, GtkAttachOptions(GTK_FILL | GTK_EXPAND), (GtkAttachOptions)0, 5, 0);

	// Number of slots allocated in ppszTokenIds; stays 0 when enumeration fails.
	ULONG ulNumTokens = 0;
	WCHAR** ppszTokenIds = NULL;
	CComPtr<IEnumSpObjectTokens> cpEnum;
	HRESULT hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
	if ( S_OK == hr ) {
		hr = cpEnum->GetCount( &ulNumTokens );
		if ( SUCCEEDED( hr ) && 0 != ulNumTokens ) {
			// Id of the voice currently in use, so it can be pre-selected.
			WCHAR *pszCurTokenId = NULL;
			ISpObjectToken *pToken = NULL;
			HRESULT hr = pVoice->GetVoice( &pToken );
			if ( SUCCEEDED( hr ) && pToken ) {
				if ( FAILED( pToken->GetId( &pszCurTokenId ) ) ) {
					pszCurTokenId = NULL;
				}
				pToken->Release();
				pToken = NULL;
			}
			if (pszCurTokenId) {
				ppszTokenIds = new WCHAR* [ulNumTokens];
				ZeroMemory( ppszTokenIds, ulNumTokens*sizeof( WCHAR* ) );
				LONG ulCurToken = -1;
				ULONG ulIndex = 0;
				// The bound check comes first so that no token is fetched
				// (and then left unreleased) once the array is full.
				while (ulIndex < ulNumTokens && cpEnum->Next(1, &pToken, NULL) == S_OK) {
					WCHAR *description = NULL;
					WCHAR *tokenId = NULL;
					// A combo row and its id slot are only added together, so
					// row numbers and ppszTokenIds indices stay aligned even if
					// a token has to be skipped.
					if (SUCCEEDED(SpGetDescription( pToken, &description)) && description &&
					    SUCCEEDED(pToken->GetId( &tokenId )) && tokenId) {
						int cbText = WideCharToMultiByte(CP_UTF8, 0, description, -1, NULL, 0, NULL, NULL);
						if (cbText > 0) {
							char *text = new char[cbText];
							WideCharToMultiByte(CP_UTF8, 0, description, -1, text, cbText, NULL, NULL);
							gtk_combo_box_append_text(GTK_COMBO_BOX(combobox), text);
							delete []text;
							// Ownership of the id string moves into the array.
							ppszTokenIds[ulIndex] = tokenId;
							tokenId = NULL;
							if (ulCurToken == -1 && _wcsicmp( pszCurTokenId, ppszTokenIds[ulIndex]) == 0) {
								ulCurToken = ulIndex;
							}
							ulIndex++;
						}
					}
					// CoTaskMemFree accepts NULL, so these are safe on every path.
					CoTaskMemFree(description);
					CoTaskMemFree(tokenId);
					pToken->Release();
					pToken = NULL;
				}
				CoTaskMemFree( pszCurTokenId );
				if (ulCurToken != -1)
					gtk_combo_box_set_active(GTK_COMBO_BOX(combobox), ulCurToken);
			}
		}
	}
	g_signal_connect (G_OBJECT (combobox), "changed", G_CALLBACK (on_voice_combobox_changed), ppszTokenIds);

	label = gtk_label_new(_("Volume :"));
	gtk_misc_set_alignment (GTK_MISC (label), 0, .5);
	gtk_table_attach(GTK_TABLE(table), label, 0, 1, 1, 2, GTK_FILL, (GtkAttachOptions)0, 5, 0);
	GtkWidget *volume_hscale = gtk_hscale_new_with_range(0, 100, 1);
	gtk_table_attach(GTK_TABLE(table), volume_hscale, 1, 2, 1, 2, GtkAttachOptions(GTK_FILL | GTK_EXPAND), (GtkAttachOptions)0, 5, 0);
	// Initialised so the slider never shows an indeterminate value if the getter fails.
	USHORT volume = 0;
	pVoice->GetVolume(&volume);
	gtk_range_set_value(GTK_RANGE(volume_hscale), volume);
	g_signal_connect(G_OBJECT(volume_hscale),"value-changed", G_CALLBACK(on_volume_hscale_value_changed), NULL);
	label = gtk_label_new(_("Rate :"));
	gtk_misc_set_alignment (GTK_MISC (label), 0, .5);
	gtk_table_attach(GTK_TABLE(table), label, 0, 1, 2, 3, GTK_FILL, (GtkAttachOptions)0, 5, 0);
	GtkWidget *rate_hscale = gtk_hscale_new_with_range(-10, 10, 1);
	gtk_table_attach(GTK_TABLE(table), rate_hscale, 1, 2, 2, 3, GtkAttachOptions(GTK_FILL | GTK_EXPAND), (GtkAttachOptions)0, 5, 0);
	long rate = 0;
	pVoice->GetRate(&rate);
	gtk_range_set_value(GTK_RANGE(rate_hscale), rate);
	g_signal_connect(G_OBJECT(rate_hscale),"value-changed", G_CALLBACK(on_rate_hscale_value_changed), NULL);

	GtkWidget *vbox1 = gtk_vbox_new(false, 5);
	gtk_box_pack_start(GTK_BOX(vbox), vbox1, false, false, 10);
	label = gtk_label_new(_("Input the test text:"));
	gtk_misc_set_alignment (GTK_MISC (label), 0, .5);
	gtk_box_pack_start(GTK_BOX(vbox1), label, false, false, 0);
	GtkWidget *hbox = gtk_hbox_new(false, 5);
	gtk_box_pack_start(GTK_BOX(vbox1), hbox, false, false, 0);
	GtkWidget *entry = gtk_entry_new();
	gtk_entry_set_text(GTK_ENTRY(entry), "This is the test text");
	gtk_box_pack_start(GTK_BOX(hbox), entry, true, true, 0);
	GtkWidget *button = gtk_button_new_with_label(_("Test"));
	gtk_box_pack_start(GTK_BOX(hbox), button, false, false, 0);
	g_signal_connect(G_OBJECT(button),"clicked", G_CALLBACK(on_test_tts_button_clicked), GTK_ENTRY(entry));

	gtk_widget_show_all(vbox);
	gtk_dialog_run(GTK_DIALOG(window));
	if (configure_changed) {
		// UTF-8 copy of the selected voice's token id; stays NULL if it
		// cannot be obtained, in which case nothing is written.
		char *voice = NULL;
		ISpObjectToken *pToken = NULL;
		HRESULT hr = pVoice->GetVoice( &pToken );
		if ( SUCCEEDED( hr ) && pToken ) {
			WCHAR *id = NULL;
			if ( SUCCEEDED( pToken->GetId( &id ) ) && id ) {
				int cbVoice = WideCharToMultiByte(CP_UTF8, 0, id, -1, NULL, 0, NULL, NULL);
				if (cbVoice > 0) {
					voice = new char[cbVoice];
					WideCharToMultiByte(CP_UTF8, 0, id, -1, voice, cbVoice, NULL, NULL);
				}
			}
			CoTaskMemFree(id);
			pToken->Release();
		}
		if (voice) {
			gint volume = (gint)gtk_range_get_value(GTK_RANGE(volume_hscale));
			gint rate = (gint)gtk_range_get_value(GTK_RANGE(rate_hscale));
			gchar *data = g_strdup_printf("[sapi_tts]\nvoice=%s\nvolume=%d\nrate=%d\n", voice, volume, rate);
			std::string res = get_cfg_filename();
			g_file_set_contents(res.c_str(), data, -1, NULL);
			g_free(data);
			delete []voice;
		}
	}
	// Free the id strings (allocated by SAPI with the COM task allocator)
	// and then the array itself.
	if ( ppszTokenIds ) {
		for ( ULONG ulIndex = 0; ulIndex < ulNumTokens; ulIndex++ ) {
			if ( NULL != ppszTokenIds[ulIndex] ) {
				CoTaskMemFree( ppszTokenIds[ulIndex] );
			}
		}
		delete [] ppszTokenIds;
	}
	gtk_widget_destroy (window);
}
Example #17
0
	void Sound::test() {

		ISpVoice * pVoice = NULL;
		ISpObjectToken*        pVoiceToken=nullptr;
		IEnumSpObjectTokens*   pEnum;
		ULONG                  ulCount = 0;

		if (FAILED(::CoInitialize(NULL)))
		{
			return;
		}
		HRESULT hr = S_OK;

		// Find the best matching installed en-us recognizer.
		CComPtr<ISpObjectToken> cpRecognizerToken;

		if (SUCCEEDED(hr))
		{
			hr = SpFindBestToken(SPCAT_RECOGNIZERS, L"language=409", NULL, &cpRecognizerToken);
		}

		// Create the in-process recognizer and immediately set its state to inactive.
		CComPtr<ISpRecognizer> cpRecognizer;

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecognizer(cpRecognizerToken);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecoState(SPRST_INACTIVE);
		}

		// Create a new recognition context from the recognizer.
		CComPtr<ISpRecoContext> cpContext;

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->CreateRecoContext(&cpContext);
		}

		// Subscribe to the speech recognition event and end stream event.
		if (SUCCEEDED(hr))
		{
			ULONGLONG ullEventInterest = SPFEI(SPEI_RECOGNITION);
			hr = cpContext->SetInterest(ullEventInterest, ullEventInterest);
		}

		// Establish a Win32 event to signal when speech events are available.
		HANDLE hSpeechNotifyEvent = INVALID_HANDLE_VALUE;

		if (SUCCEEDED(hr))
		{
			hr = cpContext->SetNotifyWin32Event();
		}

		if (SUCCEEDED(hr))
		{
			hSpeechNotifyEvent = cpContext->GetNotifyEventHandle();

			if (INVALID_HANDLE_VALUE == hSpeechNotifyEvent)
			{
				// Notification handle unsupported.
				hr = E_NOINTERFACE;
			}
		}

		// Initialize an audio object to use the default audio input of the system and set the recognizer to use it.
		CComPtr<ISpAudio> cpAudioIn;

		if (SUCCEEDED(hr))
		{
			hr = cpAudioIn.CoCreateInstance(CLSID_SpMMAudioIn);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetInput(cpAudioIn, TRUE);
		}

		// Populate a WAVEFORMATEX struct with our desired output audio format. information.
		WAVEFORMATEX* pWfexCoMemRetainedAudioFormat = NULL;
		GUID guidRetainedAudioFormat = GUID_NULL;

		if (SUCCEEDED(hr))
		{
			hr = SpConvertStreamFormatEnum(SPSF_16kHz16BitMono, &guidRetainedAudioFormat, &pWfexCoMemRetainedAudioFormat);
		}

		// Instruct the recognizer to retain the audio from its recognition results.
		if (SUCCEEDED(hr))
		{
			hr = cpContext->SetAudioOptions(SPAO_RETAIN_AUDIO, &guidRetainedAudioFormat, pWfexCoMemRetainedAudioFormat);
		}

		if (NULL != pWfexCoMemRetainedAudioFormat)
		{
			CoTaskMemFree(pWfexCoMemRetainedAudioFormat);
		}

		// Create a new grammar and load an SRGS grammar from file.
		CComPtr<ISpRecoGrammar> cpGrammar;

		if (SUCCEEDED(hr))
		{
			hr = cpContext->CreateGrammar(0, &cpGrammar);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpGrammar->LoadCmdFromFile(L"grammar.grxml", SPLO_STATIC);
		}

		// Set all top-level rules in the new grammar to the active state.
		if (SUCCEEDED(hr))
		{
			hr = cpGrammar->SetRuleState(NULL, NULL, SPRS_ACTIVE);
		}

		// Set the recognizer state to active to begin recognition.
		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecoState(SPRST_ACTIVE_ALWAYS);
		}

		// Establish a separate Win32 event to signal the event loop exit.
		HANDLE hExitEvent = CreateEventW(NULL, FALSE, FALSE, NULL);

		// Collect the events listened for to pump the speech event loop.
		HANDLE rghEvents[] = { hSpeechNotifyEvent, hExitEvent };

		// Speech recognition event loop.
		BOOL fContinue = TRUE;

		while (fContinue && SUCCEEDED(hr))
		{
			// Wait for either a speech event or an exit event, with a 15 second timeout.
			DWORD dwMessage = WaitForMultipleObjects(sp_countof(rghEvents), rghEvents, FALSE, 15000);

			switch (dwMessage)
			{
				// With the WaitForMultipleObjects call above, WAIT_OBJECT_0 is a speech event from hSpeechNotifyEvent.
			case WAIT_OBJECT_0:
			{
				// Sequentially grab the available speech events from the speech event queue.
				CSpEvent spevent;

				while (S_OK == spevent.GetFrom(cpContext))
				{
					switch (spevent.eEventId)
					{
					case SPEI_RECOGNITION:
					{
						// Retrieve the recognition result and output the text of that result.
						ISpRecoResult* pResult = spevent.RecoResult();

						LPWSTR pszCoMemResultText = NULL;
						hr = pResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &pszCoMemResultText, NULL);

						if (SUCCEEDED(hr))
						{
							wprintf(L"Recognition event received, text=\"%s\"\r\n", pszCoMemResultText);
						}

						// Also retrieve the retained audio we requested.
						CComPtr<ISpStreamFormat> cpRetainedAudio;

						if (SUCCEEDED(hr))
						{
							hr = pResult->GetAudio(0, 0, &cpRetainedAudio);
						}

						// To demonstrate, we'll speak the retained audio back using ISpVoice.
						CComPtr<ISpVoice> cpVoice;

						if (SUCCEEDED(hr))
						{
							hr = cpVoice.CoCreateInstance(CLSID_SpVoice);
						}

						if (SUCCEEDED(hr))
						{
							hr = cpVoice->SpeakStream(cpRetainedAudio, SPF_DEFAULT, 0);
						}

						if (NULL != pszCoMemResultText)
						{
							CoTaskMemFree(pszCoMemResultText);
						}

						break;
					}
					}
				}

				break;
			}
			case WAIT_OBJECT_0 + 1:
			case WAIT_TIMEOUT:
			{
				// Exit event or timeout; discontinue the speech loop.
				fContinue = FALSE;
				//break;
			}
			}
		}

	CoUninitialize();

		CComPtr <ISpVoice>		cpVoice;
		CComPtr <ISpStream>		cpStream;
		CSpStreamFormat			cAudioFmt;

		//Create a SAPI Voice
		hr = cpVoice.CoCreateInstance(CLSID_SpVoice);

		//Set the audio format
		if (SUCCEEDED(hr))
		{
			hr = cAudioFmt.AssignFormat(SPSF_22kHz16BitMono);
		}

		//Call SPBindToFile, a SAPI helper method,  to bind the audio stream to the file
		if (SUCCEEDED(hr))
		{

			hr = SPBindToFile(L"c:\\ttstemp.wav", SPFM_CREATE_ALWAYS,
				&cpStream, &cAudioFmt.FormatId(), cAudioFmt.WaveFormatExPtr());
		}

		//set the output to cpStream so that the output audio data will be stored in cpStream
		if (SUCCEEDED(hr))
		{
			hr = cpVoice->SetOutput(cpStream, TRUE);
		}

		//Speak the text "hello world" synchronously
		if (SUCCEEDED(hr))
		{
			hr = cpVoice->Speak(L"Hello World", SPF_DEFAULT, NULL);
		}

		//close the stream
		if (SUCCEEDED(hr))
		{
			hr = cpStream->Close();
		}

		//Release the stream and voice object
		cpStream.Release();
		cpVoice.Release();

		CComPtr<ISpGrammarBuilder>    cpGrammarBuilder;
		SPSTATEHANDLE                 hStateTravel;
		// Create (if rule does not already exist)
		// top-level Rule, defaulting to Active.
		hr = cpGrammarBuilder->GetRule(L"Travel", 0, SPRAF_TopLevel | SPRAF_Active, TRUE, &hStateTravel);

		// Approach 1: List all possible phrases.
		// This is the most intuitive approach, and it does not sacrifice efficiency
		// because the grammar builder will merge shared sub-phrases when possible.
		// There is only one root state, hStateTravel, and the terminal NULL state,
		// and there are six unique transitions between root state and NULL state.

		/* XML Approximation:
		<rule id="Travel">
		<item> fly to Seattle </item>
		<item> fly to New York </item>
		<item> fly to Washington DC </item>
		<item> drive to Seattle </item>
		<item> drive to New York </item>
		<item> drive to Washington DC </item>
		</rule>
		*/

		// Create set of peer phrases, each containing complete phrase.
		// Note: the word delimiter is set as " ", so that the text we
		// attach to the transition can be multiple words (for example,
		// "fly to Seattle" is implicitly "fly" + "to" + "Seattle"):
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"fly to Seattle", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"fly to New York", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"fly to Washington DC", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"drive to Seattle", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"drive to New York", L" ", SPWT_LEXICAL, 1, NULL);
		}
		if (SUCCEEDED(hr))
		{
			hr = cpGrammarBuilder->AddWordTransition(hStateTravel, NULL, L"drive to Washington DC", L" ", SPWT_LEXICAL, 1, NULL);
		}
		// Find the best matching installed en-US recognizer.
		//CComPtr<ISpObjectToken> cpRecognizerToken;

		if (SUCCEEDED(hr))
		{
			hr = SpFindBestToken(SPCAT_RECOGNIZERS, L"language=409", NULL, &cpRecognizerToken);
		}

		// Create the in-process recognizer and immediately set its state to inactive.
		//CComPtr<ISpRecognizer> cpRecognizer;

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecognizer(cpRecognizerToken);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecoState(SPRST_INACTIVE);
		}

		// Create a new recognition context from the recognizer.
		//CComPtr<ISpRecoContext> cpContext;

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->CreateRecoContext(&cpContext);
		}

		// Subscribe to the speech recognition event and end stream event.
		if (SUCCEEDED(hr))
		{
			ULONGLONG ullEventInterest = SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_END_SR_STREAM);
			hr = cpContext->SetInterest(ullEventInterest, ullEventInterest);
		}

		// Establish a Win32 event to signal when speech events are available.
		//HANDLE hSpeechNotifyEvent = INVALID_HANDLE_VALUE;

		if (SUCCEEDED(hr))
		{
			hr = cpContext->SetNotifyWin32Event();
		}

		if (SUCCEEDED(hr))
		{
			hr = cpContext->SetNotifyWin32Event();
		}

		if (SUCCEEDED(hr))
		{
			hSpeechNotifyEvent = cpContext->GetNotifyEventHandle();

			if (INVALID_HANDLE_VALUE == hSpeechNotifyEvent)
			{
				// Notification handle unsupported
				//hr = SPERR_UNITIALIZED;
			}
		}
		// Set up an audio input stream using a .wav file and set the recognizer's input.
		CComPtr<ISpStream> cpInputStream;

		if (SUCCEEDED(hr))
		{
			hr = SPBindToFile(L"Test.wav", SPFM_OPEN_READONLY, &cpInputStream);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetInput(cpInputStream, TRUE);
		}

		// Create a new grammar and load an SRGS grammar from file.
		//CComPtr<ISpRecoGrammar> cpGrammar;

		if (SUCCEEDED(hr))
		{
			hr = cpContext->CreateGrammar(0, &cpGrammar);
		}

		if (SUCCEEDED(hr))
		{
			hr = cpGrammar->LoadCmdFromFile(L"grammar.grxml", SPLO_STATIC);
		}

		// Set all top-level rules in the new grammar to the active state.
		if (SUCCEEDED(hr))
		{
			hr = cpGrammar->SetRuleState(NULL, NULL, SPRS_ACTIVE);
		}

		// Finally, set the recognizer state to active to begin recognition.
		if (SUCCEEDED(hr))
		{
			hr = cpRecognizer->SetRecoState(SPRST_ACTIVE_ALWAYS);
		}

		 hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void     **)&pVoice);
		if (SUCCEEDED(hr)) {
			hr = SpEnumTokens(SPCAT_VOICES, L"Gender=Female", NULL, &pEnum);
			if (SUCCEEDED(hr))
			{
				// Get the number of voices.
				hr = pEnum->GetCount(&ulCount);
			}

			// Obtain a list of available voice tokens, set
			// the voice to the token, and call Speak.
			while (SUCCEEDED(hr) && ulCount--)			{
				if (pVoiceToken != nullptr) {
					pVoiceToken->Release();
				}

				if (SUCCEEDED(hr))
				{
					hr = pEnum->Next(1, &pVoiceToken, NULL);
				}

				if (SUCCEEDED(hr))
				{
					hr = pVoice->SetVoice(pVoiceToken);
				}

				if (SUCCEEDED(hr))
				{
					wchar_t* start = L"<?xml version=\"1.0\" encoding=\"ISO - 8859 - 1\"?><speak version = \"1.0\" xmlns = \"http://www.w3.org/2001/10/synthesis\"	xml:lang = \"en-US\">";
					wchar_t* end = L"</speak>";
					const wchar_t *xml = L"<voice required = \"Gender=Male\"> hi! <prosody pitch=\"fast\"> This is low pitch. </prosody><prosody volume=\"x - loud\"> This is extra loud volume. </prosody>";
					wstring s = start;
					s += xml;
					s += end;
					
					hr = pVoice->Speak(xml, SPF_IS_XML| SPF_ASYNC, 0);
					//hr = pVoice->Speak(L"How are you?", SPF_DEFAULT, NULL);
				}

			}
			/*
			if (SUCCEEDED(hr)) {
				hr = pEnum->Next(1, &pVoiceToken, NULL);
				if (SUCCEEDED(hr)) {
					hr = pVoice->SetVoice(pVoiceToken);
					// Set the output to the default audio device.
					if (SUCCEEDED(hr)) {
						hr = pVoice->SetOutput(NULL, TRUE);
						if (SUCCEEDED(hr)) {
							hr = pVoice->Speak(L"Hello, world!", SPF_DEFAULT, 0);
						}
					}
				}
			}
			*/
			pVoice->Release();
		}
		::CoUninitialize();
	}
/**
 * Module open callback: allocates the filter state, creates the SAPI voice
 * and installs RenderText as the filter's text renderer.
 *
 * If the "sapi-voice" variable is greater than -1 it is used as an index into
 * the SPCAT_VOICES enumeration to select a voice; an out-of-range index or a
 * failure to select the voice is logged and the already-created voice object
 * is still used.
 *
 * @param p_this the filter object being opened
 * @return VLC_SUCCESS when a voice object was created; VLC_ENOMEM when the
 *         state could not be allocated; VLC_EGENERIC when the MTA could not
 *         be entered or the SpVoice object could not be created
 */
static int  Create (vlc_object_t *p_this)
{
    filter_t *p_filter = (filter_t *)p_this;
    filter_sys_t *p_sys;
    HRESULT hr;

    p_filter->p_sys = p_sys = (filter_sys_t*) malloc(sizeof(filter_sys_t));
    if (!p_sys)
        return VLC_ENOMEM;

    if (TryEnterMTA(p_this))
        goto error;

    p_sys->cpVoice = nullptr;
    hr = CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_INPROC_SERVER, IID_ISpVoice, (void**) &p_sys->cpVoice);
    if (SUCCEEDED(hr)) {
        ISpObjectToken*        cpVoiceToken = nullptr;
        IEnumSpObjectTokens*   cpEnum = nullptr;
        ULONG ulCount = 0;

        hr = SpEnumTokens(SPCAT_VOICES, nullptr, nullptr, &cpEnum);
        if (SUCCEEDED(hr)) {
            // Get the number of voices.
            hr = cpEnum->GetCount(&ulCount);
            if (SUCCEEDED (hr))
            {
                int voiceIndex = var_InheritInteger(p_this, "sapi-voice");
                if (voiceIndex > - 1)
                {
                    // voiceIndex is known to be non-negative here, so the
                    // conversion to the unsigned count type is exact.
                    if (static_cast<ULONG>(voiceIndex) < ulCount) {
                        hr = cpEnum->Item(voiceIndex, &cpVoiceToken);
                        if (SUCCEEDED(hr)) {
                            hr = p_sys->cpVoice->SetVoice(cpVoiceToken);
                            if (SUCCEEDED(hr)) {
                                msg_Dbg(p_this, "Selected voice %d", voiceIndex);
                            }
                            else {
                                msg_Err(p_this, "Failed to set voice %d", voiceIndex);
                            }
                            cpVoiceToken->Release();
                            cpVoiceToken = nullptr;
                        }
                    }
                    else
                        msg_Err(p_this, "Voice index exceeds available count");
                }
            }
            cpEnum->Release();
            cpEnum = nullptr;
        }

        if (SUCCEEDED(hr)) {
            hr = p_sys->cpVoice->SetOutput(nullptr, TRUE);
        }
    }
    else
    {
        // Without a voice object the render callback has nothing to speak
        // with, so fail the open instead of installing it.
        msg_Err(p_filter, "Could not create SpVoice");
        LeaveMTA();
        goto error;
    }

    LeaveMTA();

    p_filter->pf_render_text = RenderText;

    return VLC_SUCCESS;

error:
    // Do not leave the filter pointing at the state that is about to be freed.
    p_filter->p_sys = nullptr;
    free(p_sys);
    return VLC_EGENERIC;
}