unsigned int CSentence::ComputeDataCheckSum() { #if PHONEME_EDITOR int i; int c; CRC32_t crc; CRC32_Init( &crc ); // Checksum the text CRC32_ProcessBuffer( &crc, GetText(), Q_strlen( GetText() ) ); // Checsum words and phonemes c = m_Words.Count(); for ( i = 0; i < c; ++i ) { CWordTag *word = m_Words[ i ]; unsigned int wordCheckSum = word->ComputeDataCheckSum(); CRC32_ProcessBuffer( &crc, &wordCheckSum, sizeof( unsigned int ) ); } // Checksum emphasis data c = m_EmphasisSamples.Count(); for ( i = 0; i < c; ++i ) { CRC32_ProcessBuffer( &crc, &m_EmphasisSamples[ i ].time, sizeof( float ) ); CRC32_ProcessBuffer( &crc, &m_EmphasisSamples[ i ].value, sizeof( float ) ); } CRC32_Final( &crc ); return ( unsigned int )crc; #else Assert( 0 ); return 0; #endif }
//-----------------------------------------------------------------------------
// Purpose: Prints every word of the sentence, followed by that word's
//  phonemes, through the supplied printf-style callback
// Input  : sentence - sentence whose m_Words list is dumped; NULL word and
//				phoneme entries are skipped
//			pfnPrint - printf-style output callback
//-----------------------------------------------------------------------------
void PrintWordsAndPhonemes( CSentence& sentence, void (*pfnPrint)( const char *fmt, ... ) )
{
	pfnPrint( "WORDS\r\n\r\n" );

	for ( int i = 0 ; i < sentence.m_Words.Size(); i++ )
	{
		CWordTag *word = sentence.m_Words[ i ];
		if ( !word )
			continue;

		// Pass the format and arguments straight to the callback. Formatting
		//  into a fixed local buffer first could overflow it on a long word,
		//  and handing the formatted text back as the format string would make
		//  any '%' inside a word be interpreted as a conversion specifier.
		pfnPrint( "<%u - %u> %s\r\n",
			word->m_uiStartByte, word->m_uiEndByte, word->GetWord() );

		for ( int j = 0 ; j < word->m_Phonemes.Size(); j++ )
		{
			CPhonemeTag *phoneme = word->m_Phonemes[ j ];
			if ( !phoneme )
				continue;

			pfnPrint( " <%u - %u> %s\r\n",
				phoneme->m_uiStartByte, phoneme->m_uiEndByte, phoneme->GetTag() );
		}
	}

	pfnPrint( "\r\n" );
}
//----------------------------------------------------------------------------- // Purpose: // Input : from - //----------------------------------------------------------------------------- CWordTag::CWordTag( const CWordTag& from ) { m_pszWord = NULL; SetWord( from.m_pszWord ); SetStartAndEndBytes( from.GetStartByte(), from.GetEndByte() ); m_flStartTime = from.m_flStartTime; m_flEndTime = from.m_flEndTime; SetSelected( from.GetSelected() ); for ( int p = 0; p < from.m_Phonemes.Size(); p++ ) { CPhonemeTag *newPhoneme = new CPhonemeTag( *from.m_Phonemes[ p ] ); m_Phonemes.AddToTail( newPhoneme ); } }
//----------------------------------------------------------------------------- // Purpose: //----------------------------------------------------------------------------- void CSentence::SetTextFromWords( void ) { #if PHONEME_EDITOR char fulltext[ 1024 ]; fulltext[ 0 ] = 0; for ( int i = 0 ; i < m_Words.Size(); i++ ) { CWordTag *word = m_Words[ i ]; Q_strncat( fulltext, word->GetWord(), sizeof( fulltext ), COPY_ALL_CHARACTERS ); if ( i != m_Words.Size() ) { Q_strncat( fulltext, " ", sizeof( fulltext ), COPY_ALL_CHARACTERS ); } } SetText( fulltext ); #endif }
void CSentence::SaveToBuffer( CUtlBuffer& buf ) { #if PHONEME_EDITOR Assert( !m_bIsCached ); int i, j; buf.Printf( "VERSION 1.0\n" ); buf.Printf( "PLAINTEXT\n" ); buf.Printf( "{\n" ); buf.Printf( "%s\n", GetText() ); buf.Printf( "}\n" ); buf.Printf( "WORDS\n" ); buf.Printf( "{\n" ); for ( i = 0; i < m_Words.Size(); i++ ) { CWordTag *word = m_Words[ i ]; Assert( word ); buf.Printf( "WORD %s %.3f %.3f\n", word->GetWord(), word->m_flStartTime, word->m_flEndTime ); buf.Printf( "{\n" ); for ( j = 0; j < word->m_Phonemes.Size(); j++ ) { CPhonemeTag *phoneme = word->m_Phonemes[ j ]; Assert( phoneme ); buf.Printf( "%i %s %.3f %.3f 1\n", phoneme->GetPhonemeCode(), phoneme->GetTag(), phoneme->GetStartTime(), phoneme->GetEndTime() ); } buf.Printf( "}\n" ); } buf.Printf( "}\n" ); buf.Printf( "EMPHASIS\n" ); buf.Printf( "{\n" ); int c = m_EmphasisSamples.Count(); for ( i = 0; i < c; i++ ) { CEmphasisSample *sample = &m_EmphasisSamples[ i ]; Assert( sample ); buf.Printf( "%f %f\n", sample->time, sample->value ); } buf.Printf( "}\n" ); buf.Printf( "OPTIONS\n" ); buf.Printf( "{\n" ); buf.Printf( "voice_duck %d\n", GetVoiceDuck() ? 1 : 0 ); if ( m_bStoreCheckSum ) { buf.Printf( "checksum %d\n", m_uCheckSum ); } buf.Printf( "}\n" ); #else Assert( 0 ); #endif }
void CSentence::CreateEventWordDistribution( char const *pszText, float flSentenceDuration ) { Assert( pszText ); if ( !pszText ) return; int wordCount = CountWords( pszText ); if ( wordCount <= 0 ) return; float wordLength = ( flSentenceDuration - 2 * STARTEND_TIMEGAP) / (float)wordCount; float wordStart = STARTEND_TIMEGAP; Reset(); char word[ 256 ]; unsigned char const *in = (unsigned char *)pszText; char *out = word; while ( *in ) { if ( !ShouldSplitWord( *in ) ) { *out++ = *in++; } else { *out = 0; // Skip over splitters while ( *in && ( ShouldSplitWord( *in ) ) ) { in++; } if ( strlen( word ) > 0 ) { CWordTag *w = new CWordTag(); Assert( w ); w->SetWord( word ); w->m_flStartTime = wordStart; w->m_flEndTime = wordStart + wordLength; AddWordTag( w ); wordStart += wordLength; } out = word; } } *out = 0; if ( strlen( word ) > 0 ) { CWordTag *w = new CWordTag(); Assert( w ); w->SetWord( word ); w->m_flStartTime = wordStart; w->m_flEndTime = wordStart + wordLength; AddWordTag( w ); wordStart += wordLength; } }
//----------------------------------------------------------------------------- // Purpose: Given a wavfile and a list of inwords, determines the word/phonene // sample counts for the sentce // Input : *wavfile - // *inwords - // *outphonemes{ text.Clear( - // Output : SR_RESULT //----------------------------------------------------------------------------- static SR_RESULT SAPI_ExtractPhonemes( const char *wavfile, int numsamples, void (*pfnPrint)( const char *fmt, ... ), CSentence& inwords, CSentence& outwords ) { LogReset(); USES_CONVERSION; CSpDynamicString text; text.Clear(); HKEY hkwipe; LONG lResult = RegOpenKeyEx( HKEY_CURRENT_USER, "Software\\Microsoft\\Speech\\RecoProfiles", 0, KEY_ALL_ACCESS, &hkwipe ); if ( lResult == ERROR_SUCCESS ) { RecursiveRegDelKey( hkwipe ); RegCloseKey( hkwipe ); } if ( strlen( inwords.GetText() ) <= 0 ) { inwords.SetTextFromWords(); } // Construct a string from the inwords array text.Append( T2W( inwords.GetText() ) ); // Assume failure SR_RESULT result = SR_RESULT_ERROR; if ( text.Length() > 0 ) { CSentence sentence; pfnPrint( "Processing...\r\n" ); // Give it a try result = ExtractPhonemes( wavfile, text, sentence, pfnPrint ); pfnPrint( "Finished.\r\n" ); // PrintWordsAndPhonemes( sentence, pfnPrint ); // Copy results to outputs outwords.Reset(); outwords.SetText( inwords.GetText() ); Log( "Starting\n" ); LogWords( inwords ); if ( SR_RESULT_ERROR != result ) { int i; Log( "Hypothesized\n" ); LogWords( sentence ); for( i = 0 ; i < sentence.m_Words.Size(); i++ ) { CWordTag *tag = sentence.m_Words[ i ]; if ( tag ) { // Skip '...' tag if ( stricmp( tag->GetWord(), "..." 
) ) { CWordTag *newTag = new CWordTag( *tag ); outwords.m_Words.AddToTail( newTag ); } } } // Now insert unrecognized/skipped words from original list // int frompos = 0, topos = 0; while( 1 ) { // End of source list if ( frompos >= inwords.m_Words.Size() ) break; const CWordTag *fromTag = inwords.m_Words[ frompos ]; // Reached end of destination list, just copy words over from from source list until // we run out of source words if ( topos >= outwords.m_Words.Size() ) { // Just copy words over CWordTag *newWord = new CWordTag( *fromTag ); // Remove phonemes while ( newWord->m_Phonemes.Size() > 0 ) { CPhonemeTag *kill = newWord->m_Phonemes[ 0 ]; newWord->m_Phonemes.Remove( 0 ); delete kill; } outwords.m_Words.AddToTail( newWord ); frompos++; topos++; continue; } // Destination word const CWordTag *toTag = outwords.m_Words[ topos ]; // Words match, just skip ahead if ( !stricmp( fromTag->GetWord(), toTag->GetWord() ) ) { frompos++; topos++; continue; } // The only case we handle is that something in the source wasn't in the destination // Find the next source word that appears in the destination int skipAhead = frompos + 1; bool found = false; while ( skipAhead < inwords.m_Words.Size() ) { const CWordTag *sourceWord = inwords.m_Words[ skipAhead ]; if ( !stricmp( sourceWord->GetWord(), toTag->GetWord() ) ) { found = true; break; } skipAhead++; } // Uh oh destination has words that are not in source, just skip to next destination word? 
if ( !found ) { topos++; } else { // Copy words from from source list into destination // int skipCount = skipAhead - frompos; while ( --skipCount>= 0 ) { const CWordTag *sourceWord = inwords.m_Words[ frompos++ ]; CWordTag *newWord = new CWordTag( *sourceWord ); // Remove phonemes while ( newWord->m_Phonemes.Size() > 0 ) { CPhonemeTag *kill = newWord->m_Phonemes[ 0 ]; newWord->m_Phonemes.Remove( 0 ); delete kill; } outwords.m_Words.InsertBefore( topos, newWord ); topos++; } frompos++; topos++; } } Log( "\nDone simple check\n" ); LogWords( outwords ); LogPhonemes( outwords ); ComputeMissingByteSpans( numsamples, outwords ); Log( "\nFinal check\n" ); LogWords( outwords ); LogPhonemes( outwords ); } } else { pfnPrint( "Input sentence is empty!\n" ); } // Return results return result; }