unsigned int CWordTag::ComputeDataCheckSum() { int i; int c; CRC32_t crc; CRC32_Init( &crc ); // Checksum the text if ( m_pszWord != NULL ) { CRC32_ProcessBuffer( &crc, m_pszWord, Q_strlen( m_pszWord ) ); } // Checksum phonemes c = m_Phonemes.Count(); for ( i = 0; i < c; ++i ) { CPhonemeTag *phoneme = m_Phonemes[ i ]; unsigned int phonemeCheckSum = phoneme->ComputeDataCheckSum(); CRC32_ProcessBuffer( &crc, &phonemeCheckSum, sizeof( unsigned int ) ); } // Checksum timestamps CRC32_ProcessBuffer( &crc, &m_flStartTime, sizeof( float ) ); CRC32_ProcessBuffer( &crc, &m_flEndTime, sizeof( float ) ); CRC32_Final( &crc ); return ( unsigned int )crc; }
//-----------------------------------------------------------------------------
// Purpose: Dumps every word in the sentence, followed by that word's phonemes,
//			through a printf-style output callback. NULL word/phoneme entries
//			are skipped.
// Input  : sentence - sentence whose m_Words list is walked
//			pfnPrint - printf-style callback that receives each output line
//-----------------------------------------------------------------------------
void PrintWordsAndPhonemes( CSentence& sentence, void (*pfnPrint)( const char *fmt, ... ) )
{
	pfnPrint( "WORDS\r\n\r\n" );

	for ( int i = 0 ; i < sentence.m_Words.Size(); i++ )
	{
		CWordTag *word = sentence.m_Words[ i ];
		if ( !word )
			continue;

		// Hand the format and arguments straight to the callback. Formatting into
		//  a fixed local buffer first could overflow on long word text, and passing
		//  the formatted text as the format string would misinterpret any '%' in it.
		pfnPrint( "<%u - %u> %s\r\n",
			word->m_uiStartByte, word->m_uiEndByte, word->GetWord() );

		for ( int j = 0 ; j < word->m_Phonemes.Size(); j++ )
		{
			CPhonemeTag *phoneme = word->m_Phonemes[ j ];
			if ( !phoneme )
				continue;

			pfnPrint( " <%u - %u> %s\r\n",
				phoneme->m_uiStartByte, phoneme->m_uiEndByte, phoneme->GetTag() );
		}
	}

	pfnPrint( "\r\n" );
}
void CSentence::Append( float starttime, const CSentence& src ) { #if PHONEME_EDITOR int i; // Combine for ( i = 0 ; i < src.m_Words.Size(); i++ ) { CWordTag *word = src.m_Words[ i ]; CWordTag *newWord = new CWordTag( *word ); newWord->m_flStartTime += starttime; newWord->m_flEndTime += starttime; // Offset times int c = newWord->m_Phonemes.Count(); for ( int i = 0; i < c; ++i ) { CPhonemeTag *tag = newWord->m_Phonemes[ i ]; tag->AddStartTime( starttime ); tag->AddEndTime( starttime ); } AddWordTag( newWord ); } if ( src.GetText()[ 0 ] ) { char fulltext[ 4096 ]; if ( GetText()[ 0 ] ) { Q_snprintf( fulltext, sizeof( fulltext ), "%s %s", GetText(), src.GetText() ); } else { Q_strncpy( fulltext, src.GetText(), sizeof( fulltext ) ); } SetText( fulltext ); } int c = src.m_EmphasisSamples.Size(); for ( i = 0; i < c; i++ ) { CEmphasisSample s = src.m_EmphasisSamples[ i ]; s.time += starttime; m_EmphasisSamples.AddToTail( s ); } // Or in voice duck settings m_bShouldVoiceDuck |= src.m_bShouldVoiceDuck; #else Assert( 0 ); #endif }
//----------------------------------------------------------------------------- // Purpose: // Input : from - //----------------------------------------------------------------------------- CPhonemeTag::CPhonemeTag( const CPhonemeTag& from ) : BaseClass( from ) { SetStartAndEndBytes( from.GetStartByte(), from.GetEndByte() ); SetSelected( from.GetSelected() ); m_szPhoneme = NULL; SetTag( from.GetTag() ); }
//----------------------------------------------------------------------------- // Purpose: // Input : buf - //----------------------------------------------------------------------------- void CSentence::CacheRestoreFromBuffer( CUtlBuffer& buf ) { Assert( !buf.IsText() ); Reset(); m_bIsCached = true; int version = buf.GetChar(); if ( version != CACHED_SENTENCE_VERSION ) { // Uh oh, version changed... m_bIsValid = false; return; } unsigned short pcount = (unsigned short)buf.GetShort(); CPhonemeTag pt; int i; for ( i = 0; i < pcount; ++i ) { unsigned short code = buf.GetShort(); float st = buf.GetFloat(); float et = buf.GetFloat(); pt.SetPhonemeCode( code ); pt.SetStartTime( st ); pt.SetEndTime( et ); AddRuntimePhoneme( &pt ); } // Now read emphasis samples int c = buf.GetShort(); for ( i = 0; i < c; i++ ) { CEmphasisSample sample; sample.SetSelected( false ); sample.time = buf.GetFloat(); sample.value = (float)buf.GetShort() / 32767.0f; m_EmphasisSamples.AddToTail( sample ); } // And voice duck SetVoiceDuck( buf.GetChar() == 0 ? false : true ); m_bIsValid = true; }
void CSentence::SaveToBuffer( CUtlBuffer& buf ) { #if PHONEME_EDITOR Assert( !m_bIsCached ); int i, j; buf.Printf( "VERSION 1.0\n" ); buf.Printf( "PLAINTEXT\n" ); buf.Printf( "{\n" ); buf.Printf( "%s\n", GetText() ); buf.Printf( "}\n" ); buf.Printf( "WORDS\n" ); buf.Printf( "{\n" ); for ( i = 0; i < m_Words.Size(); i++ ) { CWordTag *word = m_Words[ i ]; Assert( word ); buf.Printf( "WORD %s %.3f %.3f\n", word->GetWord(), word->m_flStartTime, word->m_flEndTime ); buf.Printf( "{\n" ); for ( j = 0; j < word->m_Phonemes.Size(); j++ ) { CPhonemeTag *phoneme = word->m_Phonemes[ j ]; Assert( phoneme ); buf.Printf( "%i %s %.3f %.3f 1\n", phoneme->GetPhonemeCode(), phoneme->GetTag(), phoneme->GetStartTime(), phoneme->GetEndTime() ); } buf.Printf( "}\n" ); } buf.Printf( "}\n" ); buf.Printf( "EMPHASIS\n" ); buf.Printf( "{\n" ); int c = m_EmphasisSamples.Count(); for ( i = 0; i < c; i++ ) { CEmphasisSample *sample = &m_EmphasisSamples[ i ]; Assert( sample ); buf.Printf( "%f %f\n", sample->time, sample->value ); } buf.Printf( "}\n" ); buf.Printf( "OPTIONS\n" ); buf.Printf( "{\n" ); buf.Printf( "voice_duck %d\n", GetVoiceDuck() ? 1 : 0 ); if ( m_bStoreCheckSum ) { buf.Printf( "checksum %d\n", m_uCheckSum ); } buf.Printf( "}\n" ); #else Assert( 0 ); #endif }
//-----------------------------------------------------------------------------
// Purpose: Parses the body of a WORDS section: a sequence of
//			"WORD <text> <start> <end>" entries, each followed by a
//			brace-delimited list of "<code> <name> <start> <end> <volume>"
//			phoneme lines. Each word is added with AddWordTag and each phoneme
//			with AddPhonemeTag. Parsing stops at the section's closing "}" or
//			at the first token that is not "WORD".
// Input  : buf - text buffer positioned just inside the WORDS section
//-----------------------------------------------------------------------------
void CSentence::ParseWords( CUtlBuffer& buf )
{
	char token[ 4096 ];
	char word[ 256 ];

	while ( 1 )
	{
		buf.GetString( token );
		if ( !stricmp( token, "}" ) )
			break;

		if ( stricmp( token, "WORD" ) )
			break;

		buf.GetString( token );
		Q_strncpy( word, token, sizeof( word ) );

		buf.GetString( token );
		float wordStart = (float)atof( token );
		buf.GetString( token );
		float wordEnd = (float)atof( token );

		CWordTag *wt = new CWordTag( word );
		assert( wt );
		wt->m_flStartTime = wordStart;
		wt->m_flEndTime = wordEnd;

		AddWordTag( wt );

		buf.GetString( token );
		if ( stricmp( token, "{" ) )
			break;

		while ( 1 )
		{
			// Clear the token first so an exhausted buffer shows up as an empty
			//  token even if GetString leaves the destination untouched.
			token[ 0 ] = '\0';
			buf.GetString( token );

			// A phoneme line always starts with a numeric code, so an empty token
			//  means the data ended before the closing brace. Without this check a
			//  truncated buffer would loop forever creating phoneme tags.
			if ( !token[ 0 ] || !stricmp( token, "}" ) )
				break;

			// Parse phoneme
			char phonemename[ 256 ];

			int code = atoi( token );

			buf.GetString( token );
			Q_strncpy( phonemename, token, sizeof( phonemename ) );

			buf.GetString( token );
			float phonemeStart = (float)atof( token );
			buf.GetString( token );
			float phonemeEnd = (float)atof( token );

			// The volume field is part of the line format but is not stored;
			//  it still has to be consumed to stay in sync with the stream.
			buf.GetString( token );

			CPhonemeTag *pt = new CPhonemeTag();
			assert( pt );
			pt->SetPhonemeCode( code );
			pt->SetTag( phonemename );
			pt->SetStartTime( phonemeStart );
			pt->SetEndTime( phonemeEnd );

			AddPhonemeTag( wt, pt );
		}
	}
}
//----------------------------------------------------------------------------- // Purpose: Walk list of words and phonemes and create phoneme tags in CSentence object // FIXME: Right now, phonemes are assumed to evenly space out across a word. // Input : *converter - // result - // sentence - //----------------------------------------------------------------------------- void EnumeratePhonemes( ISpPhoneConverter *converter, const ISpRecoResult* result, CSentence& sentence ) { USES_CONVERSION; // Grab access to element container ISpPhrase *phrase = ( ISpPhrase * )result; if ( !phrase ) return; SPPHRASE *pElements; if ( !SUCCEEDED( phrase->GetPhrase( &pElements ) ) ) return; // Only use it if it's better/same size as what we already had on-hand if ( pElements->Rule.ulCountOfElements > 0 ) //(unsigned int)( sentence.m_Words.Size() - sentence.GetWordBase() ) ) { sentence.ResetToBase(); // Walk list of words for ( ULONG i = 0; i < pElements->Rule.ulCountOfElements; i++ ) { unsigned int wordstart, wordend; // Get start/end sample index wordstart = pElements->pElements[i].ulAudioStreamOffset + (unsigned int)pElements->ullAudioStreamPosition; wordend = wordstart + pElements->pElements[i].ulAudioSizeBytes; // Create word tag CWordTag *w = new CWordTag( W2T( pElements->pElements[i].pszDisplayText ) ); Assert( w ); w->m_uiStartByte = wordstart; w->m_uiEndByte = wordend; sentence.AddWordTag( w ); // Count # of phonemes in this word SPPHONEID pstr[ 2 ]; pstr[ 1 ] = 0; WCHAR wszPhoneme[ SP_MAX_PRON_LENGTH ]; const SPPHONEID *current; SPPHONEID phoneme; current = pElements->pElements[i].pszPronunciation; float total_weight = 0.0f; while ( 1 ) { phoneme = *current++; if ( !phoneme ) break; pstr[ 0 ] = phoneme; wszPhoneme[ 0 ] = L'\0'; converter->IdToPhone( pstr, wszPhoneme ); total_weight += WeightForPhoneme( W2A( wszPhoneme ) ); } current = pElements->pElements[i].pszPronunciation; // Decide # of bytes/phoneme weight float psize = 0; if ( total_weight ) { psize = ( wordend - 
wordstart ) / total_weight; } int number = 0; // Re-walk the phoneme list and create true phoneme tags float startWeight = 0.0f; while ( 1 ) { phoneme = *current++; if ( !phoneme ) break; pstr[ 0 ] = phoneme; wszPhoneme[ 0 ] = L'\0'; converter->IdToPhone( pstr, wszPhoneme ); CPhonemeTag *p = new CPhonemeTag( W2A( wszPhoneme ) ); Assert( p ); float weight = WeightForPhoneme( W2A( wszPhoneme ) ); p->m_uiStartByte = wordstart + (int)( startWeight * psize ); p->m_uiEndByte = p->m_uiStartByte + (int)( psize * weight ); startWeight += weight; // Convert to IPA phoneme code p->SetPhonemeCode( TextToPhoneme( p->GetTag() ) ); sentence.AddPhonemeTag( w, p ); number++; } } } // Free memory ::CoTaskMemFree(pElements); }