/*****************************************************************************
* CTTSEngObj::AddNextSentItem *
*-----------------------------*
*   Locates the next sentence item in the stream and adds it to the list.
*
*   The token returned by FindNextToken is split into up to three groups of
*   list entries, appended in text order:
*       1. one single-character item per leading punctuation mark
*          ( " { ' [
*       2. the main item (the remaining core of the token)
*       3. one single-character item per trailing punctuation mark
*          . ! ? , " ; : ) } ' ]
*   Any apostrophe left inside the main item is overwritten with an
*   underscore directly in the text buffer the item points at.
*
*   ItemList - list that receives the new items.
*
*   Returns true if the last item added is the end of the sentence, and
*   false otherwise (including when no token was found, which lets the
*   caller advance to the next text fragment).
****************************************************************************/
BOOL CTTSEngObj::AddNextSentItem( CItemList& ItemList )
{
    //--- Get the token
    ULONG ulIndex;
    CSentItem Item;
    Item.pItem = FindNextToken( m_pNextChar, m_pEndChar, m_pNextChar );

    //--- This case can occur when we hit the end of a text fragment.
    //    Returning at this point will cause advancement to the next fragment.
    if( Item.pItem == NULL )
    {
        return false;
    }

    //--- m_pNextChar now points just past the token, so the token's last
    //    character is at m_pNextChar-1.
    const WCHAR* pTrailChar = m_pNextChar - 1;
    ULONG TokenLen = static_cast<ULONG>( m_pNextChar - Item.pItem );

    //--- Split off leading punctuation if any.
    //    At least one character is always left for the main item.
    static const WCHAR LeadItems[] = { L'(', L'\"', L'{', L'\'', L'[' };
    while( TokenLen > 1 )
    {
        if( !SearchSet( Item.pItem[0], LeadItems, sp_countof(LeadItems), &ulIndex ) )
        {
            break;
        }

        CSentItem LItem;
        LItem.pItem           = Item.pItem;
        LItem.ulItemLen       = 1;
        LItem.pXmlState       = &m_pCurrFrag->State;
        LItem.ulItemSrcLen    = LItem.ulItemLen;
        LItem.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
                                ( LItem.pItem - m_pCurrFrag->pTextStart );
        ItemList.AddTail( LItem );

        ++Item.pItem;
        --TokenLen;
    }

    //--- Get primary item insert position.
    //    The entry is only a placeholder here; its fields are filled in and
    //    written back with SetAt once the final token length is known.
    SPLISTPOS ItemPos = ItemList.AddTail( Item );

    //--- Split off trailing punctuation if any.
    //    The token is scanned backwards and every trailing item is inserted
    //    directly after the main item, so the items end up in text order.
    static const WCHAR EOSItems[]   = { L'.', L'!', L'?' };
    static const WCHAR TrailItems[] = { L',', L'\"', L';', L':', L')', L'}', L'\'', L']' };
    BOOL fIsEOS = false;
    while( TokenLen > 1 )
    {
        if( SearchSet( *pTrailChar, EOSItems, sp_countof(EOSItems), &ulIndex ) )
        {
            fIsEOS = true;
        }
        else if( !SearchSet( *pTrailChar, TrailItems, sp_countof(TrailItems), &ulIndex ) )
        {
            //--- Not punctuation: the rest of the token is the main item
            break;
        }

        CSentItem TItem;
        TItem.pItem           = pTrailChar;
        TItem.ulItemLen       = 1;
        TItem.pXmlState       = &m_pCurrFrag->State;
        TItem.ulItemSrcLen    = TItem.ulItemLen;
        TItem.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
                                ( TItem.pItem - m_pCurrFrag->pTextStart );
        ItemList.InsertAfter( ItemPos, TItem );

        --TokenLen;
        --pTrailChar;
    }

    //--- Abbreviation or sentence end?
    //    If we are at the end of the buffer then EOS is implied.
    //    NOTE(review): this reads *m_pNextChar, so it relies on the character
    //    just past the token being readable (e.g. a NUL-terminated buffer) -
    //    confirm against FindNextToken and the fragment setup.
    if( *m_pNextChar == L'\0' )
    {
        fIsEOS = true;
        if( !SearchSet( *(m_pNextChar-1), EOSItems, sp_countof(EOSItems), &ulIndex ) )
        {
            //--- Terminate with a period if we are at the end of a buffer
            //    and no end of sentence punctuation has been added.
            //    The item text is a static literal; its source offset is
            //    reported as that of the last character of the token.
            static const WCHAR* pPeriod = L".";
            CSentItem EOSItem;
            EOSItem.pItem           = pPeriod;
            EOSItem.ulItemLen       = 1;
            EOSItem.pXmlState       = &m_pCurrFrag->State;
            EOSItem.ulItemSrcLen    = EOSItem.ulItemLen;
            EOSItem.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
                                      ( (m_pNextChar-1) - m_pCurrFrag->pTextStart );
            ItemList.AddTail( EOSItem );
        }
    }
    else if( pTrailChar[1] == L'.' )
    {
        //--- Here is where you would try to prove that it's not EOS.
        //    It might be an abbreviation. That's a hard problem that
        //    we are not going to attempt here.
    }

    //--- Substitute underscore for apostrophe.
    //    This deliberately writes through the const item pointer into the
    //    underlying text buffer. The original "((WCHAR)x) = ..." form assigned
    //    to the result of a value cast, which is not an lvalue in standard
    //    C++; const_cast states the intent explicitly.
    WCHAR* pWritable = const_cast<WCHAR*>( Item.pItem );
    for( ULONG i = 0; i < TokenLen; ++i )
    {
        if( pWritable[i] == L'\'' )
        {
            pWritable[i] = L'_';
        }
    }

    //--- Add the main item
    if( TokenLen > 0 )
    {
        Item.ulItemLen       = TokenLen;
        Item.pXmlState       = &m_pCurrFrag->State;
        Item.ulItemSrcLen    = Item.ulItemLen;
        Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
                               ( Item.pItem - m_pCurrFrag->pTextStart );
        ItemList.SetAt( ItemPos, Item );
    }

    return fIsEOS;
} /* CTTSEngObj::AddNextSentItem */