/**
 * Register a linked pair of boundaries.
 *
 * Both \a is (the opening boundary) and \a ie (the closing boundary) are
 * passed to AddBoundary(); the two resulting entries of \c bounds are then
 * cross-linked through their \c other fields, with \c start set on the
 * opening entry and cleared on the closing one.
 */
void text_wrapper::AddTwinBoundaries(text_boundary const &is, text_boundary const &ie)
{
    // Add the opening boundary first, then the closing one.
    unsigned const open_ix = AddBoundary(is);
    unsigned const close_ix = AddBoundary(ie);

    // Opening entry: flagged as a start, pointing at its closing partner.
    bounds[open_ix].start = true;
    bounds[open_ix].other = close_ix;

    // Closing entry: not a start, pointing back at the opening partner.
    bounds[close_ix].start = false;
    bounds[close_ix].other = open_ix;
}
void ExcerptGen_c::AddJunk ( int iStart, int iLength, int iBoundary ) { int iChunkStart = iStart; int iSaved = 0; for ( int i = iStart; i < iStart+iLength; i++ ){ const char* buf_ptr = NULL; if(m_bUtf8){ buf_ptr = m_sBufferUTF8.cstr (); }else{ buf_ptr = m_sBuffer.cstr (); } if ( sphIsSpace ( buf_ptr[i] ) != sphIsSpace ( buf_ptr[iChunkStart] ) ) { m_dTokens.Resize ( m_dTokens.GetLength () + 1 ); Token_t & tLast = m_dTokens.Last (); tLast.m_eType = TOK_SPACE; tLast.m_iStart = iChunkStart; tLast.m_iLengthBytes = i - iChunkStart; tLast.m_iWordID = 0; tLast.m_uWords = 0; iChunkStart = i; iSaved += tLast.m_iLengthBytes; if ( iBoundary != -1 && iSaved > iBoundary - iStart ) { AddBoundary(); iBoundary = -1; } } } m_dTokens.Resize ( m_dTokens.GetLength () + 1 ); Token_t & tLast = m_dTokens.Last (); tLast.m_eType = TOK_SPACE; tLast.m_iStart = iChunkStart; tLast.m_iLengthBytes = iStart + iLength - iChunkStart; tLast.m_iWordID = 0; tLast.m_uWords = 0; if ( iBoundary != -1 ) AddBoundary(); }
void ExcerptGen_c::AddJunk ( int iStart, int iLength, int iBoundary ) { assert ( iLength>0 ); assert ( iLength<=m_sBuffer.Length() ); assert ( iStart+iLength<=m_sBuffer.Length() ); int iChunkStart = iStart; int iSaved = 0; for ( int i = iStart; i < iStart+iLength; i++ ) if ( sphIsSpace ( m_sBuffer.cstr () [i] )!=sphIsSpace ( m_sBuffer.cstr () [iChunkStart] ) ) { Token_t & tLast = m_dTokens.Add(); tLast.m_eType = TOK_SPACE; tLast.m_iStart = iChunkStart; tLast.m_iLengthBytes = i - iChunkStart; tLast.m_iWordID = 0; tLast.m_uWords = 0; tLast.m_uPosition = 0; iChunkStart = i; iSaved += tLast.m_iLengthBytes; if ( iBoundary!=-1 && iSaved > ( iBoundary-iStart ) ) { AddBoundary(); iBoundary = -1; } } Token_t & tLast = m_dTokens.Add(); tLast.m_eType = TOK_SPACE; tLast.m_iStart = iChunkStart; tLast.m_iLengthBytes = iStart + iLength - iChunkStart; tLast.m_iWordID = 0; tLast.m_uWords = 0; tLast.m_uPosition = 0; if ( iBoundary!=-1 ) AddBoundary(); }