/// Check whether the end of a word satisfies this rule's condition.
///
/// The condition and the word are walked right to left in lockstep. A plain
/// condition character must equal the current word character. A bracketed
/// group (found by its closing ']') consumes one word character, which must
/// pass IsInSet() against the text between the brackets. The walk stops as
/// soon as either the condition or the word (m_iWordLen characters) runs out.
///
/// @param sWord  word to test; m_iWordLen is used as its length
/// @return false on the first mismatch or on a ']' with no preceding '[';
///         true otherwise
bool CISpellAffixRule::CheckSuffix ( const CSphString & sWord ) const
{
	const char * szCond = m_sCondition.cstr();
	const char * szWord = sWord.cstr();

	int iCond = m_iCondLen-1;
	for ( int iWord = m_iWordLen-1; iCond>=0 && iWord>=0; --iWord )
	{
		const char cLetter = szWord[iWord];

		// plain character: must match exactly, then step one condition char left
		if ( szCond[iCond]!=']' )
		{
			if ( szCond[iCond]!=cLetter )
				return false;

			--iCond;
			continue;
		}

		// bracket group: scan left for the opening bracket
		int iOpen = iCond;
		while ( iOpen>=0 && szCond[iOpen]!='[' )
			--iOpen;

		// unbalanced ']' means the condition can never match
		if ( iOpen<0 )
			return false;

		// test the letter against the group's contents (brackets excluded)
		const int iSetLen = iCond - iOpen - 1;
		if ( !IsInSet ( cLetter, m_sCondition.SubString ( iOpen+1, iSetLen ).cstr() ) )
			return false;

		// resume just left of the '['
		iCond = iOpen-1;
	}

	return true;
}
/// Check whether the beginning of a word satisfies this rule's condition.
///
/// The condition and the word are walked left to right in lockstep. A plain
/// condition character must equal the current word character. A bracketed
/// group `[...]` consumes one word character, which must pass IsInSet()
/// against the text between the brackets. The walk stops as soon as either
/// the condition or the word (m_iWordLen characters) runs out.
///
/// @param sWord  word to test; m_iWordLen is used as its length
/// @return false on the first mismatch or on a '[' with no closing ']';
///         true otherwise
bool CISpellAffixRule::CheckPrefix ( const CSphString & sWord ) const
{
	const char * szCond = m_sCondition.cstr();
	const char * szWord = sWord.cstr();

	int iCondI = 0;
	for ( int i=0; iCondI<m_iCondLen && i<m_iWordLen; ++i )
	{
		// plain character: must match exactly, then step one condition char right
		if ( szCond[iCondI]!='[' )
		{
			if ( szCond[iCondI]!=szWord[i] )
				return false;

			++iCondI;
			continue;
		}

		// bracket group: scan right for the closing bracket
		int iRangeEnd = -1;
		for ( int j=iCondI; j<m_iCondLen && iRangeEnd==-1; ++j )
			if ( szCond[j]==']' )
				iRangeEnd = j;

		// unbalanced '[' means the condition can never match
		if ( iRangeEnd==-1 )
			return false;

		// test the letter against the group's contents (brackets excluded)
		if ( !IsInSet ( szWord[i], m_sCondition.SubString ( iCondI+1, iRangeEnd-iCondI-1 ).cstr() ) )
			return false;

		// move past the ']' so the next word character is checked against the
		// next condition element; without this step every following character
		// would be re-tested against the same group
		iCondI = iRangeEnd+1;
	}

	return true;
}
/// Load a plugin shared library from the plugin directory and check its UDF interface version.
///
/// The library must export a `<basename>_ver` symbol, where `<basename>` is
/// sLibName cut at its first '.'. The version it reports must not be lower
/// than SPH_UDF_VERSION.
///
/// @param sLibName      library file name, resolved relative to g_sPluginDir
/// @param sError        receives a description of the failure when NULL is returned
/// @param bLinuxReload  when true, the file is renamed to a randomly suffixed
///                      temporary name, loaded from there, and renamed back
/// @return a newly allocated PluginLib_c wrapping the library handle, or NULL on failure
static PluginLib_c * LoadPluginLibrary ( const char * sLibName, CSphString & sError, bool bLinuxReload=false )
{
	CSphString sTmpfile;
	CSphString sLibfile;
	sLibfile.SetSprintf ( "%s/%s", g_sPluginDir.cstr(), sLibName );

	// dlopen caches the old file content, even if file was updated
	// let's reload library from the temporary file to invalidate the cache
	if ( bLinuxReload )
	{
		sTmpfile.SetSprintf ( "%s/%s.%u", g_sPluginDir.cstr(), sLibName, sphRand() );
		if ( ::rename ( sLibfile.cstr(), sTmpfile.cstr() ) )
		{
			sError.SetSprintf ( "failed to rename file (src=%s, dst=%s, errno=%d, error=%s)", sLibfile.cstr(), sTmpfile.cstr(), errno, strerror(errno) );
			return NULL;
		}
	}

	const char * sLoadPath = bLinuxReload ? sTmpfile.cstr() : sLibfile.cstr();
	void * pHandle = dlopen ( sLoadPath, RTLD_LAZY | RTLD_LOCAL );
	if ( !pHandle )
	{
		// record the loader error now; we still have to restore the file name below
		const char * sDlerror = dlerror();
		sError.SetSprintf ( "dlopen() failed: %s", sDlerror ? sDlerror : "(null)" );
	} else
	{
		sphLogDebug ( "dlopen(%s)=%p", sLoadPath, pHandle );
	}

	// rename file back to the original name
	// this must happen even when dlopen() failed, otherwise the library stays
	// stranded under its temporary name
	if ( bLinuxReload && ::rename ( sTmpfile.cstr(), sLibfile.cstr() ) )
	{
		const int iErrno = errno; // save before any further call can overwrite it
		CSphString sRenameError;
		sRenameError.SetSprintf ( "failed to rename file (src=%s, dst=%s, errno=%d, error=%s)", sTmpfile.cstr(), sLibfile.cstr(), iErrno, strerror(iErrno) );

		if ( pHandle )
		{
			// do not leak the handle of a library we are not going to return
			dlclose ( pHandle );
			sError = sRenameError;
		} else
		{
			// both steps failed; report both
			CSphString sBoth;
			sBoth.SetSprintf ( "%s; %s", sError.cstr(), sRenameError.cstr() );
			sError = sBoth;
		}
		return NULL;
	}

	if ( !pHandle )
		return NULL;

	// the version symbol is named after the file name up to its first dot
	CSphString sBasename = sLibName;
	const char * pDot = strchr ( sBasename.cstr(), '.' );
	if ( pDot )
		sBasename = sBasename.SubString ( 0, pDot-sBasename.cstr() );

	CSphString sTmp;
	PluginVer_fn fnVer = (PluginVer_fn) dlsym ( pHandle, sTmp.SetSprintf ( "%s_ver", sBasename.cstr() ).cstr() );
	if ( !fnVer )
	{
		sError.SetSprintf ( "symbol '%s_ver' not found in '%s': update your UDF implementation", sBasename.cstr(), sLibName );
		dlclose ( pHandle );
		return NULL;
	}

	if ( fnVer() < SPH_UDF_VERSION )
	{
		sError.SetSprintf ( "library '%s' was compiled using an older version of sphinxudf.h; it needs to be recompiled", sLibName );
		dlclose ( pHandle );
		return NULL;
	}

	return new PluginLib_c ( pHandle, sLibName );
}
char * ExcerptGen_c::BuildExcerpt ( const ExcerptQuery_t & q, CSphDict * pDict, ISphTokenizer * pTokenizer ) { m_dTokens.Reserve ( 1024 ); m_sBuffer = q.m_sSource; const bool bUtf8 = pTokenizer->IsUtf8(); m_bUtf8 = bUtf8; // tokenize query words int iWordsLength = strlen ( q.m_sWords.cstr() ); CSphVector<char> dKwBuffer ( iWordsLength ); CSphVector<Keyword_t> dKeywords; dKeywords.Reserve ( MAX_HIGHLIGHT_WORDS ); BYTE * sWord; int iKwIndex = 0; pTokenizer->SetBuffer ( (BYTE*)q.m_sWords.cstr(), iWordsLength ); while ( ( sWord = pTokenizer->GetToken() ) != NULL ) { SphWordID_t iWord = pDict->GetWordID ( sWord ); if ( iWord ) { m_dWords.Resize ( m_dWords.GetLength () + 1 ); Token_t & tLast = m_dWords.Last (); tLast.m_eType = TOK_WORD; tLast.m_iWordID = iWord; tLast.m_iLengthBytes = strlen ( (const char *)sWord ); tLast.m_iLengthCP = bUtf8 ? sphUTF8Len ( (const char *)sWord ) : tLast.m_iLengthBytes; // store keyword dKeywords.Resize( dKeywords.GetLength() + 1 ); Keyword_t & kwLast = dKeywords.Last (); // find stars bool bStarBack = *pTokenizer->GetTokenEnd() == '*'; bool bStarFront = ( pTokenizer->GetTokenStart() != pTokenizer->GetBufferPtr() ) && pTokenizer->GetTokenStart()[-1] == '*'; kwLast.m_uStar = ( bStarFront ? STAR_FRONT : 0 ) | ( bStarBack ? 
STAR_BACK : 0 ); // store token const int iEndIndex = iKwIndex + tLast.m_iLengthBytes + 1; dKwBuffer.Resize ( iEndIndex ); kwLast.m_iWord = iKwIndex; strcpy ( &dKwBuffer [ iKwIndex ], (const char *)sWord ); iKwIndex = iEndIndex; if ( m_dWords.GetLength() == MAX_HIGHLIGHT_WORDS ) break; } } // tokenize document pTokenizer->SetBuffer ( (BYTE*)q.m_sSource.cstr (), strlen ( q.m_sSource.cstr () ) ); const char * pStartPtr = pTokenizer->GetBufferPtr (); const char * pLastTokenEnd = pStartPtr; //assign utf-8 m_sBufferUTF8 = pStartPtr; while ( ( sWord = pTokenizer->GetToken() ) != NULL ) { const char * pTokenStart = pTokenizer->GetTokenStart (); if ( pTokenStart != pStartPtr ) AddJunk ( pLastTokenEnd - pStartPtr, pTokenStart - pLastTokenEnd, pTokenizer->GetBoundary() ? pTokenizer->GetBoundaryOffset() : -1 ); SphWordID_t iWord = pDict->GetWordID ( sWord ); pLastTokenEnd = pTokenizer->GetTokenEnd (); m_dTokens.Resize ( m_dTokens.GetLength () + 1 ); Token_t & tLast = m_dTokens.Last (); tLast.m_eType = iWord ? 
TOK_WORD : TOK_SPACE; tLast.m_iStart = pTokenStart - pStartPtr; tLast.m_iLengthBytes = pLastTokenEnd - pTokenStart; tLast.m_iWordID = iWord; tLast.m_uWords = 0; // fill word mask if ( iWord ) { bool bMatch = false; int iOffset; ARRAY_FOREACH ( nWord, m_dWords ) { const char * keyword = &dKwBuffer [ dKeywords[nWord].m_iWord ]; const Token_t & token = m_dWords[nWord]; switch ( dKeywords[nWord].m_uStar ) { case STAR_NONE: bMatch = iWord == token.m_iWordID; break; case STAR_FRONT: iOffset = tLast.m_iLengthBytes - token.m_iLengthBytes; bMatch = (iOffset >= 0) && ( memcmp( keyword, sWord + iOffset, token.m_iLengthBytes ) == 0 ); break; case STAR_BACK: bMatch = ( tLast.m_iLengthBytes >= token.m_iLengthBytes ) && ( memcmp( keyword, sWord, token.m_iLengthBytes ) == 0 ); break; case STAR_BOTH: bMatch = strstr( (const char *)sWord, keyword ) != NULL; break; } if ( bMatch ) tLast.m_uWords |= (1UL << nWord); } } } // last space if any if ( pLastTokenEnd != pTokenizer->GetBufferEnd () ) { int iOffset = pTokenizer->GetBoundary() ? pTokenizer->GetBoundaryOffset() : -1; AddJunk ( pLastTokenEnd - pStartPtr, pTokenizer->GetBufferEnd () - pLastTokenEnd, iOffset ); } m_dTokens.Resize ( m_dTokens.GetLength () + 1 ); Token_t & tLast = m_dTokens.Last (); tLast.m_eType = TOK_NONE; tLast.m_iStart = 0; tLast.m_iLengthBytes = 0; tLast.m_iWordID = 0; tLast.m_uWords = 0; // sum token lengths int iSourceCodes = 0; ARRAY_FOREACH ( i, m_dTokens ) { m_dTokens [i].m_iWeight = 0; if ( m_dTokens [i].m_iLengthBytes ) { if ( bUtf8 ) { //int iLen = sphUTF8Len ( m_sBuffer.SubString ( m_dTokens[i].m_iStart, m_dTokens[i].m_iLengthBytes ).cstr() ); int iLen = sphUTF8Len ( m_sBufferUTF8.SubString ( m_dTokens[i].m_iStart, m_dTokens[i].m_iLengthBytes ).cstr() ); m_dTokens[i].m_iLengthCP = iLen; } else m_dTokens[i].m_iLengthCP = m_dTokens[i].m_iLengthBytes; iSourceCodes += m_dTokens[i].m_iLengthCP; } else m_dTokens [i].m_iLengthCP = 0; }