bool wxHtmlSearchEngine::Scan(const wxFSFile& file) { wxASSERT_MSG(!m_Keyword.empty(), wxT("wxHtmlSearchEngine::LookFor must be called before scanning!")); wxHtmlFilterHTML filter; wxString bufStr = filter.ReadFile(file); if (!m_CaseSensitive) bufStr.LowerCase(); { // remove html tags wxString bufStrCopy; bufStrCopy.reserve( bufStr.size() ); bool insideTag = false; for (const wxChar * pBufStr = bufStr.c_str(); *pBufStr; ++pBufStr) { wxChar c = *pBufStr; if (insideTag) { if (c == wxT('>')) { insideTag = false; // replace the tag by an empty space c = wxT(' '); } else continue; } else if (c == wxT('<')) { wxChar nextCh = *(pBufStr + 1); if (nextCh == wxT('/') || !WHITESPACE(nextCh)) { insideTag = true; continue; } } bufStrCopy += c; } bufStr.swap( bufStrCopy ); } wxString keyword = m_Keyword; if (m_WholeWords) { // insert ' ' at the beginning and at the end keyword.insert( 0, wxT(" ") ); keyword.append( wxT(" ") ); bufStr.insert( 0, wxT(" ") ); bufStr.append( wxT(" ") ); } // remove continuous spaces keyword = CompressSpaces( keyword ); bufStr = CompressSpaces( bufStr ); // finally do the search return bufStr.find( keyword ) != wxString::npos; }
/*
 * Blanks out every "<...>" span in `text` (in place), marks the position of
 * each removed tag with a '|' delimiter, then hands the buffer to
 * CompressSpaces() and ExtractWords().
 *
 *   acol      word collection passed through to ExtractWords()
 *   text      buffer to clean; modified in place
 *   textsize  pointer to the buffer length; forwarded to the helpers
 *
 * Always returns 0, both when TextPointerError() rejects the arguments and
 * after normal processing.
 *
 * NOTE(review): FindFirstInstanceOfChar() is assumed to return 0 for
 * "not found" (which is why a '<' at index 0 needs special treatment) --
 * confirm against its definition.
 */
unsigned int ClearTextFromHTMLTags(struct word_collection * acol,char * text,unsigned int *textsize)
{
    unsigned int ptr = 0;     // position the next search starts from
    unsigned int token;       // index of the current '<'
    unsigned int token2;      // index of the matching '>'
    unsigned int i;

    printf("ClearTextFromHTMLTags \n");

    if ( TextPointerError(text,textsize) ) { return 0; }

    // Index 0 cannot be told apart from the "not found" result, so a tag that
    // opens at the very first character is moved out of position 0 first.
    if ( (*textsize > 0) && (text[0]=='<') )
    {
        printf("Bug with unsigned ints :P , sloppy fix \n");

        // Only touch text[1] when it really exists inside the buffer.
        if ( *textsize >= 2 )
        {
            if ( text[1]=='>' )
            {
                // Leading empty tag "<>": replace it directly, otherwise its
                // closing '>' would be overwritten and the scan below would
                // blank real text up to the next '>'.
                text[0]='|';
                text[1]=' ';
            }
            else
            {
                // Shift the '<' to index 1; the overwritten character was part
                // of the tag and gets blanked below anyway.
                text[0]=' ';
                text[1]='<';
            }
        }
    }

    for (;;)
    {
        token = FindFirstInstanceOfChar(ptr,text,'<',*textsize);
        if ( token == 0 ) { break; }          // no further tag opener
        ptr = token;

        token2 = FindFirstInstanceOfChar(ptr,text,'>',*textsize);
        if ( token2 == 0 )
        {
            // Unclosed '<': nothing after this point can be closed either.
            // Stop here instead of searching again from the same '<', which
            // could loop forever if the helper's search includes `ptr`.
            break;
        }

        for ( i=token; i<=token2; i++ ) { text[i]=' '; }
        text[token]='|';                       // SIGNAL DELIMITER ( IT MEANS AN HTML TAG WAS REPLACED )
        ptr = token2;
    }

    CompressSpaces(text,textsize);
    ExtractWords(acol,text,textsize);
    return 0;
}