Beispiel #1
0
bool wxHtmlSearchEngine::Scan(const wxFSFile& file)
{
    wxASSERT_MSG(!m_Keyword.empty(), wxT("wxHtmlSearchEngine::LookFor must be called before scanning!"));

    wxHtmlFilterHTML filter;
    wxString bufStr = filter.ReadFile(file);

    if (!m_CaseSensitive)
        bufStr.LowerCase();

    {   // remove html tags
        wxString bufStrCopy;
        bufStrCopy.reserve( bufStr.size() );
        bool insideTag = false;
        for (const wxChar * pBufStr = bufStr.c_str(); *pBufStr; ++pBufStr)
        {
            wxChar c = *pBufStr;
            if (insideTag)
            {
                if (c == wxT('>'))
                {
                    insideTag = false;
                    // replace the tag by an empty space
                    c = wxT(' ');
                }
                else
                    continue;
            }
            else if (c == wxT('<'))
            {
                wxChar nextCh = *(pBufStr + 1);
                if (nextCh == wxT('/') || !WHITESPACE(nextCh))
                {
                    insideTag = true;
                    continue;
                }
            }
            bufStrCopy += c;
        }
        bufStr.swap( bufStrCopy );
    }

    wxString keyword = m_Keyword;

    if (m_WholeWords)
    {
        // insert ' ' at the beginning and at the end
        keyword.insert( 0, wxT(" ") );
        keyword.append( wxT(" ") );
        bufStr.insert( 0, wxT(" ") );
        bufStr.append( wxT(" ") );
    }

    // remove continuous spaces
    keyword = CompressSpaces( keyword );
    bufStr = CompressSpaces( bufStr );

    // finally do the search
    return bufStr.find( keyword ) != wxString::npos;
}
Beispiel #2
0
// Blanks out every complete <...> tag in `text` (in place), then hands the
// buffer to CompressSpaces() and ExtractWords().
//
//  acol     : word collection that is passed on to ExtractWords()
//  text     : buffer to clean; it is modified in place
//  textsize : pointer to the size of `text`; passed on to the helpers
//
// Each removed tag is overwritten with spaces and marked with a single '|'
// so that later stages can tell that a tag used to be there.
//
// FindFirstInstanceOfChar() is treated here as returning 0 for "not found",
// which makes a hit at index 0 indistinguishable from a miss. A tag that
// starts at text[0] is therefore handled separately before the main loop.
//
// Always returns 0.
unsigned int ClearTextFromHTMLTags(struct word_collection * acol,char * text,unsigned int *textsize)
{
 unsigned int ptr = 0;    // position the next search starts from
 unsigned int token = 0;  // index of the '<' that opens the current tag
 unsigned int token2 = 0; // index of the '>' that closes the current tag
 unsigned int i;

 printf("ClearTextFromHTMLTags \n");
 if ( TextPointerError(text,textsize) ) { return 0; }

 // Leading tag: the opener sits at index 0, which the helper cannot report.
 // Look for its closer directly instead of rewriting text[0]/text[1], which
 // used to destroy the '>' of an empty "<>" tag and wrote past a buffer that
 // holds only the single character '<'.
 if ( ( *textsize > 0 ) && ( text[0]=='<' ) )
    {
      token2 = FindFirstInstanceOfChar(0,text,'>',*textsize);
      if ( token2 > 0 )
      {
        for ( i=0; i<=token2; i++ )
        {
             text[i]=' ';
        }
        // The delimiter goes to index 1 (token2 >= 1, so it is inside the
        // blanked range). This keeps index 0 a blank and yields exactly the
        // bytes this function has always produced for a leading tag.
        text[1]='|'; // SIGNAL DELIMITER ( IT MEANS AN HTML TAG WAS REPLACED )
        ptr = token2;
      }
    }

 for (;;)
 {
   token = FindFirstInstanceOfChar(ptr,text,'<',*textsize);
   if ( token == 0 ) { break; } // no further tag opener

   token2 = FindFirstInstanceOfChar(token,text,'>',*textsize);
   // Unterminated tag: no '>' was found when searching from this '<', so
   // there is nothing left to strip. Stopping here also guarantees that the
   // loop cannot keep finding the same '<' over and over.
   if ( token2 == 0 ) { break; }

   for ( i=token; i<=token2; i++ )
   {
        text[i]=' ';
   }
   text[token]='|'; // SIGNAL DELIMITER ( IT MEANS AN HTML TAG WAS REPLACED )

   ptr = token2; // continue scanning after the tag that was just removed
 }

 CompressSpaces(text,textsize);
 ExtractWords(acol,text,textsize);
 return 0;
}