Ejemplo n.º 1
0
bool LexicsCutter::Compare_Word(std::string& str, unsigned int pos, LC_WordVector word)
{
   std::string lchar_prev;
    std::string lchar;

   // read first letter of the word into lchar_prev
   ReadUTF8(str, lchar, pos);

    // okay, here we go, comparing word
    // first letter is already okay, we do begin from second and go on
    LC_WordVector::iterator i = word.begin();
    i++;
    while (i != word.end())
    {
        // get letter from word, return false if the string is shorter
        if (!ReadUTF8(str, lchar, pos)) return(false);
        // check, if the letter is in the set
        LC_LetterSet ls = *i;
        if (ls.count(lchar) == 0)
       {
           // letter is not in set, but we must check, if it is not space or repeat
           if ( (!(IgnoreMiddleSpaces && (lchar == " "))) &&
               (!(IgnoreLetterRepeat && (lchar == lchar_prev))) )
           {
               // no checks viable
               return(false);
           }
       }
       else
       {
           // next word letter
           i++;
       }
       // set previous string letter to compare if needed (this check can really conserve time)
       if (IgnoreLetterRepeat) lchar_prev = lchar;
   }

    return(true);
}
Ejemplo n.º 2
0
bool LexicsCutter::_CompareWord(const std::string& str, unsigned int pos, LC_WordVector word) const
{
    std::string chPrev;
    std::string ch;

    // Read first letter of the word into lchar_prev
    ReadUTF8(str, ch, pos);

    // Compare word
    // First letter is already okay, we do begin from second and go on
    LC_WordVector::iterator i = word.begin();
    ++i;
    while (i != word.end())
    {
        // Get letter from word, return false if the string is shorter
        if (!ReadUTF8(str, ch, pos))
            return false;
        // Check, if the letter is in the set
        LC_LetterSet ls = *i;
        if (ls.count(ch) == 0)
        {
            // Letter is not in set, but we must check, if it is not space or repeat
            if ((!(_ignoreMiddleSpaces && (ch == " "))) &&
                (!(_ignoreLetterRepeat && (ch == chPrev))))
                // No checks viable
                return false;
        }
        else
            // Next word letter
            ++i;

        // Set previous string letter to compare if needed (this check can really conserve time)
        if (_ignoreLetterRepeat)
            chPrev = ch;
    }
    return true;
}