Example #1
0
bool LexicsCutter::CheckLexics(std::string& Phrase)
{
    std::string lchar;
    LC_WordMap::iterator i;
    std::pair<LC_WordMap::iterator, LC_WordMap::iterator> ii;

    if (Phrase.size() == 0)
        return false;

    // first, convert the string, adding spaces and removing invalid characters
    // also create fast position vector for the new positions
    std::string str = " ";
    unsigned int pos = 0;
    while (ReadUTF8(Phrase, lchar, pos))
        if (m_sInvalidChars.find(lchar) == std::string::npos)
            str.append(lchar);

    // string prepared, now parse it and scan for all the words
    unsigned int pos_prev = 0;
    pos = 0;
    while (ReadUTF8(str, lchar, pos))
    {
        // got character, now try to find wordmap for it
        ii = m_WordMap.equal_range(lchar);
        // iterate over all found words
        for (i = ii.first; i != ii.second; i++)
            // compare word at initial position
            if (CompareWord(str, pos_prev, m_WordList[i->second]))
                return true;
       // set initial position to the current position
       pos_prev = pos;
    }
    return false;
}
Example #2
0
int SuffixArray::Match( const vector< WORD > &phrase, INDEX index )
{
  INDEX pos = m_index[ index ];
  for(INDEX i=0; i<phrase.size() && i+pos<m_size; i++) {
    int match = CompareWord( m_vcb.GetWordID( phrase[i] ), m_array[ pos+i ] );
    // cerr << "{" << index << "+" << i << "," << pos+i << ":" << match << "}" << endl;
    if (match != 0)
      return match;
  }
  return 0;
}
// Orders the two array positions a and b by the word sequences starting there.
//
// The common prefix of both sequences is skipped first. If the sequence
// starting at a then ends exactly at m_size, -1 is returned; otherwise, if the
// one starting at b ends exactly at m_size, 1 is returned. In all other cases
// the result is CompareWord applied to the first pair of differing words.
int SuffixArray::CompareIndex( INDEX a, INDEX b ) const
{
	// length of the shared prefix, bounded by the end of the array
	INDEX common = 0;
	while( a+common < m_size && b+common < m_size ) {
		if( m_array[ a+common ] == m_array[ b+common ] )
			++common;
		else
			break;
	}

	// a sequence that hits the end of the array is decided here;
	// a is checked before b
	if( a+common == m_size )
		return -1;
	if( b+common == m_size )
		return 1;

	// both positions are inside the array and hold different words
	return CompareWord( m_array[ a+common ], m_array[ b+common ] );
}