Beispiel #1
0
lem::UCString Lemmatizator::GetSuffix( const lem::UCString & word ) const
{
 LEM_CHECKIT_Z( !word.empty() );

 lem::UCString uword(word);
 uword.to_lower();

 if( word.length()==7 && uword==L"~~end~~" )
  return word;
 else if( word.length()==9 && uword==L"~~begin~~" )
  return word;
 else if( word.length()<=model_suffix_len )
  return uword;
 else
  return lem::UCString(L'~')+lem::right( uword, model_suffix_len );
}
Beispiel #2
0
void Lemmatizator::EncodeWord1( const lem::UCString & u, lem::CString & a ) const
{
 for( int i=0; i<u.length(); ++i )
  {
   std::map<wchar_t,int>::const_iterator it = wchar2i.find(u[i]);
   if( it==wchar2i.end() )
    a.ptr()[i] = (char)0xff;
   else
    a.ptr()[i] = it->second;
  }

 a.ptr()[ u.length() ] = 0;
 a.calc_hash();   
 
 return; 
}
Beispiel #3
0
SG_EntryGroup::KEY SG_EntryGroup::BuildKey( const lem::UCString &str )
{
 switch( str.length() )
 {
  case 1: return KEY( str.front(), 0, 0 );
  case 2: return KEY( str.front(), str[1], 0 );
  default: return KEY( str.front(), str[1], str[2] );
 }
}
Beispiel #4
0
const lem::CString lem::to_ascii( const lem::UCString &str, const CodeConverter *cp )
{
 if( str.empty() )
  return CString();

 lem::CString ascii;
 lem_unicode_to_ascii( ascii.ptr(), str.c_str(), cp ? cp : &lem::UI::get_UI().GetSessionCp() );
 ascii.ptr()[str.length()] = 0;
 ascii.calc_hash();
 return ascii;
}