// Returns the suffix feature string for a word.
//
// - The special tokens "~~end~~" and "~~begin~~" (matched case-insensitively)
//   are returned unchanged, in their original letter case.
// - A word no longer than model_suffix_len is returned whole, lower-cased.
// - Any longer word yields L'~' followed by the last model_suffix_len
//   characters of its lower-cased form.
//
// The word must not be empty (checked by LEM_CHECKIT_Z).
lem::UCString Lemmatizator::GetSuffix( const lem::UCString & word ) const
{
 LEM_CHECKIT_Z( !word.empty() );

 lem::UCString lowered(word);
 lowered.to_lower();

 // The length test comes first so the string comparison only runs
 // for candidates of the right size.
 const bool is_boundary_marker =
  ( word.length()==7 && lowered==L"~~end~~" ) ||
  ( word.length()==9 && lowered==L"~~begin~~" );

 if( is_boundary_marker )
  return word;

 if( word.length()<=model_suffix_len )
  return lowered;

 // Long word: keep only the tail and mark the truncation with a tilde.
 return lem::UCString(L'~') + lem::right( lowered, model_suffix_len );
}
// Encodes the wide string u into the byte string a, one output char per
// input character, using the wchar2i lookup table.
//
// Characters absent from wchar2i are written as 0xff. After the last
// character a terminating zero is stored and the hash of a is recomputed.
// NOTE(review): assumes the buffer behind a.ptr() can hold u.length()+1
// chars - confirm against lem::CString.
void Lemmatizator::EncodeWord1( const lem::UCString & u, lem::CString & a ) const
{
 const int len = u.length();
 char * const dst = a.ptr();

 for( int pos=0; pos<len; ++pos )
 {
  const std::map<wchar_t,int>::const_iterator found = wchar2i.find( u[pos] );

  // Unknown characters map to the 0xff placeholder code.
  dst[pos] = ( found==wchar2i.end() ) ? static_cast<char>(0xff)
                                      : static_cast<char>(found->second);
 }

 dst[len] = 0;
 a.calc_hash();
}
// Builds a group key from up to the first three characters of str.
// Positions beyond the end of the string are filled with zero.
//
//   length 0  -> KEY( 0,      0,      0      )
//   length 1  -> KEY( str[0], 0,      0      )
//   length 2  -> KEY( str[0], str[1], 0      )
//   length 3+ -> KEY( str[0], str[1], str[2] )
SG_EntryGroup::KEY SG_EntryGroup::BuildKey( const lem::UCString &str )
{
 switch( str.length() )
 {
  // An empty string used to fall into the default branch and read
  // str[1] and str[2], which lie past the terminator. Return the
  // all-zero key explicitly instead of depending on buffer contents.
  case 0: return KEY( L'\0', 0, 0 );

  case 1: return KEY( str.front(), 0, 0 );

  case 2: return KEY( str.front(), str[1], 0 );

  // Three or more characters: only the first three take part in the key.
  default: return KEY( str.front(), str[1], str[2] );
 }
}
// Converts a wide string to a single-byte string.
//
// str - text to convert; an empty input yields a default-constructed CString.
// cp  - code converter to use; when null, the session code page reported by
//       lem::UI::get_UI().GetSessionCp() is used instead.
//
// The result is terminated at index str.length() and its hash is recomputed
// before it is returned.
// NOTE(review): this presumes lem_unicode_to_ascii emits one char per input
// character - confirm against its definition.
const lem::CString lem::to_ascii( const lem::UCString &str, const CodeConverter *cp )
{
 if( str.empty() )
  return CString();

 lem::CString ascii;

 // Fall back to the session code page when the caller passed no converter.
 const CodeConverter *converter = cp;
 if( !converter )
  converter = &lem::UI::get_UI().GetSessionCp();

 lem_unicode_to_ascii( ascii.ptr(), str.c_str(), converter );

 ascii.ptr()[ str.length() ] = 0;
 ascii.calc_hash();

 return ascii;
}