bool CWord::RightPartIsSurname(int& iH, THomonymGrammems& grammems, Wtroka& strLemma) { iH = HasMorphNounWithGrammems_i(TGramBitSet(gSurname)); if (iH != -1) { CHomonym& h = GetRusHomonym(iH); grammems = h.Grammems; strLemma = h.GetLemma(); size_t ii = strLemma.find('-'); YASSERT(ii != Wtroka::npos); strLemma = strLemma.substr(ii + 1); return true; } //if this word is in morphology - do not try to predict if (IsDictionary()) return false; size_t ii = m_txt.find('-'); if (ii == Wtroka::npos) return false; Wtroka strRightPart = m_txt.substr(ii + 1); TMorph::ToLower(strRightPart); yvector<TSurnamePredictor::TPredictedSurname> out; if (!TMorph::PredictSurname(strRightPart, out)) return false; TGrammarBunch newForms; NSpike::ToGrammarBunch(out[0].StemGrammar, out[0].FlexGrammars, newForms); grammems.Reset(newForms); strLemma = out[0].Lemma; return true; }
// Merges externally supplied grammems into @dst.
//
// dst          - homonym grammems to be modified in place.
// art_grammems - grammems to merge in; may contain a part of speech as well
//                as other grammems.
// newPos       - explicit part of speech; when it contains any POS grammem it
//                takes precedence over the POS found in @art_grammems.
//
// Steps, in order:
//   1. reset the part of speech of @dst (from @newPos, else from @art_grammems);
//   2. apply the non-POS grammems of @art_grammems: keep a single existing form
//      that already has all of them, or otherwise collapse all forms into one
//      and overwrite it class by class;
//   3. if @dst still has no POS, make it a substantive and fill in any missing
//      case/gender/number classes;
//   4. if @dst has nothing but POS and @art_grammems describes a noun or a full
//      adjective, add all cases, genders and numbers.
static void MergeGrammems(THomonymGrammems& dst, const TGramBitSet& art_grammems, const TGramBitSet& newPos) {
    // first, reset Part Of Speech if any
    if (newPos.HasAny(TMorph::AllPOS()))
        dst.SetPOS(newPos);
    else if (art_grammems.HasAny(TMorph::AllPOS()))
        dst.SetPOS(art_grammems);

    // take other grammems from @art_grammems if any
    TGramBitSet other = art_grammems & ~TMorph::AllPOS();
    if (other.any()) {
        // if there is a form with such grammems - just leave it alone and drop the rest ones
        bool found = false;
        for (THomonymGrammems::TFormIter it = dst.IterForms(); it.Ok(); ++it) {
            if (it->HasAll(other)) {
                dst.ResetSingleForm(*it);
                found = true;
                // @dst has just been reset while we are iterating over its forms:
                // stop right away instead of advancing an iterator over the
                // replaced forms. The first matching form is the one kept.
                break;
            }
        }

        if (!found) {
            // otherwise merge all forms and replace grammems by classes
            // NOTE(review): ReplaceByMaskIfAny presumably overwrites the grammems
            // of the given class only when @art_grammems has any of that class -
            // confirm against TGramBitSet.
            TGramBitSet newForm = dst.All();
            newForm.ReplaceByMaskIfAny(art_grammems, NSpike::AllCases);
            newForm.ReplaceByMaskIfAny(art_grammems, NSpike::AllGenders);
            newForm.ReplaceByMaskIfAny(art_grammems, NSpike::AllNumbers);

            const TGramBitSet anim(gAnimated, gInanimated);
            newForm.ReplaceByMaskIfAny(art_grammems, anim);

            newForm.ReplaceByMaskIfAny(art_grammems, NSpike::AllTimes);
            newForm.ReplaceByMaskIfAny(art_grammems, NSpike::AllPersons);

            // just add the rest non-classified grammems
            static const TGramBitSet nonclassified = ~(NSpike::AllCases | NSpike::AllGenders | NSpike::AllNumbers |
                                                       anim | NSpike::AllTimes | NSpike::AllPersons);
            newForm |= art_grammems & nonclassified;

            dst.Reset(newForm);
        }
    }

    // if we still do not know POS, apply some workarounds:
    // treat the word as a substantive and fill in every class that is missing
    if (dst.GetPOS().none()) {
        dst.SetPOS(TGramBitSet(gSubstantive));
        if (!dst.HasAny(NSpike::AllCases))
            dst.Add(NSpike::AllCases);
        if (!dst.HasAny(NSpike::AllGenders))
            dst.Add(NSpike::AllGenders);
        if (!dst.HasAny(NSpike::AllNumbers))
            dst.Add(NSpike::AllNumbers);
    }

    // set a noun or adj without additional grammem as indeclinable
    if (!dst.HasAny(~TMorph::AllPOS()) &&
        (art_grammems.Has(gSubstantive) || TMorph::IsFullAdjective(art_grammems)))
        dst.Add(NSpike::AllCases | NSpike::AllGenders | NSpike::AllNumbers);
}