// Picks the form of @hom that fits @desired best.
//
// Forms are ranked lexicographically by:
//   1. the number of grammems shared with @desired (more is better),
//   2. whether the form has gSingular (singular is preferred),
//   3. the size of NInfl::DefaultFeatures().NormalMutable(form) (bigger is better).
// A form replaces the current best only when it ranks strictly higher, so among
// equally ranked forms the first one met wins.
//
// Returns an empty TGramBitSet when @hom has no forms, and also when no form ranks
// above the initial baseline (0 shared grammems, not singular, 0 normal grammems).
TGramBitSet THomonymInflector::SelectBestHomoform(const CHomonym& hom, const TGramBitSet& desired) {
    TGramBitSet winner;
    if (!hom.Grammems.HasForms())
        return winner;

    size_t topShared = 0;
    bool topSingular = false;
    size_t topNormal = 0;

    for (THomonymGrammems::TFormIter form = hom.Grammems.IterForms(); form.Ok(); ++form) {
        const size_t shared = (*form & desired).count();
        const bool singular = form->Has(gSingular);
        const size_t normal = NInfl::DefaultFeatures().NormalMutable(*form).count();

        // Compare (shared, singular, normal) against the best triple seen so far;
        // the first criterion that differs decides.
        bool outranks;
        if (shared != topShared)
            outranks = shared > topShared;
        else if (singular != topSingular)
            outranks = singular;
        else
            outranks = normal > topNormal;

        if (!outranks)
            continue;

        winner = *form;
        topShared = shared;
        topSingular = singular;
        topNormal = normal;
    }

    return winner;
}
// Merges article grammems @art_grammems (plus an optional part-of-speech override @newPos)
// into @dst. The steps run in this order:
//   1. POS: SetPOS(@newPos) if @newPos has any POS grammem; otherwise SetPOS(@art_grammems)
//      if that one has any; otherwise the POS of @dst is not touched.
//   2. Non-POS grammems of @art_grammems (if any): when a form of @dst already has all of
//      them, @dst is reset to that form; otherwise all forms are merged into one and the
//      grammem classes are replaced from @art_grammems.
//   3. If @dst still has no POS, it becomes gSubstantive and every missing class among
//      cases / genders / numbers is filled with all values of that class.
//   4. If @dst has no non-POS grammems at all and @art_grammems is a noun or a full
//      adjective, all cases, genders and numbers are added (i.e. treated as indeclinable).
static void MergeGrammems(THomonymGrammems& dst, const TGramBitSet& art_grammems, const TGramBitSet& newPos) {
    const TGramBitSet& allPos = TMorph::AllPOS();

    // Step 1: part of speech; an explicit @newPos takes priority over the article.
    if (newPos.HasAny(allPos)) {
        dst.SetPOS(newPos);
    } else if (art_grammems.HasAny(allPos)) {
        dst.SetPOS(art_grammems);
    }

    // Step 2: everything in @art_grammems that is not a POS grammem.
    TGramBitSet extra = art_grammems & ~allPos;
    if (extra.any()) {
        bool keptExistingForm = false;
        for (THomonymGrammems::TFormIter form = dst.IterForms(); form.Ok(); ++form) {
            if (!form->HasAll(extra))
                continue;
            // A form already carrying all requested grammems exists: keep it, drop the others.
            // NOTE(review): @dst is modified here while it is still being iterated, and the
            // loop keeps going afterwards - confirm that TFormIter and the reference *form
            // stay valid across ResetSingleForm().
            dst.ResetSingleForm(*form);
            keptExistingForm = true;
        }

        if (!keptExistingForm) {
            // No suitable form: collapse all forms into one and override it class by class.
            // Presumably ReplaceByMaskIfAny() substitutes the grammems under the given mask
            // with those of @art_grammems when the latter has any - confirm against its definition.
            TGramBitSet merged = dst.All();
            merged.ReplaceByMaskIfAny(art_grammems, NSpike::AllCases);
            merged.ReplaceByMaskIfAny(art_grammems, NSpike::AllGenders);
            merged.ReplaceByMaskIfAny(art_grammems, NSpike::AllNumbers);
            const TGramBitSet anim(gAnimated, gInanimated);
            merged.ReplaceByMaskIfAny(art_grammems, anim);
            merged.ReplaceByMaskIfAny(art_grammems, NSpike::AllTimes);
            merged.ReplaceByMaskIfAny(art_grammems, NSpike::AllPersons);

            // Grammems outside of the classes above are simply added.
            static const TGramBitSet nonclassified = ~(NSpike::AllCases | NSpike::AllGenders | NSpike::AllNumbers |
                                                       anim | NSpike::AllTimes | NSpike::AllPersons);
            merged |= art_grammems & nonclassified;

            dst.Reset(merged);
        }
    }

    // Step 3: workaround for a still unknown POS - treat the word as a noun.
    if (dst.GetPOS().none()) {
        dst.SetPOS(TGramBitSet(gSubstantive));
        if (!dst.HasAny(NSpike::AllCases)) {
            dst.Add(NSpike::AllCases);
        }
        if (!dst.HasAny(NSpike::AllGenders)) {
            dst.Add(NSpike::AllGenders);
        }
        if (!dst.HasAny(NSpike::AllNumbers)) {
            dst.Add(NSpike::AllNumbers);
        }
    }

    // Step 4: a noun or full adjective without any additional grammem is made indeclinable.
    if (!dst.HasAny(~allPos)) {
        if (art_grammems.Has(gSubstantive) || TMorph::IsFullAdjective(art_grammems)) {
            dst.Add(NSpike::AllCases | NSpike::AllGenders | NSpike::AllNumbers);
        }
    }
}
bool THomonymInflector::FindInForms(const THomonymGrammems& forms, const TGramBitSet& grammems, TGramBitSet& resgram) { using NInfl::DefaultFeatures; using NInfl::TFeature; if (forms.HasForms()) { for (THomonymGrammems::TFormIter it = forms.IterForms(); it.Ok(); ++it) if (it->HasAll(grammems)) { resgram = *it; return true; } } else if (forms.IsIndeclinable() && DefaultFeatures().BitSet(TFeature::Case, TFeature::Number).HasAll(grammems)) { resgram = grammems; return true; } return false; }
// Replaces the forms of @H (the meaningful part of an analytic form) with forms built
// from every pair (old form of @H, form of the auxiliary verb @VerbHomonym).
//
// The old forms are swapped out of H.Grammems first; every accepted combination is then
// put back with H.Grammems.AddForm(). Pairs for which no analytic form is built are
// skipped, so @H may end up with fewer forms than pairs (or with none at all).
//
// What is combined depends on the old form of @H:
//   - infinitive:        non-POS grammems of @H + tense (or imperative), person, number,
//                        gender and POS of the verb; the TEMPLATE_DICT aux article of the
//                        verb is also copied to @H;
//   - short participle:  non-POS, non-tense grammems of @H + person and tense of the verb;
//   - short adjective:   non-POS grammems of the verb + number, gender, animacy of @H;
//   - praedic:           non-POS grammems of the verb + cases and POS of @H;
//   - comparative:       non-POS grammems of the verb + gComparative + POS of @H;
//   - full adjective:    the pair is skipped.
void CAnalyticFormBuilder::ChangeGrammemsAsAnalyticForm(CHomonym& H, const CHomonym& VerbHomonym) {
    THomonymGrammems old_grammems;
    H.Grammems.Swap(old_grammems);

    for (THomonymGrammems::TFormIter old = old_grammems.IterForms(); old.Ok(); ++old)
        for (THomonymGrammems::TFormIter verb = VerbHomonym.Grammems.IterForms(); verb.Ok(); ++verb) {
            // POS tag of the resulting form. Only its emptiness is checked below: an empty
            // value means that no branch has produced a result for this pair.
            Stroka strPos;
            // auxiliary verb grammems
            const TGramBitSet& VerbGrammems = *verb;
            // meaningful part grammems
            TGramBitSet MainGrammems = *old;
            // final grammems to set
            TGramBitSet ResultedGrammems;

            if (MainGrammems.Has(gInfinitive)) {
                ResultedGrammems = MainGrammems & ~TMorph::AllPOS();
                if (VerbGrammems.Has(gImperative)) {
                    // analytical form for imperatives in singular number does not exist
                    if (VerbGrammems.Has(gSingular))
                        continue;
                    ResultedGrammems.Set(gImperative); // "будем же жить!" ("let us live!")
                } else {
                    // "я стал пилить" ("I started sawing") or "стану писать" ("I will start writing")
                    ResultedGrammems |= VerbGrammems & NSpike::AllTimes;
                }

                ResultedGrammems |= VerbGrammems & NSpike::AllPersons;
                ResultedGrammems |= VerbGrammems & NSpike::AllNumbers;
                ResultedGrammems |= VerbGrammems & NSpike::AllGenders;

                // copy all POS grammems from verb to main
                ResultedGrammems |= VerbGrammems & TMorph::AllPOS();
                H.PutAuxArticle(SDictIndex(TEMPLATE_DICT, VerbHomonym.GetAuxArticleIndex(TEMPLATE_DICT)));
                strPos = "Г";
            } else if (TMorph::IsShortParticiple(MainGrammems)) {
                // "*будем же взяты!" - no analytic imperative with a short participle
                if (VerbGrammems.Has(gImperative))
                    continue;

                ResultedGrammems = MainGrammems & ~TMorph::AllPOS();
                // remove any time grammems from participle
                ResultedGrammems &= ~NSpike::AllTimes;

                ResultedGrammems |= VerbGrammems & NSpike::AllPersons;
                ResultedGrammems |= VerbGrammems & NSpike::AllTimes;

                // The verb is known not to be imperative here (see the check above), so
                // gImperative is never set in this branch.
                strPos = "ПРИЧАСТИЕ";
                ResultedGrammems |= TGramBitSet(gParticiple, gShort);
            } else if (TMorph::IsShortAdjective(MainGrammems)) {
                // We treat "будем же красивы!" ("let us be beautiful!") as ill-formed;
                // in truth, we just do not want to introduce a bunch of extra codes for it.
                if (VerbGrammems.Has(gImperative))
                    continue;

                ResultedGrammems = VerbGrammems;
                ResultedGrammems |= MainGrammems & (NSpike::AllNumbers | NSpike::AllGenders | TGramBitSet(gAnimated, gInanimated));
                ResultedGrammems &= ~TMorph::AllPOS();

                if (ResultedGrammems.Has(gActive))
                    ResultedGrammems &= ~TGramBitSet(gActive);

                ResultedGrammems |= TGramBitSet(gAdjective, gShort);
                strPos = "П";
            } else if (MainGrammems.Has(gPraedic)) {
                // "мне было больно" ("it hurt me")
                ResultedGrammems = VerbGrammems;
                // copied from PronounPredk code - preserve cases if any
                ResultedGrammems |= NSpike::AllCases & MainGrammems;
                ResultedGrammems &= ~TMorph::AllPOS();

                if (ResultedGrammems.Has(gActive))
                    ResultedGrammems.Reset(gActive);

                strPos = "ПРЕДК";
                ResultedGrammems |= MainGrammems & TMorph::AllPOS();
            } else if (MainGrammems.Has(gComparative)) {
                // "он был больше тебя" ("he was bigger than you")
                ResultedGrammems = (VerbGrammems & ~TMorph::AllPOS()) | TGramBitSet(gComparative);

                if (ResultedGrammems.Has(gActive))
                    ResultedGrammems.Reset(gActive);

                strPos = "П";
                ResultedGrammems |= MainGrammems & TMorph::AllPOS();
            } else if (TMorph::IsFullAdjective(MainGrammems)) {
                // resolve ambiguity of homonyms, because analytical forms with full adjectives do not exist
                continue;
            }

            // "стал писать", "стану писать", "стать писать" - perfective aspect
            if (VerbHomonym.Lemma == kStat)
                ResultedGrammems.Reset(gImperfect).Set(gPerfect);

            // If the auxiliary verb is an infinitive (or a gerund), the whole form is one too:
            // "быть лучше" ("to be better") or "должно быть принесено" ("must be brought").
            // NOTE(review): this also assigns strPos when none of the branches above matched,
            // so such a pair still produces a form - confirm that this is intended.
            if (VerbHomonym.HasGrammem(gInfinitive)) {
                ResultedGrammems &= ~TMorph::AllPOS();
                ResultedGrammems.Set(gInfinitive);
                strPos = "ИНФИНИТИВ";
            } else if (VerbHomonym.HasGrammem(gGerund)) {
                // "будучи лишней" ("being superfluous")
                ResultedGrammems &= ~TMorph::AllPOS();
                ResultedGrammems.Set(gGerund);
                strPos = "ДЕЕПРИЧАСТИЕ";
            }

            // nothing was built for this pair
            if (strPos.empty())
                continue;

            /* do some corrections (code taken from RusGramTab.ProcessPOSAndGrammems) */
            if (ResultedGrammems.HasAll(NSpike::AllCases | TGramBitSet(gAdjPronoun)))
                ResultedGrammems |= NSpike::AllGenders | NSpike::AllNumbers;

            if (ResultedGrammems.Has(gMasFem))
                ResultedGrammems |= TGramBitSet(gMasculine, gFeminine);

            if (!ResultedGrammems.Has(gPraedic) && ResultedGrammems.HasAll(NSpike::AllCases) && !ResultedGrammems.Has(gSingular))
                ResultedGrammems |= TGramBitSet(gSingular, gPlural);

            H.Grammems.AddForm(ResultedGrammems);
        }
}