Ejemplo n.º 1
0
// Picks, among the forms of @hom, the one that ranks highest by the following
// criteria, compared in this order:
//   1. number of grammems shared with @desired (more is better);
//   2. presence of gSingular (a singular form beats a non-singular one);
//   3. number of grammems in NormalMutable(form) (more is better).
// On a complete tie the earlier form is kept.
// Returns an empty TGramBitSet when @hom has no forms.
// NOTE(review): a form scoring (0 shared, not singular, 0 normal) never beats
// the initial state, so the result may be empty even if forms exist - confirm
// that this is intended.
TGramBitSet THomonymInflector::SelectBestHomoform(const CHomonym& hom, const TGramBitSet& desired) {
    TGramBitSet winner;
    if (!hom.Grammems.HasForms())
        return winner;

    // score of the current winner
    size_t topShared = 0;
    bool topSingular = false;
    size_t topNormal = 0;

    for (THomonymGrammems::TFormIter form = hom.Grammems.IterForms(); form.Ok(); ++form) {
        const size_t shared = (*form & desired).count();
        const bool singular = form->Has(gSingular);
        const size_t normal = NInfl::DefaultFeatures().NormalMutable(*form).count();

        // fewer desired grammems than the winner: cannot win
        if (shared < topShared)
            continue;

        if (shared == topShared) {
            if (singular != topSingular) {
                // exactly one of the two is singular: the singular one wins
                if (!singular)
                    continue;
            } else if (normal <= topNormal) {
                // same number flag: a strictly larger normal set is required
                continue;
            }
        }

        winner = *form;
        topShared = shared;
        topSingular = singular;
        topNormal = normal;
    }
    return winner;
}
Ejemplo n.º 2
0
// Merges article grammems @art_grammems (and an optional explicit part of
// speech @newPos) into the homonym grammems @dst.
//
// Steps, in order:
//   1. POS: taken from @newPos if it contains any POS grammem, otherwise from
//      @art_grammems if it contains one; otherwise left untouched.
//   2. Non-POS grammems of @art_grammems: if some existing form of @dst already
//      has all of them, @dst is reset to that single form; otherwise all forms
//      are merged into one and the article grammems override it class by class.
//   3. If @dst still has no POS, it is treated as a substantive with all
//      cases/genders/numbers that were not specified.
//   4. A noun or full adjective (per @art_grammems) left without any non-POS
//      grammem gets all cases, genders and numbers.
static void MergeGrammems(THomonymGrammems& dst, const TGramBitSet& art_grammems, const TGramBitSet& newPos)
{
    // first, reset Part Of Speech if any
    if (newPos.HasAny(TMorph::AllPOS()))
        dst.SetPOS(newPos);
    else if (art_grammems.HasAny(TMorph::AllPOS()))
        dst.SetPOS(art_grammems);

    // take other grammems from @art_grammems if any
    const TGramBitSet other = art_grammems & ~TMorph::AllPOS();
    if (other.any()) {
        // if there is a form with such grammems - just leave it alone and drop the rest ones.
        // The matching form is copied and the loop is left BEFORE @dst is modified:
        // resetting the forms while still iterating over them (and passing a reference
        // to an element that is about to be replaced) is not safe.
        bool found = false;
        TGramBitSet matched;
        for (THomonymGrammems::TFormIter it = dst.IterForms(); it.Ok(); ++it) {
            if (it->HasAll(other)) {
                matched = *it;
                found = true;
                break;
            }
        }

        if (found) {
            dst.ResetSingleForm(matched);
        } else {
            // otherwise merge all forms and replace grammems by classes
            TGramBitSet newForm = dst.All();
            newForm.ReplaceByMaskIfAny(art_grammems, NSpike::AllCases);
            newForm.ReplaceByMaskIfAny(art_grammems, NSpike::AllGenders);
            newForm.ReplaceByMaskIfAny(art_grammems, NSpike::AllNumbers);
            const TGramBitSet anim(gAnimated, gInanimated);
            newForm.ReplaceByMaskIfAny(art_grammems, anim);
            newForm.ReplaceByMaskIfAny(art_grammems, NSpike::AllTimes);
            newForm.ReplaceByMaskIfAny(art_grammems, NSpike::AllPersons);
            // just add the rest non-classified grammems
            // (everything outside the classes handled above; computed once on first use)
            static const TGramBitSet nonclassified = ~(NSpike::AllCases | NSpike::AllGenders | NSpike::AllNumbers |
                                                       anim | NSpike::AllTimes | NSpike::AllPersons);
            newForm |= art_grammems & nonclassified;
            dst.Reset(newForm);
        }
    }

    // if we still do not know the POS, apply some workarounds:
    // assume a substantive and fill in every unspecified class completely
    if (dst.GetPOS().none()) {
        dst.SetPOS(TGramBitSet(gSubstantive));
        if (!dst.HasAny(NSpike::AllCases))
            dst.Add(NSpike::AllCases);
        if (!dst.HasAny(NSpike::AllGenders))
            dst.Add(NSpike::AllGenders);
        if (!dst.HasAny(NSpike::AllNumbers))
            dst.Add(NSpike::AllNumbers);
    }

    // set a noun or adj without additional grammem as indeclinable
    if (!dst.HasAny(~TMorph::AllPOS()) &&
        (art_grammems.Has(gSubstantive) || TMorph::IsFullAdjective(art_grammems)))
        dst.Add(NSpike::AllCases | NSpike::AllGenders | NSpike::AllNumbers);

}
Ejemplo n.º 3
0
bool THomonymInflector::FindInForms(const THomonymGrammems& forms, const TGramBitSet& grammems, TGramBitSet& resgram) {
    using NInfl::DefaultFeatures;
    using NInfl::TFeature;
    if (forms.HasForms()) {
        for (THomonymGrammems::TFormIter it = forms.IterForms(); it.Ok(); ++it)
            if (it->HasAll(grammems)) {
                resgram = *it;
                return true;
            }
    } else if (forms.IsIndeclinable() && DefaultFeatures().BitSet(TFeature::Case, TFeature::Number).HasAll(grammems)) {
        resgram = grammems;
        return true;
    }

    return false;
}
// Rebuilds the grammems of @H as forms of an analytic (compound) construction
// consisting of the auxiliary verb @VerbHomonym and the meaningful word @H.
//
// The current forms of @H are swapped out into a local object, then every pair
// (old form of @H, form of @VerbHomonym) is examined. A pair that matches one of
// the rules below contributes one combined form, which is added to H.Grammems;
// a pair that matches no rule is skipped. Consequently H.Grammems may end up
// with a different number of forms than before.
//
// Side effect: for an infinitive main word, the TEMPLATE_DICT aux article of
// @VerbHomonym is put on @H.
void CAnalyticFormBuilder::ChangeGrammemsAsAnalyticForm(CHomonym& H, const CHomonym& VerbHomonym)
{
    THomonymGrammems old_grammems;
    H.Grammems.Swap(old_grammems);

    for (THomonymGrammems::TFormIter old = old_grammems.IterForms(); old.Ok(); ++old)
        for (THomonymGrammems::TFormIter verb = VerbHomonym.Grammems.IterForms(); verb.Ok(); ++verb) {
            // part-of-speech tag of the combined form; within this function it only
            // serves as a "some rule matched" marker (see the empty() check below)
            Stroka strPos;

            // auxiliary verb grammems
            const TGramBitSet& VerbGrammems = *verb;
            // meaningful part grammems
            TGramBitSet MainGrammems = *old;

            // final grammems to set
            TGramBitSet ResultedGrammems;

            if (MainGrammems.Has(gInfinitive)) {
                ResultedGrammems = MainGrammems & ~TMorph::AllPOS();
                if (VerbGrammems.Has(gImperative)) {
                    // analytical form for imperatives in singular number does not exist
                    if (VerbGrammems.Has(gSingular))
                        continue;
                    ResultedGrammems.Set(gImperative); // "будем же жить!" ("let us live!")
                } else
                    ResultedGrammems |= VerbGrammems & NSpike::AllTimes; // "я стал пилить" ("I started sawing") or "стану писать" ("I will write")

                ResultedGrammems |= VerbGrammems & NSpike::AllPersons;
                ResultedGrammems |= VerbGrammems & NSpike::AllNumbers;
                ResultedGrammems |= VerbGrammems & NSpike::AllGenders;

                //copy all POS grammems from verb to main
                ResultedGrammems |= VerbGrammems & TMorph::AllPOS();

                H.PutAuxArticle(SDictIndex(TEMPLATE_DICT, VerbHomonym.GetAuxArticleIndex(TEMPLATE_DICT)));
                strPos = "Г";
            } else if (TMorph::IsShortParticiple(MainGrammems)) {
                // "*будем же взяты!" ("let us be taken!") - not a valid form
                if (VerbGrammems.Has(gImperative))
                    continue;

                ResultedGrammems = MainGrammems & ~TMorph::AllPOS();
                // remove any time grammems from participle
                ResultedGrammems &= ~NSpike::AllTimes;

                // person and time come from the auxiliary verb; the verb cannot be
                // imperative here (such pairs were skipped above), so gImperative
                // is never set in this branch
                ResultedGrammems |= VerbGrammems & NSpike::AllPersons;
                ResultedGrammems |= VerbGrammems & NSpike::AllTimes;

                strPos = "ПРИЧАСТИЕ";
                ResultedGrammems |= TGramBitSet(gParticiple, gShort);
            } else if (TMorph::IsShortAdjective(MainGrammems)) {
                if (VerbGrammems.Has(gImperative))
                    continue; // we assume that "будем же красивы!" ("let us be beautiful!") is ill-formed;
                              // in fact, we just do not want to introduce a whole bunch of extra codes for it.

                ResultedGrammems =  VerbGrammems;
                ResultedGrammems |= MainGrammems & (NSpike::AllNumbers | NSpike::AllGenders | TGramBitSet(gAnimated, gInanimated));
                ResultedGrammems &= ~TMorph::AllPOS();

                if (ResultedGrammems.Has(gActive))
                    ResultedGrammems &= ~TGramBitSet(gActive);

                ResultedGrammems |= TGramBitSet(gAdjective, gShort);
                strPos = "П";
            } else if (MainGrammems.Has(gPraedic))     // "мне было больно" ("it was painful for me")
            {
                ResultedGrammems = VerbGrammems;
                ResultedGrammems |= NSpike::AllCases & MainGrammems;    //copied from PronounPredk code (commented below) - preserve cases if any
                ResultedGrammems &= ~TMorph::AllPOS();

                if (ResultedGrammems.Has(gActive))
                    ResultedGrammems.Reset(gActive);

                strPos = "ПРЕДК";
                ResultedGrammems |= MainGrammems & TMorph::AllPOS();
            } else if (MainGrammems.Has(gComparative))       // "он был больше тебя" ("he was bigger than you")
            {
                ResultedGrammems = (VerbGrammems & ~TMorph::AllPOS()) | TGramBitSet(gComparative);
                if (ResultedGrammems.Has(gActive))
                    ResultedGrammems.Reset(gActive);

                strPos = "П";
                ResultedGrammems |= MainGrammems & TMorph::AllPOS();
            } else if (TMorph::IsFullAdjective(MainGrammems))
                // resolve disambiguity of homonyms, because analytical forms with full adjectives do not exist.
                continue;

            // "стал писать" "стану писать" "стать писать" ("started writing", "will start writing",
            // "to start writing") - perfective aspect
            if (VerbHomonym.Lemma == kStat)
                ResultedGrammems.Reset(gImperfect).Set(gPerfect);

            // if the auxiliary verb was an infinitive then it is all an infinitive
            //  "быть  лучше" ("to be better") or "должно быть принесено" ("must be brought")
            if (VerbHomonym.HasGrammem(gInfinitive)) {
                ResultedGrammems &= ~TMorph::AllPOS();
                ResultedGrammems.Set(gInfinitive);
                strPos = "ИНФИНИТИВ";
            } else if (VerbHomonym.HasGrammem(gGerund))     //  "будучи лишней" ("being superfluous")
            {
                ResultedGrammems &= ~TMorph::AllPOS();
                ResultedGrammems.Set(gGerund);

                strPos = "ДЕЕПРИЧАСТИЕ";
            }

            // no rule produced a part of speech for this pair - nothing to add
            if (strPos.empty())
                continue;

            /* do some corrections (code taken from RusGramTab.ProcessPOSAndGrammems) */
            if (ResultedGrammems.HasAll(NSpike::AllCases | TGramBitSet(gAdjPronoun)))
                ResultedGrammems |= NSpike::AllGenders | NSpike::AllNumbers;

            if (ResultedGrammems.Has(gMasFem))
                ResultedGrammems |= TGramBitSet(gMasculine, gFeminine);

            if (!ResultedGrammems.Has(gPraedic) && ResultedGrammems.HasAll(NSpike::AllCases) && !ResultedGrammems.Has(gSingular))
                ResultedGrammems |= TGramBitSet(gSingular, gPlural);

            H.Grammems.AddForm(ResultedGrammems);
        }
}