コード例 #1
0
ファイル: word.cpp プロジェクト: Frankie-666/tomita-parser
bool CWord::RightPartIsSurname(int& iH, THomonymGrammems& grammems, Wtroka& strLemma)
{
    iH = HasMorphNounWithGrammems_i(TGramBitSet(gSurname));
    if (iH != -1) {
        CHomonym& h = GetRusHomonym(iH);
        grammems = h.Grammems;
        strLemma = h.GetLemma();
        size_t ii = strLemma.find('-');
        YASSERT(ii != Wtroka::npos);
        strLemma = strLemma.substr(ii + 1);
        return true;
    }

    //if this word is in morphology - do not try to predict
    if (IsDictionary())
        return false;

    size_t ii = m_txt.find('-');
    if (ii == Wtroka::npos)
        return false;
    Wtroka strRightPart = m_txt.substr(ii + 1);
    TMorph::ToLower(strRightPart);

    yvector<TSurnamePredictor::TPredictedSurname> out;
    if (!TMorph::PredictSurname(strRightPart, out))
        return false;

    TGrammarBunch newForms;
    NSpike::ToGrammarBunch(out[0].StemGrammar, out[0].FlexGrammars, newForms);
    grammems.Reset(newForms);
    strLemma = out[0].Lemma;
    return true;
}
コード例 #2
0
// Merges the grammems @art_grammems and the part of speech @newPos into @dst.
//
// Steps, in order:
//  1. Part of speech: taken from @newPos if it contains any POS bit, otherwise
//     from @art_grammems if that contains one; otherwise left untouched.
//  2. Non-POS grammems of @art_grammems: if @dst has a form containing all of
//     them, @dst is reduced to that single form (the first such form found).
//     Otherwise all forms of @dst are merged into one and, class by class
//     (case, gender, number, animacy, tense, person), replaced by the values
//     from @art_grammems; the remaining non-classified grammems are added as is.
//  3. If @dst still has no POS, it is set to gSubstantive and every missing
//     case/gender/number class is filled with all of its values.
//  4. If @dst has nothing but POS bits and @art_grammems describes a noun or a
//     full adjective, all cases, genders and numbers are added.
static void MergeGrammems(THomonymGrammems& dst, const TGramBitSet& art_grammems, const TGramBitSet& newPos)
{
    // first, reset Part Of Speech if any
    if (newPos.HasAny(TMorph::AllPOS()))
        dst.SetPOS(newPos);
    else if (art_grammems.HasAny(TMorph::AllPOS()))
        dst.SetPOS(art_grammems);

    // take other grammems from @art_grammems if any
    TGramBitSet other = art_grammems & ~TMorph::AllPOS();
    if (other.any()) {
        // if there is a form with such grammems - just leave it alone and drop the rest ones
        bool found = false;
        for (THomonymGrammems::TFormIter it = dst.IterForms(); it.Ok(); ++it)
            if (it->HasAll(other)) {
                // Copy the form first: *it refers into dst, which is about to be reset.
                const TGramBitSet matchedForm = *it;
                dst.ResetSingleForm(matchedForm);
                found = true;
                // dst has been modified, so the iterator must not be advanced any further.
                break;
            }

        if (!found) {
            // otherwise merge all forms and replace grammems by classes
            TGramBitSet newForm = dst.All();
            newForm.ReplaceByMaskIfAny(art_grammems, NSpike::AllCases);
            newForm.ReplaceByMaskIfAny(art_grammems, NSpike::AllGenders);
            newForm.ReplaceByMaskIfAny(art_grammems, NSpike::AllNumbers);
            const TGramBitSet anim(gAnimated, gInanimated);
            newForm.ReplaceByMaskIfAny(art_grammems, anim);
            newForm.ReplaceByMaskIfAny(art_grammems, NSpike::AllTimes);
            newForm.ReplaceByMaskIfAny(art_grammems, NSpike::AllPersons);
            // just add the rest non-classified grammems
            static const TGramBitSet nonclassified = ~(NSpike::AllCases | NSpike::AllGenders | NSpike::AllNumbers |
                                                       anim | NSpike::AllTimes | NSpike::AllPersons);
            newForm |= art_grammems & nonclassified;
            dst.Reset(newForm);
        }
    }

    // if we still do not know POS, apply some workarounds:
    if (dst.GetPOS().none()) {
        dst.SetPOS(TGramBitSet(gSubstantive));
        if (!dst.HasAny(NSpike::AllCases))
            dst.Add(NSpike::AllCases);
        if (!dst.HasAny(NSpike::AllGenders))
            dst.Add(NSpike::AllGenders);
        if (!dst.HasAny(NSpike::AllNumbers))
            dst.Add(NSpike::AllNumbers);
    }

    // set a noun or adj without additional grammem as indeclinable
    if (!dst.HasAny(~TMorph::AllPOS()) &&
        (art_grammems.Has(gSubstantive) || TMorph::IsFullAdjective(art_grammems)))
        dst.Add(NSpike::AllCases | NSpike::AllGenders | NSpike::AllNumbers);

}