void CMultiWordCreator::NormalizeMultiWordHomonym(CWord* pW,  CHomonym* pH)
{
    CWordSequence* pWS = pH->GetSourceWordSequence();
    pWS->ClearLemmas();

    Wtroka lemma_text;
    if (pWS->HasAuxArticle()) {
        const article_t* pArt = GlobalDictsHolder->GetAuxArticle(pWS->GetAuxArticleIndex());
        if (pArt->has_lemma())
            lemma_text = pArt->get_lemma();
    } else if (pWS->HasGztArticle()) {
        const TGztArticle& gzt_article = pWS->GetGztArticle();
        const NGzt::TMessage* lemma_info = gzt_article.GetLemmaInfo();
        if (lemma_info != NULL)
            lemma_text = gzt_article.GetLemmaText(*lemma_info);
    }

    if (!lemma_text.empty()) {
        //если есть лемма, указанная в статье
        Wtroka capitalizedLemma = CNormalization::GetCapitalizedLemma(*pW, lemma_text);
        pWS->AddLemma(SWordSequenceLemma(lemma_text, capitalizedLemma));
    } else {
        if (pWS->Size() == 1) {
            // for single-word sequences - just take a lemma of corresponding word
            pWS->AddLemma(SWordSequenceLemma(m_Words[pWS->GetMainWord()].GetLemma()), true);
        } else for (int i = pWS->FirstWord(); i <= pWS->LastWord(); ++i) {
            // not normalized now, will be normalized lazily on request
            pWS->AddLemma(SWordSequenceLemma(m_Words[i].GetText()), false);
        }
    }

}
void CSitFactInterpretation::FillConstFieldValue(const fact_field_reference_t& fact_field, yvector<CFactFields>& newFacts, const SWordHomonymNum& valValue)
{
    const CWord& w = m_Words.GetWord(valValue);
    switch (fact_field.m_Field_type) {
        case TextField: {
            CTextWS ws;
            ws.SetArtificialPair(w.GetSourcePair());
            ws.AddLemma(SWordSequenceLemma(fact_field.m_StringValue));
            FillWSFactField(fact_field, ws, newFacts);
            break;
        } case BoolField: {
            CBoolWS ws(fact_field.m_bBoolValue);
            ws.SetPair(w.GetSourcePair());
            FillWSFactField(fact_field, ws, newFacts);
            break;
        } default:
            break;
    }
}
void CQuotesFinder::CreateTextField(const Stroka& TextFieldName, CSentence* pSent, const SWordHomonymNum& WH,
                                    CFactFields& factFields, const Wtroka& Postfix)
{
    CNormalization InterpNorm(pSent->m_Words);
    const CHomonym& h = pSent->m_Words[WH];

    const CFioWordSequence* pFioWS = dynamic_cast<const CFioWordSequence*>(h.GetSourceWordSequence());
    if (pFioWS != NULL) {
        CFioWS newFioWS(*pFioWS);
        newFioWS.SetMainWord(WH);
        newFioWS.AddLemma(SWordSequenceLemma (InterpNorm.GetArtificialLemma(WH)));

        CTextWS newTextWS;
        newTextWS.SetPair(*pFioWS);
        newTextWS.SetMainWord(WH);
        newTextWS.ResetLemmas(newFioWS.GetLemmas(), true);
        if (!Postfix.empty())
            newTextWS.AddLemma(SWordSequenceLemma(Postfix));
        factFields.AddValue(TextFieldName, newTextWS);
    } else {
        const CFactsWS* pFactWS = dynamic_cast<const CFactsWS*>(h.GetSourceWordSequence());
        if (pFactWS) {
            CTextWS newTextWS;
            newTextWS.SetPair(*pFactWS);
            newTextWS.SetMainWord(WH);
            newTextWS.ResetLemmas(pFactWS->GetLemmas(), true);
            if (!Postfix.empty())
                newTextWS.AddLemma(SWordSequenceLemma(Postfix));
            factFields.AddValue(TextFieldName, newTextWS);
        } else {
            const CTextWS* pTextWS = dynamic_cast<const CTextWS*>(h.GetSourceWordSequence());
            if (pTextWS) {
                CTextWS newTextWS = *pTextWS;
                if (!Postfix.empty())
                    newTextWS.AddLemma(SWordSequenceLemma(Postfix));
                factFields.AddValue(TextFieldName, newTextWS);
            } else {
                if (h.GetSourceWordSequence() == 0 || h.GetSourceWordSequence()->Size() == 1) {
                    CTextWS newTextWS;
                    newTextWS.SetPair(pSent->m_Words.GetWord(WH).GetSourcePair());
                    newTextWS.SetMainWord(WH);
                    newTextWS.AddLemma(SWordSequenceLemma(h.GetLemma()));
                    if (!Postfix.empty())
                        newTextWS.AddLemma(SWordSequenceLemma(Postfix));
                    factFields.AddValue(TextFieldName, newTextWS);
                }
            }
        }
    }
}
bool CQuotesFinder::CreateQuoteValue(const Wtroka& quoteStr, CFactFields& factFields) const
{
    DECLARE_STATIC_RUS_WORD(kChto, "что ");
    DECLARE_STATIC_RUS_WORD(kChtoby, "чтобы ");

    Wtroka qstr = quoteStr;

    CTextWS QuoteWS;
    if (qstr.has_prefix(kChto))
        qstr.erase(0,4);
    if (qstr.has_prefix(kChtoby))
        qstr.erase(0,6);
    if (qstr.empty())
         return false;
    qstr = StripString(qstr);
    if (qstr.size() > 1 && qstr[0] =='"' && qstr.back() =='"')
        qstr = qstr.substr(1, qstr.size() - 2);
    qstr = StripString(qstr);

    QuoteWS.AddLemma(SWordSequenceLemma(qstr));
    QuoteWS.SetArtificialPair(CWordsPair(0,0));
    factFields.AddValue(CFactFields::QuoteValue, QuoteWS);
    return true;
}
Exemple #5
0
void CSentence::AddFIOWS(const CFIOOccurence& fioOccurence, const SFullFIO& fio, int iSimilarOccurencesCount)
{
    TIntrusivePtr<CFioWordSequence> fioWS(new CFioWordSequence(fio));
    *(CFIOOccurence*)(fioWS.Get()) = fioOccurence;
    fioWS->PutWSType(FioWS);
    if (fio.m_Genders.any()) {
        THomonymGrammems gram = fioWS->GetGrammems();
        gram.Replace(NSpike::AllGenders, fio.m_Genders);
        fioWS->SetGrammems(gram);
    }

    fioWS->m_iSimilarOccurencesCount = iSimilarOccurencesCount;

    bool isManualFio = true;

    SWordHomonymNum wh = fioOccurence.m_NameMembers[Surname];
    if (wh.IsValid())
        if (m_Words.GetWord(wh).IsMultiWord())
            isManualFio = false;

    if (!fio.m_bFoundSurname &&
        fioOccurence.m_NameMembers[Surname].IsValid() &&
        !(fioOccurence.m_NameMembers[FirstName].IsValid() ||
            fioOccurence.m_NameMembers[InitialName].IsValid()) &&
            isManualFio) {
        CNameFinder nameFinder(m_Words);
        //если не смогли среди предсказанных фамилий
        //найти совпадающую с фамилией из fio, то вываливаемся
        if (!nameFinder.PredictSingleSurname(*fioWS, fio))
            return;
    }

    fioWS->ClearLemmas();
    if (!fio.m_strSurname.empty()) {
        Wtroka capLemma;
        if (fioOccurence.m_NameMembers[Surname].IsValid()) {
            const CWord& w = m_Words.GetWord(fioOccurence.m_NameMembers[Surname]);
            capLemma = GetCapitalizedLemma(w, -1, fio.m_strSurname);
        } else {
            capLemma = fio.m_strSurname;
            NStr::ToFirstUpper(capLemma);
        }
        fioWS->AddLemma(SWordSequenceLemma(fio.m_strSurname, capLemma));
    }
    if (!fio.m_strName.empty()) {
        Wtroka capLemma;
        if (fioOccurence.m_NameMembers[FirstName].IsValid()) {
            const CWord& w = m_Words.GetWord(fioOccurence.m_NameMembers[FirstName]);
            capLemma = GetCapitalizedLemma(w, -1, fio.m_strName);
        } else if (fioOccurence.m_NameMembers[InitialName].IsValid()) {
            const CWord& w = m_Words.GetWord(fioOccurence.m_NameMembers[InitialName]);
            capLemma = GetCapitalizedLemma(w, -1, fio.m_strName);
        } else {
            capLemma = fio.m_strName;
            TMorph::ToTitle(capLemma);

        }
        fioWS->AddLemma(SWordSequenceLemma(fio.m_strName, capLemma));
    }
    if (!fio.m_strPatronomyc.empty()) {
        Wtroka capLemma;
        if (fioOccurence.m_NameMembers[Patronomyc].IsValid()) {
            const CWord& w = m_Words.GetWord(fioOccurence.m_NameMembers[Patronomyc]);
            capLemma = GetCapitalizedLemma(w, -1, fio.m_strPatronomyc);
        } else if (fioOccurence.m_NameMembers[InitialPatronomyc].IsValid()) {
            const CWord& w = m_Words.GetWord(fioOccurence.m_NameMembers[InitialPatronomyc]);
            capLemma = GetCapitalizedLemma(w, -1, fio.m_strPatronomyc);
        } else {
            capLemma = fio.m_strPatronomyc;
            TMorph::ToTitle(capLemma);
        }

        fioWS->AddLemma(SWordSequenceLemma(fio.m_strPatronomyc, capLemma));
    }
    TakeFioWS(fioWS);
}