// Rebuilds the lemma list of the word sequence that produced homonym pH.
//
// pW - word passed to CNormalization::GetCapitalizedLemma when an explicit
//      article lemma is capitalized.
// pH - homonym whose source word sequence is re-lemmatized; the sequence is
//      dereferenced unconditionally, so it must not be NULL.
//
// Lemma source, in priority order:
//   1. the lemma declared in the sequence's aux article or gzt article;
//   2. for a one-word sequence, the lemma of its main word;
//   3. otherwise the raw text of every word from FirstWord() to LastWord().
void CMultiWordCreator::NormalizeMultiWordHomonym(CWord* pW, CHomonym* pH)
{
    CWordSequence* pSeq = pH->GetSourceWordSequence();
    pSeq->ClearLemmas();

    // Look for a lemma explicitly specified by the article behind the sequence.
    Wtroka articleLemma;
    if (pSeq->HasAuxArticle()) {
        const article_t* pAuxArticle = GlobalDictsHolder->GetAuxArticle(pSeq->GetAuxArticleIndex());
        if (pAuxArticle->has_lemma())
            articleLemma = pAuxArticle->get_lemma();
    } else if (pSeq->HasGztArticle()) {
        const TGztArticle& gztArticle = pSeq->GetGztArticle();
        const NGzt::TMessage* pLemmaInfo = gztArticle.GetLemmaInfo();
        if (pLemmaInfo != NULL)
            articleLemma = gztArticle.GetLemmaText(*pLemmaInfo);
    }

    if (!articleLemma.empty()) {
        // the article specifies a lemma - use it
        Wtroka capitalized = CNormalization::GetCapitalizedLemma(*pW, articleLemma);
        pSeq->AddLemma(SWordSequenceLemma(articleLemma, capitalized));
        return;
    }

    if (pSeq->Size() == 1) {
        // for single-word sequences - just take a lemma of corresponding word
        pSeq->AddLemma(SWordSequenceLemma(m_Words[pSeq->GetMainWord()].GetLemma()), true);
        return;
    }

    int wordIndex = pSeq->FirstWord();
    while (wordIndex <= pSeq->LastWord()) {
        // not normalized now, will be normalized lazily on request
        pSeq->AddLemma(SWordSequenceLemma(m_Words[wordIndex].GetText()), false);
        ++wordIndex;
    }
}
void CSitFactInterpretation::FillConstFieldValue(const fact_field_reference_t& fact_field, yvector<CFactFields>& newFacts, const SWordHomonymNum& valValue) { const CWord& w = m_Words.GetWord(valValue); switch (fact_field.m_Field_type) { case TextField: { CTextWS ws; ws.SetArtificialPair(w.GetSourcePair()); ws.AddLemma(SWordSequenceLemma(fact_field.m_StringValue)); FillWSFactField(fact_field, ws, newFacts); break; } case BoolField: { CBoolWS ws(fact_field.m_bBoolValue); ws.SetPair(w.GetSourcePair()); FillWSFactField(fact_field, ws, newFacts); break; } default: break; } }
void CQuotesFinder::CreateTextField(const Stroka& TextFieldName, CSentence* pSent, const SWordHomonymNum& WH, CFactFields& factFields, const Wtroka& Postfix) { CNormalization InterpNorm(pSent->m_Words); const CHomonym& h = pSent->m_Words[WH]; const CFioWordSequence* pFioWS = dynamic_cast<const CFioWordSequence*>(h.GetSourceWordSequence()); if (pFioWS != NULL) { CFioWS newFioWS(*pFioWS); newFioWS.SetMainWord(WH); newFioWS.AddLemma(SWordSequenceLemma (InterpNorm.GetArtificialLemma(WH))); CTextWS newTextWS; newTextWS.SetPair(*pFioWS); newTextWS.SetMainWord(WH); newTextWS.ResetLemmas(newFioWS.GetLemmas(), true); if (!Postfix.empty()) newTextWS.AddLemma(SWordSequenceLemma(Postfix)); factFields.AddValue(TextFieldName, newTextWS); } else { const CFactsWS* pFactWS = dynamic_cast<const CFactsWS*>(h.GetSourceWordSequence()); if (pFactWS) { CTextWS newTextWS; newTextWS.SetPair(*pFactWS); newTextWS.SetMainWord(WH); newTextWS.ResetLemmas(pFactWS->GetLemmas(), true); if (!Postfix.empty()) newTextWS.AddLemma(SWordSequenceLemma(Postfix)); factFields.AddValue(TextFieldName, newTextWS); } else { const CTextWS* pTextWS = dynamic_cast<const CTextWS*>(h.GetSourceWordSequence()); if (pTextWS) { CTextWS newTextWS = *pTextWS; if (!Postfix.empty()) newTextWS.AddLemma(SWordSequenceLemma(Postfix)); factFields.AddValue(TextFieldName, newTextWS); } else { if (h.GetSourceWordSequence() == 0 || h.GetSourceWordSequence()->Size() == 1) { CTextWS newTextWS; newTextWS.SetPair(pSent->m_Words.GetWord(WH).GetSourcePair()); newTextWS.SetMainWord(WH); newTextWS.AddLemma(SWordSequenceLemma(h.GetLemma())); if (!Postfix.empty()) newTextWS.AddLemma(SWordSequenceLemma(Postfix)); factFields.AddValue(TextFieldName, newTextWS); } } } } }
bool CQuotesFinder::CreateQuoteValue(const Wtroka& quoteStr, CFactFields& factFields) const { DECLARE_STATIC_RUS_WORD(kChto, "что "); DECLARE_STATIC_RUS_WORD(kChtoby, "чтобы "); Wtroka qstr = quoteStr; CTextWS QuoteWS; if (qstr.has_prefix(kChto)) qstr.erase(0,4); if (qstr.has_prefix(kChtoby)) qstr.erase(0,6); if (qstr.empty()) return false; qstr = StripString(qstr); if (qstr.size() > 1 && qstr[0] =='"' && qstr.back() =='"') qstr = qstr.substr(1, qstr.size() - 2); qstr = StripString(qstr); QuoteWS.AddLemma(SWordSequenceLemma(qstr)); QuoteWS.SetArtificialPair(CWordsPair(0,0)); factFields.AddValue(CFactFields::QuoteValue, QuoteWS); return true; }
void CSentence::AddFIOWS(const CFIOOccurence& fioOccurence, const SFullFIO& fio, int iSimilarOccurencesCount) { TIntrusivePtr<CFioWordSequence> fioWS(new CFioWordSequence(fio)); *(CFIOOccurence*)(fioWS.Get()) = fioOccurence; fioWS->PutWSType(FioWS); if (fio.m_Genders.any()) { THomonymGrammems gram = fioWS->GetGrammems(); gram.Replace(NSpike::AllGenders, fio.m_Genders); fioWS->SetGrammems(gram); } fioWS->m_iSimilarOccurencesCount = iSimilarOccurencesCount; bool isManualFio = true; SWordHomonymNum wh = fioOccurence.m_NameMembers[Surname]; if (wh.IsValid()) if (m_Words.GetWord(wh).IsMultiWord()) isManualFio = false; if (!fio.m_bFoundSurname && fioOccurence.m_NameMembers[Surname].IsValid() && !(fioOccurence.m_NameMembers[FirstName].IsValid() || fioOccurence.m_NameMembers[InitialName].IsValid()) && isManualFio) { CNameFinder nameFinder(m_Words); //если не смогли среди предсказанных фамилий //найти совпадающую с фамилией из fio, то вываливаемся if (!nameFinder.PredictSingleSurname(*fioWS, fio)) return; } fioWS->ClearLemmas(); if (!fio.m_strSurname.empty()) { Wtroka capLemma; if (fioOccurence.m_NameMembers[Surname].IsValid()) { const CWord& w = m_Words.GetWord(fioOccurence.m_NameMembers[Surname]); capLemma = GetCapitalizedLemma(w, -1, fio.m_strSurname); } else { capLemma = fio.m_strSurname; NStr::ToFirstUpper(capLemma); } fioWS->AddLemma(SWordSequenceLemma(fio.m_strSurname, capLemma)); } if (!fio.m_strName.empty()) { Wtroka capLemma; if (fioOccurence.m_NameMembers[FirstName].IsValid()) { const CWord& w = m_Words.GetWord(fioOccurence.m_NameMembers[FirstName]); capLemma = GetCapitalizedLemma(w, -1, fio.m_strName); } else if (fioOccurence.m_NameMembers[InitialName].IsValid()) { const CWord& w = m_Words.GetWord(fioOccurence.m_NameMembers[InitialName]); capLemma = GetCapitalizedLemma(w, -1, fio.m_strName); } else { capLemma = fio.m_strName; TMorph::ToTitle(capLemma); } fioWS->AddLemma(SWordSequenceLemma(fio.m_strName, capLemma)); } if (!fio.m_strPatronomyc.empty()) { Wtroka 
capLemma; if (fioOccurence.m_NameMembers[Patronomyc].IsValid()) { const CWord& w = m_Words.GetWord(fioOccurence.m_NameMembers[Patronomyc]); capLemma = GetCapitalizedLemma(w, -1, fio.m_strPatronomyc); } else if (fioOccurence.m_NameMembers[InitialPatronomyc].IsValid()) { const CWord& w = m_Words.GetWord(fioOccurence.m_NameMembers[InitialPatronomyc]); capLemma = GetCapitalizedLemma(w, -1, fio.m_strPatronomyc); } else { capLemma = fio.m_strPatronomyc; TMorph::ToTitle(capLemma); } fioWS->AddLemma(SWordSequenceLemma(fio.m_strPatronomyc, capLemma)); } TakeFioWS(fioWS); }