// Marks `hom` as deleted and bumps the word's deleted-homonym counter.
// Does nothing when the homonym is already deleted, so the counter is
// incremented at most once per homonym.
void CWord::DeleteHomonym(CHomonym& hom)
{
    if (hom.IsDeleted())
        return;

    ++m_DeletedHomCount;
    hom.Delete();
}
// Aligns the gender grammems of this homonym with those of `h`:
//  - if `h` is feminine, gMasculine is replaced by gFeminine here;
//  - if `h` is masculine, gFeminine is replaced by gMasculine here.
// The two steps run in this order and each queries `h` right before it acts,
// so the second step sees the effect of the first if `h` aliases *this.
void CHomonym::ResetGenderFromHomonym(const CHomonym& h)
{
    if (h.HasGrammem(gFeminine)) {
        Grammems.Replace(gMasculine, gFeminine);
    }

    if (h.HasGrammem(gMasculine)) {
        Grammems.Replace(gFeminine, gMasculine);
    }
}
// Registers the word sequence `ws` as a multiword carrying `article`.
//
// The word returned by GetWordForMultiWord() either reuses its first homonym
// (when it covers exactly one source word and has only unknown-POS homonyms)
// or receives a freshly allocated homonym built from the lowercased text.
// In both cases the homonym gets `ws` as its source word sequence and
// `article` as its article. The found article is then recorded and `ws` is
// appended to m_wordSequences.
void CMultiWordCreator::AddQuoteMultiWord(CWordSequence& ws, const TArticleRef& article)
{
    SWordHomonymNum wordHom;
    Wtroka text;
    CWord* pWord = GetWordForMultiWord(ws, text, wordHom);

    const bool reuseFirstHomonym =
        pWord->m_SourceWords.Size() == 1 && pWord->HasOnlyUnknownPOS();

    if (reuseFirstHomonym) {
        // Attach the article to the homonym the word already has.
        size_t homId = pWord->IterHomonyms().GetID();
        CHomonym& existing = pWord->GetRusHomonym(homId);
        existing.SetSourceWordSequence(&ws);
        existing.PutArticle(article);
        wordHom.m_HomNum = homId;
    } else {
        pWord->m_SourceWords.SetPair(ws.FirstWord(), ws.LastWord());

        // Fall back to the word's own text when no multiword text was produced.
        if (str_is_empty_placeholder: false) {}
        if (text.size() == 0)
            text = pWord->m_txt;
        TMorph::ToLower(text);

        CHomonym* pCreated = new CHomonym(TMorph::GetMainLanguage(), text);
        pCreated->SetSourceWordSequence(&ws);
        pCreated->PutArticle(article);
        wordHom.m_HomNum = pWord->AddRusHomonym(pCreated);
    }

    if (article.AuxDic().IsValid()) {
        // Article comes from the auxiliary dictionary.
        const article_t* pAuxArticle = GlobalDictsHolder->GetAuxArticle(article.AuxDic());
        YASSERT(pAuxArticle != NULL);
        AddFoundArticle(pAuxArticle->get_kw_type(), pAuxArticle->get_title(), wordHom);
    } else {
        // Otherwise the gazetteer part of the reference must be filled in.
        YASSERT(!article.Gzt().Empty());
        AddFoundArticle(article.Gzt().GetType(), article.Gzt().GetTitle(), wordHom);
    }

    m_wordSequences.push_back(&ws);
}
// Clears the deleted mark on `hom` and decrements the word's deleted-homonym
// counter (never below zero). Does nothing when the homonym is not deleted.
void CWord::UndeleteHomonym(CHomonym& hom)
{
    if (!hom.IsDeleted())
        return;

    if (m_DeletedHomCount > 0)
        --m_DeletedHomCount;
    hom.Undelete();
}
// Appends one new homonym to m_variant for every string in `fioStrings`.
// A homonym whose text equals g_strFIONonTerminal is always flagged as a
// dictionary homonym; all others take the flag from `bIndexed`.
void CWord::AddFio(yset<Wtroka>& fioStrings, bool bIndexed)
{
    for (yset<Wtroka>::iterator cur = fioStrings.begin(); cur != fioStrings.end(); ++cur) {
        CHomonym* pFioHom = new CHomonym(TMorph::GetMainLanguage(), *cur);

        const bool isDictionary = (*cur == g_strFIONonTerminal) ? true : bIndexed;
        pFioHom->SetIsDictionary(isDictionary);

        m_variant.push_back(pFioHom);
    }
}
bool CWord::CanAddHomonym(const CHomonym& h) const { //make sure that abbreviations (e.g. PO, OAO, etc.) are uppercased if (h.HasGrammem(gAbbreviation) && !m_bIgnoreUpperCase) { if (!m_bUp) return false; for (size_t i = 0; i < m_txt.size(); ++i) if (!::IsUpper(m_txt[i])) return false; } if ((h.IsGeo() || h.IsName()) && h.IsMorphNoun() && !m_bUp && !m_bIgnoreUpperCase) return false; return true; }
bool CLemWord::AddNextHomonym(const char* sPlmLine) { char buffer[CriticalGraphemLineLength*2]; assert (strlen(sPlmLine) < CriticalGraphemLineLength*2); strcpy(buffer, sPlmLine); char* strPlmLine = buffer; assert (strPlmLine[0] == ' '); assert (strPlmLine[1] == ' '); strPlmLine += 2; assert ( GetHomonymsCount() ); size_t iPlmLineLen = strlen(strPlmLine); rtrim(strPlmLine,&iPlmLineLen); size_t iFirstFieldLen = strcspn(strPlmLine," "); size_t iSomeUnusefulDigitsLen = strspn(strPlmLine + iFirstFieldLen," -1234567890"); CHomonym* pHomonym = AddNewHomonym(); strPlmLine += iFirstFieldLen + iSomeUnusefulDigitsLen; int MorphSectionOffset = ProcessGraphematicalDescriptors(strPlmLine); if (!pHomonym->ProcessLemmaAndGrammems(strPlmLine+MorphSectionOffset)) return false; { const CHomonym* pFirstHom = GetHomonym(0); pHomonym->m_bInOb = pFirstHom->m_bInOb; pHomonym->m_bOborot1 = pFirstHom->m_bOborot1; pHomonym->m_bOborot2 = pFirstHom->m_bOborot2; pHomonym->m_OborotNo = pFirstHom->m_OborotNo; } InitLevelSpecific(pHomonym); return true; }
void CWord::AddHyphenSurnameLemma(int iH, const THomonymGrammems& forms, const Wtroka& strLemma) { if (iH == -1) { if (m_Homonyms.size() == 1) { m_Homonyms[0]->SetLemma(strLemma); m_Homonyms[0]->SetGrammems(forms); m_Homonyms[0]->SetNameType(Surname); return; } else { CHomonym* pH = new CHomonym(TMorph::GetMainLanguage(), strLemma); pH->SetGrammems(forms); pH->SetNameType(Surname); AddRusHomonym(pH); return; } } else { CHomonym& h = GetRusHomonym(iH); h.SetLemma(strLemma); h.SetNameType(Surname); } }
bool CLemWord::ProcessPlmLineForTheFirstHomonym(const char* sPlmLine, MorphLanguageEnum langua, int& OborotNo) { char buffer[CriticalGraphemLineLength*2]; assert (strlen(sPlmLine) < CriticalGraphemLineLength*2); strcpy(buffer, sPlmLine); char* strPlmLine = buffer; // откусываю признаки конца строки, если они по¤вились size_t iPlmLineLen = strlen(strPlmLine); rtrim(strPlmLine,&iPlmLineLen); size_t iFirstFieldLen = strcspn(strPlmLine," "); char WordBuffer[CriticalTokenLength+1]; strncpy(WordBuffer, strPlmLine, iFirstFieldLen); WordBuffer[iFirstFieldLen] = '\0'; SetWordStr(WordBuffer, langua); size_t iSomeUnusefulDigitsLen = strspn(strPlmLine + iFirstFieldLen," -1234567890"); // reading file position of an item from graline if (sscanf(strPlmLine + iFirstFieldLen, "%i %i", &m_GraphematicalUnitOffset, &m_TokenLengthInFile) != 2) return false; strPlmLine += iFirstFieldLen + iSomeUnusefulDigitsLen; int MorphSectionOffset = ProcessGraphematicalDescriptors(strPlmLine); if( m_bSpace ) return true; CHomonym* pHomonym = AddNewHomonym(); if (HasDes(OEXPR1)) OborotNo = ParseOborotNo(m_UnparsedGraphemDescriptorsStr); if (OborotNo != -1) { pHomonym->m_OborotNo = OborotNo; pHomonym->m_bInOb = true; pHomonym->m_bOborot1 = HasDes(OEXPR1); pHomonym->m_bOborot2 = HasDes(OEXPR2); } if (HasDes(OEXPR2)) OborotNo = -1; if( MorphSectionOffset == strlen(strPlmLine) ) pHomonym->SetLemma(m_strWord); else if (!pHomonym->ProcessLemmaAndGrammems(strPlmLine+MorphSectionOffset)) return false; InitLevelSpecific(pHomonym); if ( (m_strWord == "\"") || (m_strWord == "'") ) DelDes(OPun); if (pHomonym->m_LemSign != '+') m_bPredicted = true; return true; }
// Rebuilds the grammems of `H` (the meaningful part of an analytical form) by
// combining every one of its former grammatical forms with every form of the
// auxiliary verb `VerbHomonym`.
//
// The old forms of `H` are swapped out first; for each (old form, verb form)
// pair a resulting grammem set is computed depending on what the old form is
// (infinitive, short participle, short adjective, praedicative, comparative)
// and is added back to `H`. Pairs for which no analytical form exists are
// skipped, so `H` may end up with fewer forms than pairs, or with none.
void CAnalyticFormBuilder::ChangeGrammemsAsAnalyticForm(CHomonym& H, const CHomonym& VerbHomonym)
{
    THomonymGrammems old_grammems;
    H.Grammems.Swap(old_grammems);

    for (THomonymGrammems::TFormIter old = old_grammems.IterForms(); old.Ok(); ++old)
        for (THomonymGrammems::TFormIter verb = VerbHomonym.Grammems.IterForms(); verb.Ok(); ++verb) {
            // Stays empty when no analytical form could be built for this pair.
            Stroka strPos;
            // auxiliary verb grammems
            const TGramBitSet& VerbGrammems = *verb;
            // meaningful part grammems
            TGramBitSet MainGrammems = *old;
            // final grammems to set
            TGramBitSet ResultedGrammems;

            if (MainGrammems.Has(gInfinitive)) {
                ResultedGrammems = MainGrammems & ~TMorph::AllPOS();
                if (VerbGrammems.Has(gImperative)) {
                    // analytical form for imperatives in singular number does not exist
                    if (VerbGrammems.Has(gSingular))
                        continue;
                    ResultedGrammems.Set(gImperative); // e.g. "будем же жить!"
                } else
                    ResultedGrammems |= VerbGrammems & NSpike::AllTimes; // e.g. "я стал пилить" or "стану писать"

                ResultedGrammems |= VerbGrammems & NSpike::AllPersons;
                ResultedGrammems |= VerbGrammems & NSpike::AllNumbers;
                ResultedGrammems |= VerbGrammems & NSpike::AllGenders;

                // copy all POS grammems from verb to main
                ResultedGrammems |= VerbGrammems & TMorph::AllPOS();
                H.PutAuxArticle(SDictIndex(TEMPLATE_DICT, VerbHomonym.GetAuxArticleIndex(TEMPLATE_DICT)));
                strPos = "Г";
            } else if (TMorph::IsShortParticiple(MainGrammems)) {
                // "*будем же взяты!" - imperative auxiliary is not allowed here
                if (VerbGrammems.Has(gImperative))
                    continue;

                ResultedGrammems = MainGrammems & ~TMorph::AllPOS();
                // remove any time grammems from participle
                ResultedGrammems &= ~NSpike::AllTimes;

                ResultedGrammems |= VerbGrammems & NSpike::AllPersons;
                ResultedGrammems |= VerbGrammems & NSpike::AllTimes;

                // The imperative case was rejected above, so gImperative is
                // never set in this branch.
                strPos = "ПРИЧАСТИЕ";
                ResultedGrammems |= TGramBitSet(gParticiple, gShort);
            } else if (TMorph::IsShortAdjective(MainGrammems)) {
                // "будем же красивы!" is treated as not acceptable; the original
                // author notes this is mostly to avoid introducing many extra codes.
                if (VerbGrammems.Has(gImperative))
                    continue;

                ResultedGrammems = VerbGrammems;
                ResultedGrammems |= MainGrammems & (NSpike::AllNumbers | NSpike::AllGenders | TGramBitSet(gAnimated, gInanimated));
                ResultedGrammems &= ~TMorph::AllPOS();

                if (ResultedGrammems.Has(gActive))
                    ResultedGrammems &= ~TGramBitSet(gActive);

                ResultedGrammems |= TGramBitSet(gAdjective, gShort);
                strPos = "П";
            } else if (MainGrammems.Has(gPraedic)) { // e.g. "мне было больно"
                ResultedGrammems = VerbGrammems;
                // copied from PronounPredk code - preserve cases if any
                ResultedGrammems |= NSpike::AllCases & MainGrammems;
                ResultedGrammems &= ~TMorph::AllPOS();

                if (ResultedGrammems.Has(gActive))
                    ResultedGrammems.Reset(gActive);

                strPos = "ПРЕДК";
                ResultedGrammems |= MainGrammems & TMorph::AllPOS();
            } else if (MainGrammems.Has(gComparative)) { // e.g. "он был больше тебя"
                ResultedGrammems = (VerbGrammems & ~TMorph::AllPOS()) | TGramBitSet(gComparative);

                if (ResultedGrammems.Has(gActive))
                    ResultedGrammems.Reset(gActive);

                strPos = "П";
                ResultedGrammems |= MainGrammems & TMorph::AllPOS();
            } else if (TMorph::IsFullAdjective(MainGrammems)) {
                // resolve ambiguity of homonyms, because analytical forms with
                // full adjectives do not exist.
                continue;
            }

            // "стал писать", "стану писать", "стать писать" - perfective aspect
            if (VerbHomonym.Lemma == kStat)
                ResultedGrammems.Reset(gImperfect).Set(gPerfect);

            // if the auxiliary verb was an infinitive then it is all an infinitive,
            // e.g. "быть лучше" or "должно быть принесено"
            if (VerbHomonym.HasGrammem(gInfinitive)) {
                ResultedGrammems &= ~TMorph::AllPOS();
                ResultedGrammems.Set(gInfinitive);
                strPos = "ИНФИНИТИВ";
            } else if (VerbHomonym.HasGrammem(gGerund)) { // e.g. "будучи лишней"
                ResultedGrammems &= ~TMorph::AllPOS();
                ResultedGrammems.Set(gGerund);
                strPos = "ДЕЕПРИЧАСТИЕ";
            }

            if (strPos.empty())
                continue;

            /* do some corrections (code taken from RusGramTab.ProcessPOSAndGrammems) */
            if (ResultedGrammems.HasAll(NSpike::AllCases | TGramBitSet(gAdjPronoun)))
                ResultedGrammems |= NSpike::AllGenders | NSpike::AllNumbers;

            if (ResultedGrammems.Has(gMasFem))
                ResultedGrammems |= TGramBitSet(gMasculine, gFeminine);

            if (!ResultedGrammems.Has(gPraedic) && ResultedGrammems.HasAll(NSpike::AllCases) && !ResultedGrammems.Has(gSingular))
                ResultedGrammems |= TGramBitSet(gSingular, gPlural);

            H.Grammems.AddForm(ResultedGrammems);
        }
}
bool CMAPost::SetFioFormat (const CFIOFormat* Format, CLineIter it) { vector<SmallHomonymsVec> Hypots; SmallWordsVec FioWords; Hypots.resize(Format->m_Items.size()); int CountOfVariants = 1; for (long ItemNo = 0; ItemNo < Format->m_Items.size() && it != m_Words.end(); ItemNo++, it=NextNotSpace(it)) { FioWords.Add(it); CPostLemWord& W = *it; for (int HomNo=0; HomNo < W.GetHomonymsCount(); HomNo++) { CHomonym* pH = W.GetHomonym(HomNo); // иначе "“.≈. ќ“ ј«ј“№—я" будет ‘»ќ if (IsPartOfNonSingleOborot(pH)) return false; if (IsPartFio(*this, Format->m_Items[ItemNo], W, pH)) Hypots[ItemNo].Add(pH); }; if ( Hypots[ItemNo].empty() ) return false; CountOfVariants *= Hypots[ItemNo].size(); }; if (FioWords.size() != Format->m_Items.size()) return false; // не достроилось SmallHomonymsVec V; // текущий вариант vector<SmallHomonymsVec> Variants; Variants.reserve(CountOfVariants); V.m_ItemsCount = Hypots.size(); GetCommonVariants(Hypots, V, Variants, 0); if (Format->m_GleicheCase) for (long VarNo=0; VarNo < Variants.size(); VarNo++) { QWORD Grammems = rAllCases | rAllNumbers; for (long i=0; i < Variants[VarNo].size(); i++) { Grammems &= Variants[VarNo].m_Items[i]->m_iGrammems; }; if ((Grammems & rAllCases) == 0 || (Grammems & rAllNumbers) == 0 ) { Variants.erase(Variants.begin()+VarNo); VarNo--; }; }; if (Variants.empty()) return false; for (size_t i=0; i <FioWords.size(); i++) { CPostLemWord& W = *FioWords[i]; CHomonym* pH = Variants[0].m_Items[i]; W.SetHomonymsDel(true); pH->m_bDelete = false; pH->DeleteOborotMarks(); // удал¤ем однословные оборотыб (многословных там быть не может) W.DeleteMarkedHomonymsBeforeClauses(); if (W.HasDes(OSentEnd)) { /* если ‘»ќ содержало конец предложени¤, а после ‘»ќ нет ни одной пометы конца предложени¤, тогда надо поставить SENT_END на последнюю строчку ‘»ќ. */ W.DelDes(OSentEnd); if ( W.m_strWord!="." ) FioWords.back()->AddDes(OSentEnd); } } // ставим графем. пометы FioWords[0]->AddDes(OFAM1); FioWords.back()->AddDes(OFAM2); return true; };