コード例 #1
0
ファイル: word.cpp プロジェクト: Frankie-666/tomita-parser
// Marks `hom` as deleted and bumps the word's deleted-homonym counter.
// A homonym that already reports IsDeleted() is left untouched, so the
// counter is incremented at most once per homonym.
void CWord::DeleteHomonym(CHomonym& hom)
{
    if (hom.IsDeleted())
        return;

    ++m_DeletedHomCount;
    hom.Delete();
}
コード例 #2
0
ファイル: homonym.cpp プロジェクト: dubrousky/tomita-parser
// Aligns the gender grammems of this homonym with those of `h`:
//  - if `h` has gFeminine, Grammems.Replace(gMasculine, gFeminine) is applied;
//  - if `h` has gMasculine, Grammems.Replace(gFeminine, gMasculine) is applied.
// The two steps run in this order, and the second check is evaluated only
// after the first replacement (this matters if `h` is this very object).
void CHomonym::ResetGenderFromHomonym(const CHomonym& h)
{
    // Feminine source: apply the masculine -> feminine replacement.
    if (h.HasGrammem(gFeminine)) {
        Grammems.Replace(gMasculine, gFeminine);
    }

    // Masculine source: apply the feminine -> masculine replacement.
    if (h.HasGrammem(gMasculine)) {
        Grammems.Replace(gFeminine, gMasculine);
    }
}
コード例 #3
0
// Registers the word sequence `ws` as a multi-word bound to `article`.
//
// The word returned by GetWordForMultiWord() either gets a brand-new homonym
// (lower-cased text, main language) or, when it spans exactly one source word
// and has only unknown POS, its first homonym is reused. In both cases the
// homonym is linked to `ws` and `article`, the article is reported through
// AddFoundArticle(), and `ws` is appended to m_wordSequences.
void CMultiWordCreator::AddQuoteMultiWord(CWordSequence& ws, const TArticleRef& article)
{
    SWordHomonymNum wordHom;
    Wtroka text;
    CWord* pWord = GetWordForMultiWord(ws, text, wordHom);

    const bool reuseFirstHomonym =
        pWord->m_SourceWords.Size() == 1 && pWord->HasOnlyUnknownPOS();

    if (!reuseFirstHomonym) {
        // Build a fresh homonym covering the whole sequence.
        pWord->m_SourceWords.SetPair(ws.FirstWord(), ws.LastWord());
        if (text.size() == 0)
            text = pWord->m_txt;
        TMorph::ToLower(text);

        CHomonym* pHom = new CHomonym(TMorph::GetMainLanguage(), text);
        pHom->SetSourceWordSequence(&ws);
        pHom->PutArticle(article);
        wordHom.m_HomNum = pWord->AddRusHomonym(pHom);
    } else {
        // Single unknown-POS word: attach the article to its first homonym.
        size_t homId = pWord->IterHomonyms().GetID();
        CHomonym& existing = pWord->GetRusHomonym(homId);
        existing.SetSourceWordSequence(&ws);
        existing.PutArticle(article);
        wordHom.m_HomNum = homId;
    }

    // Report the article either from the auxiliary dictionary or from the gazetteer.
    if (!article.AuxDic().IsValid()) {
        YASSERT(!article.Gzt().Empty());
        AddFoundArticle(article.Gzt().GetType(), article.Gzt().GetTitle(), wordHom);
    } else {
        const article_t* pAuxArticle = GlobalDictsHolder->GetAuxArticle(article.AuxDic());
        YASSERT(pAuxArticle != NULL);
        AddFoundArticle(pAuxArticle->get_kw_type(), pAuxArticle->get_title(), wordHom);
    }

    m_wordSequences.push_back(&ws);
}
コード例 #4
0
ファイル: word.cpp プロジェクト: Frankie-666/tomita-parser
// Restores a homonym previously marked as deleted.
// Does nothing when `hom` is not deleted. The deleted-homonym counter is
// decremented only while it is positive, so it never drops below zero.
void CWord::UndeleteHomonym(CHomonym& hom)
{
    if (!hom.IsDeleted())
        return;

    if (m_DeletedHomCount > 0) {
        --m_DeletedHomCount;
    }
    hom.Undelete();
}
コード例 #5
0
ファイル: word.cpp プロジェクト: Frankie-666/tomita-parser
// Appends one new homonym to m_variant for every string in `fioStrings`.
// A homonym whose string equals g_strFIONonTerminal is always flagged as a
// dictionary homonym; every other one gets the flag from `bIndexed`.
void CWord::AddFio(yset<Wtroka>& fioStrings, bool bIndexed)
{
    for (yset<Wtroka>::iterator cur = fioStrings.begin(); cur != fioStrings.end(); ++cur) {
        CHomonym* pFioHom = new CHomonym(TMorph::GetMainLanguage(), *cur);

        // The non-terminal marker forces the flag on; otherwise follow bIndexed.
        const bool isDictionary = (*cur == g_strFIONonTerminal) || bIndexed;
        pFioHom->SetIsDictionary(isDictionary);

        m_variant.push_back(pFioHom);
    }
}
コード例 #6
0
ファイル: word.cpp プロジェクト: Frankie-666/tomita-parser
bool CWord::CanAddHomonym(const CHomonym& h) const
{
    //make sure that abbreviations (e.g. PO, OAO, etc.) are uppercased
    if (h.HasGrammem(gAbbreviation) && !m_bIgnoreUpperCase) {
        if (!m_bUp)
            return false;
        for (size_t i = 0; i < m_txt.size(); ++i)
            if (!::IsUpper(m_txt[i]))
                return false;
    }
    if ((h.IsGeo() || h.IsName()) && h.IsMorphNoun() && !m_bUp  && !m_bIgnoreUpperCase)
        return false;

    return true;
}
コード例 #7
0
ファイル: LemWord.cpp プロジェクト: eamosov/lspl
// Parses one continuation line `sPlmLine` (expected to start with two
// spaces) and appends the homonym it describes to this word.
//
// The line is copied into a local buffer, the two leading spaces, the first
// field and the following run of " -0123456789" characters are skipped, the
// graphematical descriptors are processed, and the remainder is handed to
// CHomonym::ProcessLemmaAndGrammems(). The oborot-related flags of the new
// homonym are copied from the first homonym of the word.
//
// Returns false when the line does not fit into the local buffer, is shorter
// than the two-character prefix, or when ProcessLemmaAndGrammems() fails.
// Note: in the last case the homonym created by AddNewHomonym() has already
// been added to the word.
bool CLemWord::AddNextHomonym(const char* sPlmLine)
{
    char buffer[CriticalGraphemLineLength*2];
    const size_t iSrcLen = strlen(sPlmLine);
    assert (iSrcLen < CriticalGraphemLineLength*2);
    // The assert vanishes under NDEBUG, so guard the copy at run time as well:
    // an over-long line must not overflow the stack buffer.
    if (iSrcLen >= sizeof(buffer))
        return false;
    strcpy(buffer, sPlmLine);
    char* strPlmLine = buffer;

    assert (strPlmLine[0] == ' ');
    assert (strPlmLine[1] == ' ');
    // Skipping the two-character prefix of a shorter line would move the
    // pointer past the terminating zero into uninitialized buffer memory.
    if (iSrcLen < 2)
        return false;
    strPlmLine += 2;

    assert ( GetHomonymsCount() );

    size_t iPlmLineLen = strlen(strPlmLine);
    rtrim(strPlmLine,&iPlmLineLen);

    size_t iFirstFieldLen = strcspn(strPlmLine," ");

    size_t iSomeUnusefulDigitsLen = strspn(strPlmLine + iFirstFieldLen," -1234567890");

    CHomonym* pHomonym = AddNewHomonym();

    strPlmLine += iFirstFieldLen + iSomeUnusefulDigitsLen;

    int MorphSectionOffset = ProcessGraphematicalDescriptors(strPlmLine);

    if (!pHomonym->ProcessLemmaAndGrammems(strPlmLine+MorphSectionOffset)) return false;

    {
        // The oborot attributes are taken from the first homonym of the word.
        const CHomonym* pFirstHom = GetHomonym(0);
        pHomonym->m_bInOb = pFirstHom->m_bInOb;
        pHomonym->m_bOborot1 = pFirstHom->m_bOborot1;
        pHomonym->m_bOborot2 = pFirstHom->m_bOborot2;
        pHomonym->m_OborotNo = pFirstHom->m_OborotNo;
    }

    InitLevelSpecific(pHomonym);

    return true;

}
コード例 #8
0
ファイル: word.cpp プロジェクト: Frankie-666/tomita-parser
void CWord::AddHyphenSurnameLemma(int iH, const THomonymGrammems& forms, const Wtroka& strLemma)
{
    if (iH == -1) {
        if (m_Homonyms.size() == 1) {
            m_Homonyms[0]->SetLemma(strLemma);
            m_Homonyms[0]->SetGrammems(forms);
            m_Homonyms[0]->SetNameType(Surname);
            return;
        } else {
            CHomonym* pH = new CHomonym(TMorph::GetMainLanguage(), strLemma);
            pH->SetGrammems(forms);
            pH->SetNameType(Surname);
            AddRusHomonym(pH);
            return;
        }
    } else {
        CHomonym& h = GetRusHomonym(iH);
        h.SetLemma(strLemma);
        h.SetNameType(Surname);
    }
}
コード例 #9
0
ファイル: LemWord.cpp プロジェクト: eamosov/lspl
// Parses the first line `sPlmLine` describing a word: sets the word string
// (in language `langua`), reads the file offset and token length, processes
// the graphematical descriptors and, unless the word is a space (m_bSpace),
// creates its first homonym.
//
// `OborotNo` is an in/out parameter: it is set from the descriptors when the
// word carries OEXPR1, is copied to the homonym while it is not -1, and is
// reset to -1 when the word carries OEXPR2.
//
// Returns false when the line or its first field does not fit into the local
// buffers, when the two integers after the first field cannot be read, or
// when CHomonym::ProcessLemmaAndGrammems() fails.
bool CLemWord::ProcessPlmLineForTheFirstHomonym(const char* sPlmLine, MorphLanguageEnum langua, int& OborotNo)
{
    char buffer[CriticalGraphemLineLength*2];
    const size_t iSrcLen = strlen(sPlmLine);
    assert (iSrcLen < CriticalGraphemLineLength*2);
    // The assert vanishes under NDEBUG, so guard the copy at run time as well:
    // an over-long line must not overflow the stack buffer.
    if (iSrcLen >= sizeof(buffer))
        return false;
    strcpy(buffer, sPlmLine);
    char* strPlmLine = buffer;

    // strip the end-of-line markers, if there are any
    size_t iPlmLineLen = strlen(strPlmLine);
    rtrim(strPlmLine,&iPlmLineLen);

    size_t iFirstFieldLen = strcspn(strPlmLine," ");
    char WordBuffer[CriticalTokenLength+1];
    // The first field (the token itself) must fit into WordBuffer together
    // with its terminating zero; otherwise the copy below would write past
    // the end of the buffer.
    if (iFirstFieldLen >= sizeof(WordBuffer))
        return false;
    strncpy(WordBuffer, strPlmLine, iFirstFieldLen);
    WordBuffer[iFirstFieldLen] = '\0';
    SetWordStr(WordBuffer, langua);

    size_t iSomeUnusefulDigitsLen = strspn(strPlmLine + iFirstFieldLen," -1234567890");

    //  reading file position of an item from graline
    if (sscanf(strPlmLine + iFirstFieldLen, "%i %i", &m_GraphematicalUnitOffset, &m_TokenLengthInFile) != 2)
        return false;

    strPlmLine += iFirstFieldLen + iSomeUnusefulDigitsLen;

    int MorphSectionOffset = ProcessGraphematicalDescriptors(strPlmLine);

    // Spaces get no homonyms.
    if( m_bSpace )
        return true;

    CHomonym*  pHomonym  = AddNewHomonym();

    if (HasDes(OEXPR1))
        OborotNo = ParseOborotNo(m_UnparsedGraphemDescriptorsStr);

    if (OborotNo != -1)
    {
        pHomonym->m_OborotNo =  OborotNo;
        pHomonym->m_bInOb =  true;
        pHomonym->m_bOborot1 = HasDes(OEXPR1);
        pHomonym->m_bOborot2 = HasDes(OEXPR2);
    }
    if (HasDes(OEXPR2))
        OborotNo = -1;

    // No morphological section after the descriptors: the word is its own lemma.
    // The cast spells out the conversion the original comparison did implicitly.
    if( static_cast<size_t>(MorphSectionOffset) == strlen(strPlmLine) )
        pHomonym->SetLemma(m_strWord);
    else
        if (!pHomonym->ProcessLemmaAndGrammems(strPlmLine+MorphSectionOffset))
            return false;

    InitLevelSpecific(pHomonym);

    // Quote characters lose the OPun descriptor.
    if ((m_strWord == "\"") || (m_strWord == "'"))
        DelDes(OPun);

    // Any lemma sign other than '+' marks the word as predicted.
    if (pHomonym->m_LemSign != '+')
        m_bPredicted = true;

    return true;
}
コード例 #10
0
// Rebuilds the grammems of H as an analytic form made of H (the meaningful
// part) and the auxiliary verb VerbHomonym.
//
// The current forms of H are moved out into old_grammems, so H.Grammems
// starts from whatever Swap() with an empty THomonymGrammems leaves there.
// Then every pair (old form of H, form of VerbHomonym) is examined: a
// combined grammem set is computed according to the kind of the old form
// (infinitive, short participle, short adjective, praedicative, comparative)
// and added back to H with AddForm(). Pairs that hit one of the `continue`
// statements contribute nothing.
//
// strPos is used only as a marker: a pair is added only if some branch
// (or the infinitive/gerund handling of the auxiliary verb) has set it.
void CAnalyticFormBuilder::ChangeGrammemsAsAnalyticForm(CHomonym& H, const CHomonym& VerbHomonym)
{
    THomonymGrammems old_grammems;
    H.Grammems.Swap(old_grammems);

    for (THomonymGrammems::TFormIter old = old_grammems.IterForms(); old.Ok(); ++old)
        for (THomonymGrammems::TFormIter verb = VerbHomonym.Grammems.IterForms(); verb.Ok(); ++verb) {
            Stroka strPos;

            // auxiliary verb grammems
            const TGramBitSet& VerbGrammems = *verb;
            // meaningful part grammems
            TGramBitSet MainGrammems = *old;

            // final grammems to set
            TGramBitSet ResultedGrammems;

            if (MainGrammems.Has(gInfinitive)) {
                ResultedGrammems = MainGrammems & ~TMorph::AllPOS();
                if (VerbGrammems.Has(gImperative)) {
                    // analytical form for imperatives in singular number does not exist
                    if (VerbGrammems.Has(gSingular))
                        continue;
                    ResultedGrammems.Set(gImperative); // e.g. "будем же жить!" ("let us live!")
                } else
                    ResultedGrammems |= VerbGrammems & NSpike::AllTimes; // e.g. "я стал пилить" ("I started sawing") or "стану писать" ("I will write")

                ResultedGrammems |= VerbGrammems & NSpike::AllPersons;
                ResultedGrammems |= VerbGrammems & NSpike::AllNumbers;
                ResultedGrammems |= VerbGrammems & NSpike::AllGenders;

                //copy all POS grammems from verb to main
                ResultedGrammems |= VerbGrammems & TMorph::AllPOS();

                H.PutAuxArticle(SDictIndex(TEMPLATE_DICT, VerbHomonym.GetAuxArticleIndex(TEMPLATE_DICT)));
                strPos = "Г";
            } else if (TMorph::IsShortParticiple(MainGrammems)) {
                // ungrammatical example: "*будем же взяты!" ("*let us be taken!")
                if (VerbGrammems.Has(gImperative))
                    continue;

                ResultedGrammems = MainGrammems & ~TMorph::AllPOS();
                // remove any time grammems from participle
                ResultedGrammems &= ~NSpike::AllTimes;

                ResultedGrammems |= VerbGrammems & NSpike::AllPersons;
                ResultedGrammems |= VerbGrammems & NSpike::AllTimes;

                // NOTE: never true here - an imperative auxiliary verb has already
                // taken the `continue` at the top of this branch.
                if (VerbGrammems.Has(gImperative))
                    ResultedGrammems.Set(gImperative);

                strPos = "ПРИЧАСТИЕ";
                ResultedGrammems |= TGramBitSet(gParticiple, gShort);
            } else if (TMorph::IsShortAdjective(MainGrammems)) {
                if (VerbGrammems.Has(gImperative))
                    continue; // treat "будем же красивы!" ("let us be beautiful!") as invalid;
                              // in fact, we simply do not want to introduce a pile of extra codes for it.

                ResultedGrammems =  VerbGrammems;
                ResultedGrammems |= MainGrammems & (NSpike::AllNumbers | NSpike::AllGenders | TGramBitSet(gAnimated, gInanimated));
                ResultedGrammems &= ~TMorph::AllPOS();

                if (ResultedGrammems.Has(gActive))
                    ResultedGrammems &= ~TGramBitSet(gActive);

                ResultedGrammems |= TGramBitSet(gAdjective, gShort);
                strPos = "П";
            } else if (MainGrammems.Has(gPraedic))     // e.g. "мне было больно" ("it hurt me")
            {
                ResultedGrammems = VerbGrammems;
                ResultedGrammems |= NSpike::AllCases & MainGrammems;    //copied from PronounPredk code (commented below) - preserve cases if any
                ResultedGrammems &= ~TMorph::AllPOS();

                if (ResultedGrammems.Has(gActive))
                    ResultedGrammems.Reset(gActive);

                strPos = "ПРЕДК";
                ResultedGrammems |= MainGrammems & TMorph::AllPOS();
            } else if (MainGrammems.Has(gComparative))       // e.g. "он был больше тебя" ("he was bigger than you")
            {
                ResultedGrammems = (VerbGrammems & ~TMorph::AllPOS()) | TGramBitSet(gComparative);
                if (ResultedGrammems.Has(gActive))
                    ResultedGrammems.Reset(gActive);

                strPos = "П";
                ResultedGrammems |= MainGrammems & TMorph::AllPOS();
            } else if (TMorph::IsFullAdjective(MainGrammems))
                // resolve disambiguity of homonyms, because analytical forms with full adjectives do not exist.
                continue;

            // "стал писать", "стану писать", "стать писать" - perfective aspect
            if (VerbHomonym.Lemma == kStat)
                ResultedGrammems.Reset(gImperfect).Set(gPerfect);

            // if the auxiliary verb was an infinitive then it is all an infinitive
            //  e.g. "быть лучше" ("to be better") or "должно быть принесено" ("must be brought")
            if (VerbHomonym.HasGrammem(gInfinitive)) {
                ResultedGrammems &= ~TMorph::AllPOS();
                ResultedGrammems.Set(gInfinitive);
                strPos = "ИНФИНИТИВ";
            } else if (VerbHomonym.HasGrammem(gGerund))     //  e.g. "будучи лишней" ("being superfluous")
            {
                ResultedGrammems &= ~TMorph::AllPOS();
                ResultedGrammems.Set(gGerund);

                strPos = "ДЕЕПРИЧАСТИЕ";
            }

            // nothing above recognised this pair of forms - skip it
            if (strPos.empty())
                continue;

            /* do some corrections (code taken from RusGramTab.ProcessPOSAndGrammems) */
            if (ResultedGrammems.HasAll(NSpike::AllCases | TGramBitSet(gAdjPronoun)))
                ResultedGrammems |= NSpike::AllGenders | NSpike::AllNumbers;

            if (ResultedGrammems.Has(gMasFem))
                ResultedGrammems |= TGramBitSet(gMasculine, gFeminine);

            if (!ResultedGrammems.Has(gPraedic) && ResultedGrammems.HasAll(NSpike::AllCases) && !ResultedGrammems.Has(gSingular))
                ResultedGrammems |= TGramBitSet(gSingular, gPlural);

            H.Grammems.AddForm(ResultedGrammems);
        }
}
コード例 #11
0
ファイル: Fio.cpp プロジェクト: eamosov/lspl
// Tries to match the words starting at `it` against the FIO (full personal
// name) format `Format` and, on success, fixes the homonyms of those words.
//
// Steps:
//  1. For each item of the format, collect the homonyms of the corresponding
//     non-space word accepted by IsPartFio(); fail if a word has none or if
//     any of its homonyms is part of a non-single oborot.
//  2. Build all homonym combinations with GetCommonVariants(); when
//     Format->m_GleicheCase is set, drop combinations whose homonyms share
//     no common case or no common number.
//  3. Apply the first remaining combination: in every word only the chosen
//     homonym is kept, sentence-end marks are moved to the last FIO word,
//     and the OFAM1/OFAM2 descriptors are put on the first/last word.
//
// Returns true if the format was applied, false otherwise.
bool CMAPost::SetFioFormat (const CFIOFormat* Format, CLineIter it) 
{
   vector<SmallHomonymsVec> Hypots;
   SmallWordsVec FioWords;

   Hypots.resize(Format->m_Items.size());
   
   int CountOfVariants = 1;
   for (long ItemNo = 0; ItemNo < Format->m_Items.size() && it != m_Words.end(); ItemNo++, it=NextNotSpace(it))
   {
        FioWords.Add(it);
		CPostLemWord& W = *it;
        
        for (int HomNo=0; HomNo < W.GetHomonymsCount();  HomNo++)
		{
            CHomonym* pH = W.GetHomonym(HomNo);
			// otherwise an oborot followed by a capitalized word (the original example is
			// garbled; presumably "Т.Е. ОТКАЗАТЬСЯ") would be taken for a FIO
            if (IsPartOfNonSingleOborot(pH)) return false;

			if (IsPartFio(*this, Format->m_Items[ItemNo], W, pH))
					Hypots[ItemNo].Add(pH);
		};
        if ( Hypots[ItemNo].empty() ) return false;
		CountOfVariants *= Hypots[ItemNo].size();
   };

   if (FioWords.size() != Format->m_Items.size()) return false; // the format was not fully matched

   SmallHomonymsVec V; // the variant currently being built
   vector<SmallHomonymsVec> Variants;
   Variants.reserve(CountOfVariants);
   V.m_ItemsCount = Hypots.size();
   GetCommonVariants(Hypots, V, Variants, 0);

   if (Format->m_GleicheCase)
	for (long VarNo=0; VarNo < Variants.size(); VarNo++)
	{
		// intersect the case and number grammems of all homonyms of the variant
		QWORD Grammems = rAllCases | rAllNumbers;
		for (long i=0; i < Variants[VarNo].size(); i++)
		{
            Grammems  &=   Variants[VarNo].m_Items[i]->m_iGrammems;
		};
		if ((Grammems & rAllCases) == 0 || (Grammems & rAllNumbers) == 0 )
		{
			Variants.erase(Variants.begin()+VarNo);
			// step back so that the element shifted into this slot is examined next
			VarNo--;
		};

	};

   if (Variants.empty()) return false;

   for (size_t i=0; i <FioWords.size(); i++)
   {
       CPostLemWord& W = *FioWords[i];
       CHomonym* pH = Variants[0].m_Items[i];
       // mark every homonym of the word as deleted, then keep only the chosen one
       W.SetHomonymsDel(true);
       pH->m_bDelete = false;
       pH->DeleteOborotMarks(); // remove single-word oborot marks (multi-word ones cannot occur here)
       W.DeleteMarkedHomonymsBeforeClauses();
	   if (W.HasDes(OSentEnd))
       {
           /*
             if the FIO contained a sentence end and there is no sentence-end mark
	         after the FIO, then SENT_END has to be put on the last line of the FIO.
           */
           W.DelDes(OSentEnd);
		   if ( W.m_strWord!="." )
			   FioWords.back()->AddDes(OSentEnd);
       }
   }

   // set the graphematical marks
   FioWords[0]->AddDes(OFAM1);
   FioWords.back()->AddDes(OFAM2);

   return true;

};