Exemplo n.º 1
0
//
// Builds the noun word forms of m_pLexeme and registers each of them through
// m_pLexeme->AddWordForm().
//
// The method steps through the grammatical hashes that CHasher yields after
// being initialized with the lexeme's gender and animacy. For every
// number/case combination that is not filtered out it selects the endings,
// applies the stem changes, determines the stress position(s) and creates one
// CWordForm per ending and per stress variant.
//
// Returns H_NO_ERROR when the iteration completes. Returns H_ERROR_POINTER if
// the endings object or a cloned word form is not available. An error code
// from eHandleAccEnding, eCheckIrregularForms, eHandleStemAugment,
// eGetStressType or eGetEnding aborts the build and is returned as is.
// Errors from eFleetingVowelCheck, eGetStressPositions and eCreateFormTemplate
// only cause the current ending to be skipped.
//
ET_ReturnCode CFormBuilderNouns::eBuild()
{
    ASSERT(m_pLexeme);   // we assume base class ctor took care of this

    ET_ReturnCode rc = H_NO_ERROR;

    // A typed pointer is kept so that eSelect() can be called below without
    // casting m_pEndings back to the concrete endings class.
    CNounEndings * pNounEndings = new CNounEndings(m_pLexeme);
    m_pEndings = pNounEndings;
    if (NULL == m_pEndings)
    {
        return H_ERROR_POINTER;
    }

    ET_Animacy eAnimacy = m_pLexeme->eAnimacy();
    ET_Gender eoGender = m_pLexeme->eGender();

    CHasher gramIterator;
    gramIterator.Initialize(eoGender, eAnimacy);
    do
    {
        // NB: "continue" in a do/while jumps to the loop condition, so the
        // iterator is still advanced for every skipped combination.

        // No singular forms are built when the main symbol is "мн."
        if ((L"мн." == m_pLexeme->sMainSymbol()) && (gramIterator.m_eNumber == NUM_SG))
        {
            continue;
        }

        // Partitive and locative are never built in the plural
        if (NUM_PL == gramIterator.m_eNumber && 
            (CASE_PART == gramIterator.m_eCase || CASE_LOC == gramIterator.m_eCase))
        {
            continue;
        }

        // Partitive only for lexemes flagged as having a second genitive
        if (CASE_PART == gramIterator.m_eCase && !m_pLexeme->bSecondGenitive())
        {
            continue;
        }

        // Locative only for lexemes flagged as having a second locative
        if (CASE_LOC == gramIterator.m_eCase && !m_pLexeme->bSecondLocative())
        {
            continue;
        }

        // eEndingCase is the case used to look up the ending; it may differ
        // from the case of the form that is being built.
        ET_Case eEndingCase = gramIterator.m_eCase;
        if (CASE_ACC == gramIterator.m_eCase)
        {
            // Handle acc ending
            rc = eHandleAccEnding (gramIterator.m_eNumber, eEndingCase);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }
        }
        if (CASE_PART == gramIterator.m_eCase || CASE_LOC == gramIterator.m_eCase)
        {
            // Partitive and locative forms take their endings from the dative
            eEndingCase = CASE_DAT;
        }

        CEString sLemma (m_pLexeme->sGraphicStem());
        if (m_pLexeme->bHasIrregularForms())
        {
            bool bSkipRegular = false;
            rc = eCheckIrregularForms (gramIterator.m_eGender, 
                                       gramIterator.m_eAnimacy,
                                       gramIterator.m_eCase,
                                       eEndingCase,
                                       gramIterator.m_eNumber,
                                       bSkipRegular);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }
            if (bSkipRegular)
            {
                // Workaround for lack of "исх. форма иррег." mark in current source
                if (GENDER_M == gramIterator.m_eGender && NUM_SG == gramIterator.m_eNumber && CASE_NOM == gramIterator.m_eCase)
                {
                    m_bIrregularSourceForm = true;
                }
                continue;
            }
        }

        rc = eHandleStemAugment (sLemma, gramIterator.m_eNumber, gramIterator.m_eCase);
        if (rc != H_NO_ERROR)
        {
            return rc;
        }

        // The locative built here is always stressed on the ending; for all
        // other cases the stress location is looked up.
        ET_StressLocation eStress = STRESS_LOCATION_UNDEFINED;
        if (CASE_LOC == gramIterator.m_eCase)
        {
            eStress = STRESS_LOCATION_ENDING;
        }
        else
        {
            rc = eGetStressType (gramIterator.m_eNumber, eEndingCase, eStress);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }
        }

        // The return code of eSelect() is not examined; an empty selection is
        // caught by the count check that follows.
        pNounEndings->eSelect(gramIterator.m_eNumber, eEndingCase, eStress);
        int iNumEndings = m_pEndings->iCount();
        if (iNumEndings < 1)
        {
            // Missing endings are reported only for lexemes whose type is not 0
            if (m_pLexeme->iType() != 0)
            {
                ASSERT(0);
                ERROR_LOG(L"No endings");
            }
            continue;
        }

        CEString sSavedLemma (sLemma);      // lemma can change, e.g. because of a fleeting vowel
        for (int iEnding = 0; iEnding < iNumEndings; ++iEnding, sLemma = sSavedLemma)
        {
            // Get ending and modify as necessary
            CEString sEnding;
            unsigned __int64 llEndingKey = -1;
            rc = m_pEndings->eGetEnding(iEnding, sEnding, llEndingKey);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }

            // Type 8, non-neuter: a stem ending in one of ш/ж/ч/щ/ц rejects
            // endings that start with "я"; any other stem rejects endings
            // that start with "а".
            if (8 == m_pLexeme->iType() && GENDER_N != m_pLexeme->eGender())
            {
                if (sLemma.bEndsWithOneOf (L"шжчщц"))
                {
                    if (sEnding.bStartsWith (L"я"))
                    {
                        continue;
                    }
                }
                else
                {
                    if (sEnding.bStartsWith (L"а"))
                    {
                        continue;
                    }
                }
            }

            rc = eFleetingVowelCheck (gramIterator.m_eNumber, 
                                      eEndingCase,
                                      gramIterator.m_eGender, 
                                      eStress,
                                      SUBPARADIGM_NOUN,
                                      sEnding,
                                      sLemma);
            if (rc != H_NO_ERROR)
            {
                continue;
            }

            vector<int> vecStress;
            rc = eGetStressPositions (sLemma, sEnding, eStress, vecStress);
            if (rc != H_NO_ERROR)
            {
                continue;
            }

            CWordForm * pWordForm = NULL;
            rc = eCreateFormTemplate (gramIterator.m_eNumber, gramIterator.m_eCase, sLemma, pWordForm);
            if (rc != H_NO_ERROR)
            {
                continue;
            }

            if (1 == vecStress.size() || m_pLexeme->bIsMultistressedCompound())
            {
                // One word form that carries all stress positions
                vector<int>::iterator itStressPos = vecStress.begin();
                for (; itStressPos != vecStress.end(); ++itStressPos)
                {
                    pWordForm->m_mapStress[*itStressPos] = STRESS_PRIMARY;  // primary
                    rc = eHandleYoAlternation (eStress, *itStressPos, pWordForm->m_sLemma, sEnding);
                    if (rc != H_NO_ERROR)
                    {
                        // NOTE(review): this only skips the field updates for
                        // the current stress position; the form is still
                        // added below.
                        continue;
                    }
                    pWordForm->m_sEnding = sEnding;
                    pWordForm->m_llEndingDataId = llEndingKey;
                    pWordForm->m_sWordForm = pWordForm->m_sLemma + sEnding;
                }
                m_pLexeme->AddWordForm (pWordForm);                        
            }
            else
            {
                // One separate word form per stress position: the first one
                // uses the template, each further one a clone of its
                // predecessor with the stress map reset.
                vector<int>::iterator itStressPos = vecStress.begin();
                for (; itStressPos != vecStress.end(); ++itStressPos)
                {
                    if (itStressPos != vecStress.begin())
                    {
                        CWordForm * pwfVariant = NULL;
                        CloneWordForm (pWordForm, pwfVariant);
                        if (NULL == pwfVariant)
                        {
                            // Cloning produced nothing to work with
                            return H_ERROR_POINTER;
                        }
                        pwfVariant->m_mapStress.clear();
                        pWordForm = pwfVariant;
                    }
                    pWordForm->m_mapStress[*itStressPos] = STRESS_PRIMARY;  // primary
                    rc = eHandleYoAlternation (eStress, *itStressPos, pWordForm->m_sLemma, sEnding);
                    if (rc != H_NO_ERROR)
                    {
                        // NOTE(review): the current form is neither added to
                        // the lexeme nor released on this path, and the next
                        // iteration clones it -- confirm who owns it.
                        continue;
                    }
                    pWordForm->m_sWordForm = pWordForm->m_sLemma + sEnding;
                    pWordForm->m_sEnding = sEnding;
                    pWordForm->m_llEndingDataId = llEndingKey;

                    m_pLexeme->AddWordForm (pWordForm);
                }
            }
        }   //  for (int iEnding = 0; ... )
    
    } while (gramIterator.bIncrement());

    return H_NO_ERROR;

}    //  CFormBuilderNouns::eBuild()
//
// Builds the word forms of a pronominal adjective lexeme and registers each of
// them through m_pLexeme->AddWordForm().
//
// The method steps through the grammatical hashes that CHasher yields for
// POS_PRONOUN_ADJ / SUBPARADIGM_PRONOUN_ADJ. For each hash it first copies any
// irregular forms stored in the lexeme, then (unless the regular form is to be
// skipped) derives the stress location from the lexeme's accent type, selects
// the endings and creates one CWordForm per ending and per stress variant.
//
// Returns H_NO_ERROR when the iteration completes. Returns H_ERROR_POINTER if
// the endings object or a cloned word form is not available, and
// H_ERROR_UNEXPECTED for an accent type other than AT_A, AT_B or AT_F. An
// error code from eGetIrregularForms, eGetEnding, eFleetingVowelCheck,
// eCreateFormTemplate (regular forms) or eGetStressPositions aborts the build
// and is returned as is.
//
ET_ReturnCode CFormBuilderPronounAdj::eBuild()
{
    ASSERT(m_pLexeme);   // we assume base class ctor took care of this

    ET_ReturnCode rc = H_NO_ERROR;

    // A typed pointer is kept so that eSelect() can be called below without
    // casting m_pEndings back to the concrete endings class.
    CAdjPronounEndings * pAdjPronounEndings = new CAdjPronounEndings(m_pLexeme);
    m_pEndings = pAdjPronounEndings;
    if (NULL == m_pEndings)
    {
        return H_ERROR_POINTER;
    }

    CHasher gramTmp;
    gramTmp.Initialize (POS_PRONOUN_ADJ);
    gramTmp.SetParadigm (SUBPARADIGM_PRONOUN_ADJ);
    do
    {
        // NB: "continue" in a do/while jumps to the loop condition, so the
        // iterator is still advanced for every skipped combination.

        bool bSkipRegular = false;
        if (m_pLexeme->bHasIrregularForms())
        {
            int iPronAdj = iGramHash (m_pLexeme->ePartOfSpeech(),
                                      gramTmp.m_eCase, gramTmp.m_eNumber,
                                      gramTmp.m_eGender, gramTmp.m_eAnimacy);

            if (m_pLexeme->bHasIrregularForm(iPronAdj))
            {
                map<CWordForm *, bool> mapPronAdj;
                rc = m_pLexeme->eGetIrregularForms(iPronAdj, mapPronAdj);
                if (rc != H_NO_ERROR)
                {
                    return rc;
                }

                // Copy text and stress of every stored irregular form into a
                // fresh template created for the current hash.
                map<CWordForm *, bool>::iterator it = mapPronAdj.begin();
                for (; it != mapPronAdj.end(); ++it)
                {
                    CWordForm * pWordForm = NULL;
                    rc = eCreateFormTemplate(gramTmp.m_eGender,
                                             gramTmp.m_eNumber,
                                             gramTmp.m_eCase,
                                             gramTmp.m_eAnimacy,
                                             L"",
                                             -1,
                                             pWordForm);
                    if (rc != H_NO_ERROR)
                    {
                        continue;
                    }

                    pWordForm->m_sWordForm = (*it).first->m_sWordForm;
                    pWordForm->m_mapStress = (*it).first->m_mapStress;
                    m_pLexeme->AddWordForm(pWordForm);

                    // The flag is overwritten on every pass, so the last
                    // irregular form in the map decides.
                    bSkipRegular = !(*it).second;  // i.e. no regular variant
                }
            }
        }       //  if (m_pLexeme->m_bHasIrregularForms)

        if (bSkipRegular)
        {
            continue;
        }

        // Stress location by accent type:
        //   AT_A -- stem; AT_B -- ending;
        //   AT_F -- ending in the singular; in the plural stem for the
        //           nominative and the inanimate accusative, ending otherwise.
        ET_StressLocation eStressType = STRESS_LOCATION_UNDEFINED;
        switch (m_pLexeme->eAccentType1())
        {
            case AT_A:
            {
                eStressType = STRESS_LOCATION_STEM;
                break;
            }
            case AT_B:
            {
                eStressType = STRESS_LOCATION_ENDING;
                break;
            }
            case AT_F:
            {
                if (NUM_SG == gramTmp.m_eNumber)
                {
                    eStressType = STRESS_LOCATION_ENDING;
                }
                else if (CASE_NOM == gramTmp.m_eCase)
                {
                    eStressType = STRESS_LOCATION_STEM;
                }
                else if (CASE_ACC == gramTmp.m_eCase && ANIM_NO == gramTmp.m_eAnimacy)
                {
                    eStressType = STRESS_LOCATION_STEM;
                }
                else
                {
                    eStressType = STRESS_LOCATION_ENDING;
                }
                break;
            }
            default:
            {
                ASSERT(0);
                ERROR_LOG (L"Illegal accent type.");
                return H_ERROR_UNEXPECTED;
            }

        }   //  switch (m_pLexeme->eAccentType1())

        // NOTE(review): the return code of eSelect() is stored but not acted
        // upon; only an empty selection is detected by the count check below.
        // Confirm whether a failure should abort the build.
        rc = pAdjPronounEndings->eSelect(gramTmp.m_eGender, gramTmp.m_eNumber, gramTmp.m_eCase, gramTmp.m_eAnimacy);
        int iNumEndings = m_pEndings->iCount();
        if (iNumEndings < 1)
        {
            ERROR_LOG (L"No endings");
            continue;
        }

        for (int iEnding = 0; iEnding < iNumEndings; ++iEnding)
        {
            CEString sEnding;
            unsigned __int64 llEndingKey = -1;
            rc = m_pEndings->eGetEnding(iEnding, sEnding, llEndingKey);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }

            // The fleeting vowel check is not run for the masculine
            // nominative and the masculine inanimate accusative.
            // NOTE(review): m_sLemma is passed in directly and, unlike the
            // noun builder's local lemma, is not restored between endings --
            // confirm this is intended.
            if (m_pLexeme->bFleetingVowel() &&
                (gramTmp.m_eCase != CASE_NOM || gramTmp.m_eGender != GENDER_M) &&
                (gramTmp.m_eCase != CASE_ACC || gramTmp.m_eGender != GENDER_M ||
                                                    gramTmp.m_eAnimacy != ANIM_NO))
            {
                rc = eFleetingVowelCheck (gramTmp.m_eNumber, gramTmp.m_eCase,
                                          gramTmp.m_eGender, eStressType,
                                          SUBPARADIGM_LONG_ADJ, sEnding, m_sLemma);
                if (rc != H_NO_ERROR)
                {
                    return rc;
                }
            }

            CWordForm * pWordForm = NULL;
            rc = eCreateFormTemplate(gramTmp.m_eGender, 
                                     gramTmp.m_eNumber, 
                                     gramTmp.m_eCase, 
                                     gramTmp.m_eAnimacy, 
                                     sEnding, 
                                     llEndingKey,
                                     pWordForm);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }

            vector<int> vecStress;
            rc = eGetStressPositions (sEnding, eStressType, vecStress);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }

            if (1 == vecStress.size() || m_pLexeme->bIsMultistressedCompound())
            {
                // One word form that carries all stress positions
                vector<int>::iterator itStressPos = vecStress.begin();
                for (; itStressPos != vecStress.end(); ++itStressPos)
                {
                    pWordForm->m_mapStress[*itStressPos] = STRESS_PRIMARY;
                }
                m_pLexeme->AddWordForm (pWordForm);
            }
            else
            {
                // One separate word form per stress position: the first one
                // uses the template, each further one a clone of its
                // predecessor.
                vector<int>::iterator itStressPos = vecStress.begin();
                for (; itStressPos != vecStress.end(); ++itStressPos)
                {
                    if (itStressPos != vecStress.begin())
                    {
                        CWordForm * pWfVariant = NULL;
                        CloneWordForm (pWordForm, pWfVariant);
                        if (NULL == pWfVariant)
                        {
                            // Cloning produced nothing to work with
                            return H_ERROR_POINTER;
                        }
                        // Drop the stress inherited from the form the clone
                        // was made from, so that each variant carries only
                        // its own position (same as in the noun builder).
                        pWfVariant->m_mapStress.clear();
                        pWordForm = pWfVariant;
                    }
                    pWordForm->m_mapStress[*itStressPos] = STRESS_PRIMARY;
                    m_pLexeme->AddWordForm (pWordForm);
                }
            }
        }
    } while (gramTmp.bIncrement());

    return H_NO_ERROR;

}   //  CFormBuilderPronounAdj::eBuild()