Ejemplos de CEString en C++ (Cpp)

Lenguaje de programación: C++ (Cpp)

Clase / Tipo: CEString

Ejemplos en hotexamples.com: 14

C++ (Cpp) CEString - 14 ejemplos encontrados. Estos son los ejemplos en C++ (Cpp) del mundo real mejor valorados de CEString extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

uiLength(8)

sSubstr(6)

uiNSyllables(4)

bStartsWith(3)

sErase(3)

uiFind(2)

bEndsWith(2)

uiFindOneOf(2)

bEndsWithOneOf(2)

uiGetSyllableFromVowelPos(2)

bRegexMatch(2)

stGetTokenFromOffset(1)

uiFindFirstOf(1)

uiFindLastOf(1)

stGetToken(1)

uiFindNoCase(1)

Erase(1)

uiGetNumOfSyllables(1)

uiGetFieldLength(1)

uiGetNumOfFields(1)

uiGetNumOfTokens(1)

uiGetTokenNum(1)

uiGetVisibleLength(1)

uiGetVowelPos(1)

uiNFields(1)

uiNTokens(1)

stGetField(1)

sGetRegexMatch(1)

sInsert(1)

bGetPrevToken(1)

SetBreakChars(1)

SetVowels(1)

Trim(1)

TrimLeft(1)

TrimRight(1)

bEndsWithNoCase(1)

bEndsWithOneOfNoCase(1)

bGetNextToken(1)

bIsEmpty(1)

sGetToken(1)

bRegexSearch(1)

bStartsWithNoCase(1)

bStartsWithOneOf(1)

bStartsWithOneOfNoCase(1)

eGetTokenType(1)

rstGetToken(1)

sGetField(1)

Reverse(1)

uiRFind(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: LexPreprocessor.cpp Proyecto: kbogatyrev/Zal-Windows

// Returns a copy of s_ with the stress marker L"<" inserted immediately before
// the character at index iLetter. If iLetter is negative or not smaller than
// the length of s_, the string is returned unchanged.
CEString CT_LexPreprocessor::sInsertStress(int iLetter, CEString s_)
{
    const int iLength = (int)(s_.uiLength());
    if (iLetter < 0 || iLetter >= iLength)
    {
        return s_;
    }

    CEString sHead = s_.sSubstr(0, iLetter);
    CEString sTail = s_.sSubstr(iLetter, s_.uiLength() - iLetter);
    return sHead + L"<" + sTail;
}

Ejemplo n.º 2

Mostrar archivo

Archivo: Analyzer.cpp Proyecto: kbogatyrev/Zal-Windows

// Plausibility filter for a candidate lemma.
// Returns false when sWf contains none of the vowels listed in the first
// pattern, or when it matches any of the reject patterns below; returns true
// otherwise. Patterns are tried in the listed order and the first hit wins.
bool CAnalyzer::bIsValidLemma(CEString sWf)
{
    // The candidate must contain at least one vowel.
    if (!sWf.bRegexSearch (L"[аеёиоуыэюяАЕЁИОУЫЭЮЯ]"))
    {
        return false;
    }

    // Letter sequences that disqualify the candidate.
    static const wchar_t * const arrRejectPatterns[] =
    {
        L"[аеёиоуыэюяъь][ьъ]",
        L"ъ[аоуыэи]",
        L"[аоэуе][аоэуы]ть$",
        L"[кгхц]ь$",
        L"[кгх]ый$",
        L"[жчшщ]ы",
        L"ы$",
        L"[бвгджзклмнпрстфхцчшщ](й|ъ$)",
        L"[бвгджзклмнпрстфхцчшщ]{4}$"
    };
    const int iNumPatterns = (int)(sizeof(arrRejectPatterns) / sizeof(arrRejectPatterns[0]));

    for (int iPattern = 0; iPattern < iNumPatterns; ++iPattern)
    {
        if (sWf.bRegexSearch (arrRejectPatterns[iPattern]))
        {
            return false;
        }
    }

    return true;
}

Ejemplo n.º 3

Mostrar archivo

Archivo: LexPreprocessor.cpp Proyecto: kbogatyrev/Zal-Windows

// Removes the first stress marker L"<" from s_ (in place) and returns the
// index at which it was found, i.e. the number of letters preceding it.
// Returns -1 and leaves s_ untouched when uiFind() reports a position that is
// not inside the string.
int CT_LexPreprocessor::iDeleteStress(CEString& s_)
{
    const unsigned int uiMarkerAt = s_.uiFind(L"<");
    if (uiMarkerAt < s_.uiLength())
    {
        CEString sBefore = s_.sSubstr(0, uiMarkerAt);
        CEString sAfter = s_.sSubstr(uiMarkerAt + 1, s_.uiLength() - uiMarkerAt - 1);
        s_ = sBefore + sAfter;
        return (int)uiMarkerAt;
    }

    return -1;
}

Ejemplo n.º 4

Mostrar archivo

Archivo: Lexeme.cpp Proyecto: kbogatyrev/Zal-Windows

// Looks for an alternating preverb at the start of sVerbForm.
//
// The plain list (m_vecAlternatingPreverbs) is searched first; only if nothing
// matches there is the voicing list (m_vecAlternatingPreverbsWithVoicing)
// searched. On a match sPreverb receives the matching entry, and bVoicing is
// set to true when the match came from the voicing list (it is not written
// otherwise).
//
// Returns:
//   H_NO_MORE            the lexeme has no fleeting vowel, nothing was searched
//   H_FALSE              no preverb from either list matches
//   H_ERROR_INVALID_ARG  the form is shorter than the preverb plus two characters
//   H_NO_ERROR           a preverb was found
ET_ReturnCode CLexeme::eGetAlternatingPreverb (const CEString& sVerbForm, CEString& sPreverb, bool& bVoicing)
{
    if (!m_stProperties.bFleetingVowel)
    {
        return H_NO_MORE;
    }

// types 5, 6, 7, 8, 9, 11, 14
    bool bFound = false;
    vector<CEString>::iterator itCandidate;

    for (itCandidate = m_vecAlternatingPreverbs.begin();
         itCandidate != m_vecAlternatingPreverbs.end();
         ++itCandidate)
    {
        if (sVerbForm.bStartsWith (*itCandidate))
        {
            sPreverb = *itCandidate;
            bFound = true;
            break;
        }
    }

    if (!bFound)
    {
        for (itCandidate = m_vecAlternatingPreverbsWithVoicing.begin();
             itCandidate != m_vecAlternatingPreverbsWithVoicing.end();
             ++itCandidate)
        {
            if (sVerbForm.bStartsWith (*itCandidate))
            {
                sPreverb = *itCandidate;
                bFound = true;
                bVoicing = true;
                break;
            }
        }
    }

    if (!bFound)
    {
        return H_FALSE;
    }

    if (sVerbForm.uiLength() >= sPreverb.uiLength() + 2)
    {
        return H_NO_ERROR;
    }

    ASSERT(0);
    ERROR_LOG (L"Stem too short.");
    return H_ERROR_INVALID_ARG;

}       //  eGetAlternatingPreverb(...)

Ejemplo n.º 5

Mostrar archivo

Archivo: FormBuilderPronounAdj.cpp Proyecto: kbogatyrev/Zal-Windows

// Computes the stress position(s) for a form built from m_sLemma and sEnding.
//
// STRESS_LOCATION_STEM:   delegates to eGetStemStressPositions() and returns
//                         its result code.
// STRESS_LOCATION_ENDING: appends exactly one syllable index to vecStressPos
//                         (see the branches below).
// Any other value:        asserts, logs and returns H_ERROR_INVALID_ARG.
ET_ReturnCode CFormBuilderPronounAdj::eGetStressPositions (const CEString& sEnding,
                                                           ET_StressLocation eStressType,
                                                           vector<int>& vecStressPos)
{
    m_sLemma.SetVowels (g_szRusVowels);

    if (STRESS_LOCATION_STEM == eStressType)
    {
        return eGetStemStressPositions (m_sLemma, m_eSubparadigm, vecStressPos);
    }

    if (STRESS_LOCATION_ENDING != eStressType)
    {
        ASSERT(0);
        ERROR_LOG (L"Illegal stress type.");
        return H_ERROR_INVALID_ARG;
    }

    int iStressedSyllable = -1;
    if (sEnding.uiNSyllables() < 1)
    {
        // Ending has no syllable of its own: use the last syllable of the lemma
        iStressedSyllable = m_sLemma.uiNSyllables() - 1;
    }
    else
    {
        const bool bSecondSyllableOfEnding =
            L"мс-п" == m_pLexeme->sInflectionType() &&
            (L"его" == sEnding || L"ему" == sEnding ||
             L"ого" == sEnding || L"ому" == sEnding);

        if (bSecondSyllableOfEnding)
        {
            iStressedSyllable = m_sLemma.uiNSyllables() + 1;    // одног<о, твоем<у
        }
        else
        {
            iStressedSyllable = m_sLemma.uiNSyllables();
        }
    }

    vecStressPos.push_back (iStressedSyllable);
    return H_NO_ERROR;

}   //  eGetStressPositions (...)

Ejemplo n.º 6

Mostrar archivo

Archivo: Analyzer.cpp Proyecto: kbogatyrev/Zal-Windows

// Collects candidate analyses for the split sLeft | sRight of a word form.
//
// If vecStems IS NOT empty:
// for every stem in vecStems, take the corresponding endings table
// and look whether it contains an ending equal to sRight;
// for every such ending, add a wordform to vecPossibleWordforms.
//
// If vecStems IS empty:
// try to guess the lexeme. Look for endings tables that contain sRight; for
// every such ending, build a wordform and store it in vecPossibleWordforms.
// (Identical wordforms are stored as one wordform.) Nothing is guessed when
// sLeft has two characters or fewer.
//
// iStressPosEnding is forwarded to m_vecFind() in the known-stem case; the
// guessing case always passes -2.
// NOTE(review): -2 appears to mean "stress unknown" (iAnalyze uses it that way) -- confirm.
//
// Returns -1 if m_pDb is NULL, 0 otherwise.
int CAnalyzer::iCheckEndings(vector<CHasher>& vecPossibleWordforms,
                             vector<stStemLinks>& vecStems,
                             CEString sLeft,
                             CEString sRight,
                             int iStressPosEnding)
{
    if (m_pDb == NULL) // || vecStems == NULL)
    {
        return -1;
    }

    // Scratch buffer for the hashes returned by m_vecFind(); static as in the
    // original code, so the function is not reentrant.
    static vector<int> vecGram;

    for (vector<stStemLinks>::iterator itStems = vecStems.begin();
         itStems != vecStems.end(); ++itStems)
    {
        // For each *itStems look up the endings table, then in this table try to find
        // endings which are equal to sRight. For each ending found, write the parameters
        // to tmpWf and then push_back tmpWf to vecPossibleWordforms:
        vecGram = arr_freq_endings[(*itStems).iEndingsLink].m_vecFind (sRight, iStressPosEnding);
        for (vector<int>::iterator itEnding = vecGram.begin();
             itEnding != vecGram.end(); ++itEnding)
        {
            CHasher tmpWf;
            tmpWf.hDecodeHash(*itEnding);
            tmpWf.m_llLexemeId = (*itStems).llLexemeId;
            tmpWf.m_sLemma = (*itStems).sLemma;
            vecPossibleWordforms.push_back (tmpWf);
        }
    }

    if (vecStems.empty())
    // Try to guess the lexeme
    {
        if (sLeft.uiLength() <= 2)
        {
            return 0;
        }

        // IDs of all endings tables that list sRight
        vector<int> vec_i_possible_ETs;
        pair<multimap<CEString, int>::iterator,
             multimap<CEString, int>::iterator> pair_search_result = umap_endings2subtbl.equal_range(sRight);
        for (; pair_search_result.first != pair_search_result.second; ++pair_search_result.first)
        {
            vec_i_possible_ETs.push_back(pair_search_result.first->second);
        }

        for (vector<int>::iterator iter_ET = vec_i_possible_ETs.begin();
             iter_ET != vec_i_possible_ETs.end();
             ++iter_ET)
        {
            // If the table prescribes a stem-final pattern, sLeft must end in it
            if (arr_freq_endings[*iter_ET].m_sStemFinal.uiLength() > 0 &&
                !sLeft.bRegexMatch (L"^.*(" + arr_freq_endings[*iter_ET].m_sStemFinal + L")$"))
            {
                continue;
            }
            // Something must remain of sLeft after m_iCutRight characters are cut off
            if (sLeft.uiLength() <= arr_freq_endings[*iter_ET].m_iCutRight)
            {
                continue;
            }

            vecGram = arr_freq_endings[*iter_ET].m_vecFind(sRight, -2);
            for (vector<int>::iterator itHash = vecGram.begin();
                 itHash != vecGram.end(); ++itHash)
            {
                CHasher tmpWf;
                tmpWf.m_sLemma = sLeft.sSubstr(0, sLeft.uiLength() - arr_freq_endings[*iter_ET].m_iCutRight) + arr_freq_endings[*iter_ET].m_sLemmaFinal;
                if (!bIsValidLemma (tmpWf.m_sLemma))
                {
                    continue;
                }

                // Check if what we've found is a new wordform
                bool bExists = false;
                for (vector<CHasher>::iterator itWf = vecPossibleWordforms.begin();
                     itWf != vecPossibleWordforms.end();
                     ++itWf)
                {
                    if ((*itWf).m_sLemma == tmpWf.m_sLemma && (*itWf).iGramHash() == *itHash)
                    {
                        bExists = true;
                        break;      // one duplicate is enough, no need to scan the rest
                    }
                }
                if (!bExists)
                {
                    tmpWf.hDecodeHash(*itHash);
                    tmpWf.m_llLexemeId = 0;
                    vecPossibleWordforms.push_back(tmpWf);
                }
            }
        }
    }

    vecGram.clear();
    return 0;
}

Ejemplo n.º 7

Mostrar archivo

Archivo: Analyzer.cpp Proyecto: kbogatyrev/Zal-Windows

// Analyzes a single word form and fills vecPossibleWordforms with the
// candidate analyses.
//
// sWordform may carry one stress mark: L'<' placed before the stressed letter
// (кор<ова) or a combining acute U+0301 placed after it. The mark is removed
// and its position is used to constrain stem and ending lookup.
// Only the first mark is interpreted; multiple stress marks are not handled.
//
// Three passes are made, each one only if the previous one found nothing:
//   1. every split of the word into stem + ending is looked up via
//      iLookUpStems() / iCheckEndings();
//   2. a leading "пол-", "пол" or "не" is cut off, the remainder is analyzed
//      recursively and the result is filtered;
//   3. if bGuess is TRUE, the lexeme is guessed from the ending alone.
//
// Be careful: vecPossibleWordforms is cleared on entry to pass 1.
//
// Returns -1 if sWordform is empty or m_pDb is NULL, otherwise the number of
// elements left in vecPossibleWordforms.
int CAnalyzer::iAnalyze(CEString sWordform,
                        vector<CHasher>& vecPossibleWordforms,
                        BOOL bGuess)
{
    if (sWordform.uiLength() <= 0 || m_pDb == NULL) // || vecPossibleWordforms == NULL)
    {
        return -1;
    }

    // The copy keeps the spelling as passed in; it is what the prefix pass splits
    CEString sWordformOriginal(sWordform);

    // Locate and remove the stress mark. iStressPos becomes the index of the
    // stressed letter in the mark-free string, or -1 if there is no mark.
    // (The previous code erased the mark and then searched for it again with a
    // regex, which could never match, so the position was always lost.)
    int iStressPosStem = -1, iStressPosEnding = -1;
    int iStressPos = sWordform.uiFindOneOf(0, L"<\u0301");
    if (ecNotFound == iStressPos)
    {
        iStressPos = -1;
    }
    else
    {
        const bool bMarkPrecedesLetter = (L'<' == sWordform[iStressPos]);
        sWordform.sErase(iStressPos, 1);
        if (!bMarkPrecedesLetter)
        {
            // коро\u0301ва: the accent follows the stressed letter.
            // A stray accent at position 0 yields -1, i.e. "no stress".
            --iStressPos;
        }
        // кор<ова: once the mark is erased its index is that of the stressed letter
    }

    CEString sLeft, sRight;
    vector<stStemLinks> vecStems;
    vecPossibleWordforms.clear();

    // Pass 1: try every stem/ending split, longest stem first
    for (int iLeft = sWordform.uiLength(); iLeft >= 0; --iLeft)
    {
        sLeft = sWordform.sSubstr(0, iLeft);
        sRight = sWordform.sSubstr(iLeft, sWordform.uiLength() - iLeft);

        // Stress positions for the stem and the ending
        if (iStressPos == -1)
        {
            iStressPosStem = iStressPosEnding = -2;
        }
        else if (iStressPos >= sLeft.uiLength())
        {
            iStressPosStem = -1;
            iStressPosEnding = iStressPos - sLeft.uiLength();
        }
        else
        {
            iStressPosStem = iStressPos;
            iStressPosEnding = -1;
        }

        vecStems.clear();
        iLookUpStems(vecStems, sLeft, iStressPosStem);
        if (vecStems.empty())
        {
            continue;
        }
        iCheckEndings(vecPossibleWordforms, vecStems, sLeft, sRight, iStressPosEnding);
    }

    // Pass 2: if we have no result, try cutting off possible prefixes
    if (vecPossibleWordforms.empty())
    {
        for (int iLeft = min(sWordformOriginal.uiLength(), 4); iLeft >= 1; --iLeft)
        {
            sLeft = sWordformOriginal.sSubstr(0, iLeft);
            sRight = sWordformOriginal.sSubstr(iLeft, sWordformOriginal.uiLength() - iLeft);

            const bool bHalfPrefix = (4 == iLeft && sLeft == L"пол-") ||
                                     (3 == iLeft && sLeft == L"пол");
            const bool bNegPrefix = (2 == iLeft && sLeft == L"не");
            if (!bHalfPrefix && !bNegPrefix)
            {
                continue;
            }

            int iResult = iAnalyze(sRight, vecPossibleWordforms, bGuess);
            if (iResult <= 0)
            {
                continue;
            }

            // Walk backwards so that erasing does not shift unvisited elements
            for (int iWf = iResult - 1; iWf >= 0; --iWf)
            {
                const CHasher& wf = vecPossibleWordforms[iWf];
                bool bKeep = false;
                if (bHalfPrefix)
                {
                    // "пол" + noun: only the genitive singular is acceptable
                    bKeep = POS_NOUN == wf.m_ePos &&
                            NUM_SG == wf.m_eNumber &&
                            CASE_GEN == wf.m_eCase;
                }
                else
                {
                    // "не" + noun / adjective / verb, excluding present tense,
                    // past tense and infinitive verb forms.
                    // The original tested "!= NOUN || != ADJ || != VERB", which is
                    // always true and therefore discarded every candidate.
                    const bool bAllowedPos = POS_NOUN == wf.m_ePos ||
                                             POS_ADJ == wf.m_ePos ||
                                             POS_VERB == wf.m_ePos;
                    const bool bExcludedVerbForm =
                        POS_VERB == wf.m_ePos &&
                        (wf.m_eSubparadigm == SUBPARADIGM_PRESENT_TENSE ||
                         wf.m_eSubparadigm == SUBPARADIGM_PAST_TENSE ||
                         wf.m_eSubparadigm == SUBPARADIGM_INFINITIVE);
                    bKeep = bAllowedPos && !bExcludedVerbForm;
                }

                if (!bKeep)
                {
                    vecPossibleWordforms.erase(vecPossibleWordforms.begin() + iWf);
                }
            }

            if (vecPossibleWordforms.size() > 0)
            {
                return (int)vecPossibleWordforms.size();
            }
        }
    }

    // Pass 3: now, if we haven't found anything, we may guess the lexeme
    if (vecPossibleWordforms.empty() && bGuess == TRUE)
    {
        for (int iLeft = 0; iLeft <= sWordform.uiLength(); ++iLeft)
        {
            sLeft = sWordform.sSubstr (0, iLeft);
            sRight = sWordform.sSubstr (iLeft, sWordform.uiLength() - iLeft);

            // Stress positions for the stem and the ending
            if (iStressPos == -1)
            {
                iStressPosStem = iStressPosEnding = -2;
            }
            else if (iStressPos >= sLeft.uiLength())
            {
                iStressPosStem = -1;
                iStressPosEnding = iStressPos - sLeft.uiLength();
            }
            else
            {
                iStressPosStem = iStressPos;
                iStressPosEnding = -1;
            }

            // An empty stem list makes iCheckEndings() guess from the ending
            vecStems.clear();
            iCheckEndings (vecPossibleWordforms, vecStems, sLeft, sRight, iStressPosEnding);
            if ((bContainsPlausibleVariants (vecPossibleWordforms) && sRight.uiLength() <= 3) ||
                vecPossibleWordforms.size() >= 4)
            {
                break;
            }
        }
        if (vecPossibleWordforms.size() > 4)
        {
            LeaveMostPlausible (vecPossibleWordforms);
        }
    }

    return (int)vecPossibleWordforms.size();
}

Ejemplo n.º 8

Mostrar archivo

Archivo: FormBuilderNouns.cpp Proyecto: kbogatyrev/Zal-Windows

// Builds the noun word forms of m_pLexeme.
//
// A CHasher is stepped through the gender/animacy/number/case combinations
// (Initialize() ... bIncrement()). For each combination that is not skipped,
// the matching endings are selected and, for every ending, a CWordForm is
// created, given its stress position(s) and passed to m_pLexeme->AddWordForm().
//
// Returns H_NO_ERROR on completion, or the first error code returned by
// eHandleAccEnding(), eCheckIrregularForms(), eHandleStemAugment(),
// eGetStressType() or eGetEnding(). Failures of the per-ending helpers
// (fleeting vowel, stress positions, form template) only skip that ending.
ET_ReturnCode CFormBuilderNouns::eBuild()
{
    ASSERT(m_pLexeme);   // we assume base class ctor took care of this

    ET_ReturnCode rc = H_NO_ERROR;

    // NOTE(review): a plain (throwing) new does not return NULL, so the check
    // below only matters if operator new is replaced -- confirm.
    m_pEndings = new CNounEndings(m_pLexeme);
    if (NULL == m_pEndings)
    {
        return H_ERROR_POINTER;
    }

    // NOTE(review): rc has not been assigned since its initialization, so this
    // test cannot fire.
    if (rc != H_NO_ERROR)
    {
        return rc;
    }

    ET_Animacy eAnimacy = m_pLexeme->eAnimacy();
    ET_Gender eoGender = m_pLexeme->eGender();

    CHasher gramIterator;
    gramIterator.Initialize(eoGender, eAnimacy);
    // Every "continue" directly inside this do/while jumps to the loop
    // condition, i.e. still advances gramIterator via bIncrement().
    do
    {
        // No singular forms when the main symbol is "мн."
        if ((L"мн." == m_pLexeme->sMainSymbol()) && (gramIterator.m_eNumber == NUM_SG))
        {
            continue;
        }

        // CASE_PART and CASE_LOC are built in the singular only
        if (NUM_PL == gramIterator.m_eNumber && 
            (CASE_PART == gramIterator.m_eCase || CASE_LOC == gramIterator.m_eCase))
        {
            continue;
        }

        if (CASE_PART == gramIterator.m_eCase && !m_pLexeme->bSecondGenitive())
        {
            continue;
        }

        if (CASE_LOC == gramIterator.m_eCase && !m_pLexeme->bSecondLocative())
        {
            continue;
        }

        // Handle acc ending
        // eEndingCase is the case whose endings are used; it may differ from
        // the grammatical case of the form being built.
        ET_Case eEndingCase = gramIterator.m_eCase;
        if (CASE_ACC == gramIterator.m_eCase)
        {
            rc = eHandleAccEnding (gramIterator.m_eNumber, eEndingCase);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }
        }
        // CASE_PART and CASE_LOC forms take the CASE_DAT endings
        if (CASE_PART == gramIterator.m_eCase || CASE_LOC == gramIterator.m_eCase)
        {
            eEndingCase = CASE_DAT;
        }

        CEString sLemma (m_pLexeme->sGraphicStem());
        if (m_pLexeme->bHasIrregularForms())
        {
            bool bSkipRegular = false;
            rc = eCheckIrregularForms (gramIterator.m_eGender, 
                                       gramIterator.m_eAnimacy,
                                       gramIterator.m_eCase,
                                       eEndingCase,
                                       gramIterator.m_eNumber,
                                       bSkipRegular);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }
            if (bSkipRegular)
            {
                // Workaround for lack of "исх. форма иррег." mark in current source
                if (GENDER_M == gramIterator.m_eGender && NUM_SG == gramIterator.m_eNumber && CASE_NOM == gramIterator.m_eCase)
                {
                    m_bIrregularSourceForm = true;
                }
                continue;
            }
        }

        rc = eHandleStemAugment (sLemma, gramIterator.m_eNumber, gramIterator.m_eCase);
        if (rc != H_NO_ERROR)
        {
            return rc;
        }

        // CASE_LOC forms are always ending-stressed; otherwise ask eGetStressType()
        ET_StressLocation eStress = STRESS_LOCATION_UNDEFINED;
        if (CASE_LOC == gramIterator.m_eCase)
        {
            eStress = STRESS_LOCATION_ENDING;
        }
        else
        {
            rc = eGetStressType (gramIterator.m_eNumber, eEndingCase, eStress);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }
        }

        // NOTE(review): the return value of eSelect() is ignored.
        ((CNounEndings *)m_pEndings)->eSelect(gramIterator.m_eNumber, eEndingCase, eStress);
        int iNumEndings = m_pEndings->iCount();
        if (iNumEndings < 1)
        {
            // A missing ending is reported as an error unless the lexeme type is 0
            if (m_pLexeme->iType() != 0)
            {
                ASSERT(0);
                ERROR_LOG(L"No endings");
            }
            continue;
        }

        CEString sSavedLemma (sLemma);      // lemma can change, e.g. because of a fleeting vowel
        // sLemma is restored from sSavedLemma after every ending, including
        // iterations that end with "continue"
        for (int iEnding = 0; iEnding < iNumEndings; ++iEnding, sLemma = sSavedLemma)
        {
            // Get ending and modify as necessary
            CEString sEnding;
            unsigned __int64 llEndingKey = -1;
            rc = m_pEndings->eGetEnding(iEnding, sEnding, llEndingKey);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }

            // Type 8, non-neuter: after a stem in ш/ж/ч/щ/ц skip endings that
            // start with "я", after any other stem skip endings that start with "а"
            if (8 == m_pLexeme->iType() && GENDER_N != m_pLexeme->eGender())
            {
                if (sLemma.bEndsWithOneOf (L"шжчщц"))
                {
                    if (sEnding.bStartsWith (L"я"))
                    {
                        continue;
                    }
                }
                else
                {
                    if (sEnding.bStartsWith (L"а"))
                    {
                        continue;
                    }
                }
            }

            // NOTE(review): bHasFleetingVowel is never read.
            bool bHasFleetingVowel = false;
            rc = eFleetingVowelCheck (gramIterator.m_eNumber, 
                                      eEndingCase,
                                      gramIterator.m_eGender, 
                                      eStress,
                                      SUBPARADIGM_NOUN,
                                      sEnding,
                                      sLemma);
            if (rc != H_NO_ERROR)
            {
                continue;
            }

            vector<int> vecStress;
            rc = eGetStressPositions (sLemma, sEnding, eStress, vecStress);
            if (rc != H_NO_ERROR)
            {
                continue;
            }

            CWordForm * pWordForm = NULL;
            rc = eCreateFormTemplate (gramIterator.m_eNumber, gramIterator.m_eCase, sLemma, pWordForm);
            if (rc != H_NO_ERROR)
            {
                continue;
            }

            if (1 == vecStress.size() || m_pLexeme->bIsMultistressedCompound())
            {
                // One word form that carries all stress positions
                vector<int>::iterator itStressPos = vecStress.begin();
                for (; itStressPos != vecStress.end(); ++itStressPos)
                {
                    pWordForm->m_mapStress[*itStressPos] = STRESS_PRIMARY;  // primary
                    rc = eHandleYoAlternation (eStress, *itStressPos, pWordForm->m_sLemma, sEnding);
                    if (rc != H_NO_ERROR)
                    {
                        // Skips only the field assignments below; the form is
                        // still added after the loop
                        continue;
                    }
                    pWordForm->m_sEnding = sEnding;
                    pWordForm->m_llEndingDataId = llEndingKey;
                    pWordForm->m_sWordForm = pWordForm->m_sLemma + sEnding;
                }
                m_pLexeme->AddWordForm (pWordForm);                        
            }
            else
            {
                // One word form per stress position (stress variants)
                vector<int>::iterator itStressPos = vecStress.begin();
                for (; itStressPos != vecStress.end(); ++itStressPos)
                {
                    if (itStressPos != vecStress.begin())
                    {
                        // Every variant after the first is a clone of the
                        // previous form with its stress map emptied
                        CWordForm * pwfVariant = NULL;
                        CloneWordForm (pWordForm, pwfVariant);
                        pwfVariant->m_mapStress.clear();
                        pWordForm = pwfVariant;
                    }
                    pWordForm->m_mapStress[*itStressPos] = STRESS_PRIMARY;  // primary
                    rc = eHandleYoAlternation (eStress, *itStressPos, pWordForm->m_sLemma, sEnding);
                    if (rc != H_NO_ERROR)
                    {
                        // NOTE(review): this skips AddWordForm(), so the current
                        // pWordForm is not registered with the lexeme -- possible
                        // leak; confirm who owns it.
                        continue;
                    }
                    pWordForm->m_sWordForm = pWordForm->m_sLemma + sEnding;
                    pWordForm->m_sEnding = sEnding;
                    pWordForm->m_llEndingDataId = llEndingKey;

                    m_pLexeme->AddWordForm (pWordForm);
                }
            }
        }   //  for (int iEnding = 0; ... )
    
    } while (gramIterator.bIncrement());

    return H_NO_ERROR;

}    //  eBuild()

Ejemplo n.º 9

Mostrar archivo

Archivo: FormBuilderNouns.cpp Proyecto: kbogatyrev/Zal-Windows

// Adjusts sLemma in place for lexemes that have a stem augment
// (m_pLexeme->iStemAugment() >= 1); other lexemes are left untouched.
// What is done depends on m_pLexeme->iType():
//   1  the last two characters are removed (римлянин, южанин, армянин);
//   3  singular: nothing for CASE_NOM and for inanimate CASE_ACC, otherwise
//      the second-to-last character is removed;
//      plural: a stem-final онок / ёнок / оночек / ёночек is replaced
//      according to the table below;
//   8  "ен" is appended, except in the singular for CASE_NOM and inanimate
//      CASE_ACC.
// Always returns H_NO_ERROR.
ET_ReturnCode CFormBuilderNouns::eHandleStemAugment (CEString& sLemma, ET_Number eNumber, ET_Case eCase)
{
    ASSERT(m_pLexeme);   // we assume base class ctor took care of this

    if (m_pLexeme->iStemAugment() < 1)
    {
        return H_NO_ERROR;
    }

    const int iType = m_pLexeme->iType();
    switch (iType)
    {
        case 1:
        {
            sLemma.sErase (sLemma.uiLength()-2, 2);  // римлянин, южанин, армянин
            break;
        }
        case 3:
        {
            if (NUM_SG == eNumber)
            {
                const bool bUnchangedForm = (CASE_NOM == eCase) ||
                    (ANIM_NO == m_pLexeme->eAnimacy() && CASE_ACC == eCase);
                if (!bUnchangedForm)
                {
                    sLemma.sErase (sLemma.uiLength()-2, 1);
                }
                break;
            }
            if (NUM_PL != eNumber)
            {
                break;
            }

            // Plural: suffix of the graphic stem -> number of characters to cut
            // from sLemma -> replacement. The longer replacement is used for
            // CASE_GEN and animate CASE_ACC, the shorter one elsewhere.
            static const struct
            {
                const wchar_t * szSuffix;
                unsigned int uiCut;
                const wchar_t * szGenLike;
                const wchar_t * szOther;
            }
            arrPluralStems[] =
            {
                { L"онок",   4, L"ат",   L"ат"  },
                { L"ёнок",   4, L"ят",   L"ят"  },
                { L"оночек", 6, L"аток", L"атк" },
                { L"ёночек", 6, L"яток", L"ятк" }
            };
            const int iNumRules = (int)(sizeof(arrPluralStems) / sizeof(arrPluralStems[0]));

            CEString& sGs = m_pLexeme->sGraphicStem();
            for (int iRule = 0; iRule < iNumRules; ++iRule)
            {
                if (!sGs.bEndsWith (arrPluralStems[iRule].szSuffix))
                {
                    continue;
                }
                sLemma.sErase (sLemma.uiLength()-arrPluralStems[iRule].uiCut, arrPluralStems[iRule].uiCut);
                const bool bGenLike = (CASE_GEN == eCase) ||
                    (CASE_ACC == eCase && ANIM_YES == m_pLexeme->eAnimacy());
                                                     // they all should be animate?
                sLemma += bGenLike ? arrPluralStems[iRule].szGenLike
                                   : arrPluralStems[iRule].szOther;
                break;
            }
            break;
        }
        case 8:
        {
            const bool bUnchangedForm = NUM_SG == eNumber &&
                ((CASE_NOM == eCase) ||
                 (ANIM_NO == m_pLexeme->eAnimacy() && CASE_ACC == eCase));
            if (!bUnchangedForm)
            {
                sLemma += L"ен";
            }
            break;
        }
        default:
        {
            break;
        }
    }

    return H_NO_ERROR;

}   //  eHandleStemAugment (...)

Ejemplo n.º 10

Mostrar archivo

Archivo: FormBuilderAdjShort.cpp Proyecto: kbogatyrev/Zal-Windows

ET_ReturnCode CFormBuilderShortAdj::eGetStressTypes (ET_Number eNumber, 
                                                     ET_Gender eGender, 
                                                     vector<ET_StressLocation>& vecStressType)
{
    ASSERT(m_pLexeme);   // we assume base class ctor took care of this

    ET_ReturnCode rc = H_NO_ERROR;

    if (NUM_PL == eNumber && GENDER_UNDEFINED != eGender)
    {
        ASSERT(0);
        ERROR_LOG (L"Unexpected gender/number values.");
        return H_ERROR_INVALID_ARG;
    }

    if (NUM_SG == eNumber && GENDER_UNDEFINED == eGender)
    {
        ASSERT(0);
        ERROR_LOG (L"Unexpected gender/number values.");
        return H_ERROR_INVALID_ARG;
    }

//    if (GENDER_M == eGender)
//    {
//        ASSERT (NUM_SG == eNumber);
//        vecStressType.push_back (STRESS_LOCATION_STEM);
//        return H_NO_ERROR;
//    }

    //
    // Exception: part past passive short ending in stressed -Annyj/-jAnnyj;
    // see GDRL p. 86 footnote 4
    //
    if (SUBPARADIGM_PART_PAST_PASS_SHORT == m_eSubparadigm)
    {
        CGramHasher hasher (POS_VERB, SUBPARADIGM_PART_PAST_PASS_LONG, CASE_NOM, NUM_SG, 
                            GENDER_M, PERSON_UNDEFINED, ANIM_NO, m_pLexeme->eAspect(), 
                            m_pLexeme->eIsReflexive());
        CWordForm * pNSgMLong = NULL;
        rc = m_pLexeme->eWordFormFromHash (hasher.iGramHash(), 0, pNSgMLong);
        if (rc != H_NO_ERROR)
        {
            return rc;
        }
        if (NULL == pNSgMLong)
        {
            ASSERT(0);
            ERROR_LOG (L"Failed to obtain N Sg m of the long form.");
            return H_ERROR_POINTER;
        }
        
        CEString sNSgMLong (pNSgMLong->m_sWordForm);
        if (sNSgMLong.bEndsWith (L"анный") || sNSgMLong.bEndsWith (L"янный") ||
            sNSgMLong.bEndsWith (L"енный"))
        {
            map<int, ET_StressType>::iterator itStress = pNSgMLong->m_mapStress.begin();
            for (; itStress != pNSgMLong->m_mapStress.end(); ++itStress)
            {
                if (sNSgMLong.uiNSyllables()-2 == (*itStress).first 
                    && STRESS_PRIMARY == (*itStress).second)
                {
// 1. -at'/-jat' [7] -- 1a, p. 83
// 2. monosyll verbs -- same as past: zvannyj (113) and dannyj 117
    // &&&& TODO
                }
            }
        }
    }

    ET_AccentType eAt = AT_UNDEFINED;
    if (AT_UNDEFINED == m_eAccentType2)
    {
        eAt = m_eAccentType1;
    }
    else
    {
        eAt = m_eAccentType2;
    }

    switch (eAt)
    {
        case AT_UNDEFINED:
        {
            ASSERT(0);
            ERROR_LOG (L"Undefined accent type.");
            return H_ERROR_GENERAL;
        }
        case AT_A:
        {
            vecStressType.push_back (STRESS_LOCATION_STEM);

            // Type sorvana: GDRL, p. 86
            if (SUBPARADIGM_PART_PAST_PASS_SHORT == m_eSubparadigm && 
                GENDER_F == eGender && 
                AT_C == m_pLexeme->eAccentType2())
            {
                vecStressType.push_back (STRESS_LOCATION_ENDING);
            }

            return H_NO_ERROR;
        }
        case AT_A1:
        {
            if (GENDER_M == eGender)
            {
                ASSERT(NUM_SG == eNumber);
                vecStressType.push_back(STRESS_LOCATION_STEM);
                return H_NO_ERROR;
            }
            if (GENDER_F == eGender)
            {
                ASSERT (NUM_SG == eNumber);
                vecStressType.push_back (STRESS_LOCATION_STEM);
                vecStressType.push_back (STRESS_LOCATION_ENDING);
                return H_NO_ERROR;
            }
            if (GENDER_N == eGender)
            {
                ASSERT (NUM_SG == eNumber);
                vecStressType.push_back (STRESS_LOCATION_STEM);
                return H_NO_ERROR;
            }
            if (NUM_PL == eNumber)
            {
                vecStressType.push_back (STRESS_LOCATION_STEM);
                return H_NO_ERROR;
            }

            ASSERT(0);
            ERROR_LOG (L"Bad arguments.");
            return H_ERROR_INVALID_ARG;
        }
        case AT_B:
        {
            vecStressType.push_back (STRESS_LOCATION_ENDING);
            return H_NO_ERROR;
        }
        case AT_B1:
        {
            if (GENDER_M == eGender)
            {
                ASSERT(NUM_SG == eNumber);
                vecStressType.push_back(STRESS_LOCATION_ENDING);
                return H_NO_ERROR;
            }
            if (GENDER_F == eGender || GENDER_N == eGender)
            {
                ASSERT (NUM_SG == eNumber);
                vecStressType.push_back (STRESS_LOCATION_ENDING);
                return H_NO_ERROR;
            }
            if (NUM_PL == eNumber)
            {
                ASSERT (GENDER_UNDEFINED == eGender);
                vecStressType.push_back (STRESS_LOCATION_STEM);
                vecStressType.push_back (STRESS_LOCATION_ENDING);
                return H_NO_ERROR;
            }

            ASSERT(0);
            ERROR_LOG (L"Bad arguments.");
            return H_ERROR_INVALID_ARG;
        }
        case AT_C:
        {
            if (GENDER_M == eGender)
            {
                ASSERT(NUM_SG == eNumber);
                vecStressType.push_back(STRESS_LOCATION_STEM);
                return H_NO_ERROR;
            }
            if (GENDER_F == eGender)
            {
                ASSERT (NUM_SG == eNumber);
                vecStressType.push_back (STRESS_LOCATION_ENDING);
                return H_NO_ERROR;
            }
            if (GENDER_N == eGender || NUM_PL == eNumber)
            {
                vecStressType.push_back (STRESS_LOCATION_STEM);
                return H_NO_ERROR;
            }

            ASSERT(0);
            ERROR_LOG (L"Bad arguments.");
            return H_ERROR_INVALID_ARG;
        }
        case AT_C1:
        {
            if (GENDER_M == eGender)
            {
                ASSERT(NUM_SG == eNumber);
                vecStressType.push_back(STRESS_LOCATION_STEM);
                return H_NO_ERROR;
            }
            if (GENDER_F == eGender)
            {
                ASSERT (NUM_SG == eNumber);
                vecStressType.push_back (STRESS_LOCATION_ENDING);
                return H_NO_ERROR;
            }
            if (GENDER_N == eGender)
            {
                ASSERT (NUM_SG == eNumber);
                vecStressType.push_back (STRESS_LOCATION_STEM);
                return H_NO_ERROR;
            }
            if (NUM_PL == eNumber)
            {
                ASSERT (GENDER_UNDEFINED == eGender);
                vecStressType.push_back (STRESS_LOCATION_STEM);
                vecStressType.push_back (STRESS_LOCATION_ENDING);
                return H_NO_ERROR;
            }

            ASSERT(0);
            ERROR_LOG (L"Bad arguments.");
            return H_ERROR_INVALID_ARG;
        }
        case AT_C2:
        {
            if (GENDER_M == eGender)
            {
                ASSERT(NUM_SG == eNumber);
                vecStressType.push_back(STRESS_LOCATION_STEM);
                return H_NO_ERROR;
            }
            if (GENDER_F == eGender)
            {
                ASSERT (NUM_SG == eNumber);
                vecStressType.push_back (STRESS_LOCATION_ENDING);
                return H_NO_ERROR;
            }
            if (GENDER_N == eGender)
            {
                ASSERT (NUM_SG == eNumber);
                vecStressType.push_back (STRESS_LOCATION_STEM);
                vecStressType.push_back (STRESS_LOCATION_ENDING);
                return H_NO_ERROR;
            }
            if (NUM_PL == eNumber)
            {
                ASSERT (GENDER_UNDEFINED == eGender);
                vecStressType.push_back (STRESS_LOCATION_STEM);
                vecStressType.push_back (STRESS_LOCATION_ENDING);
                return H_NO_ERROR;
            }

            ASSERT(0);
            ERROR_LOG (L"Bad arguments.");
            return H_ERROR_INVALID_ARG;
        }
        default:
        {
            ASSERT(0);
            ERROR_LOG (L"Illegal accent type.");
            return H_ERROR_INVALID_ARG;
        }
    }

    return H_ERROR_INVALID_ARG;

}   // eGetStressType()

Ejemplo n.º 11

Mostrar archivo

Archivo: FormBuilderAdjShort.cpp Proyecto: kbogatyrev/Zal-Windows

//
// Applies "common deviations" to a short-form word form.
//
//  pWordForm -- the form to adjust; when a deviation is optional a clone is
//               registered with the lexeme and the clone is modified instead.
//
// Returns H_NO_MORE if common deviation 1 applies but the form is not
// masculine, H_ERROR_UNEXPECTED on a stress position below 1 in the cd-7
// branch, H_EXCEPTION if a CException is caught, H_NO_ERROR otherwise.
//
ET_ReturnCode CFormBuilderShortAdj::eHandleDeviations (CWordForm * pWordForm)
{
    ASSERT(m_pLexeme);   // the base class ctor is expected to have set this

    try
    {
        //
        // A. Common deviations defined for adjectives (1-2)
        //
        // Probe cd-1 first, then cd-2: the builder's own map takes precedence,
        // the lexeme is asked next. If neither is found iCd ends up as 3.
        //
        bool bOptionalCD = false;
        int iCd = 1;
        while (iCd <= 2)
        {
            map<int, bool>::iterator itLocal = m_mapCommonDeviations.find (iCd);
            if (itLocal != m_mapCommonDeviations.end())
            {
                bOptionalCD = itLocal->second;
                break;
            }
            if (m_pLexeme->bFindCommonDeviation (iCd, bOptionalCD))
            {
                break;
            }
            ++iCd;
        }

        //
        // Only adjectives in -nnyj/-nnij or participia
        //
        if (1 == iCd || 2 == iCd)
        {
            const bool bCd1NonMasculine = (1 == iCd) && (GENDER_M != pWordForm->m_eGender);
            if (bCd1NonMasculine)
            {
                return H_NO_MORE;
            }

            // Optional deviation on a short adjective: keep the regular form
            // and continue working on a freshly registered copy.
            if (bOptionalCD && SUBPARADIGM_SHORT_ADJ == m_eSubparadigm)
            {
                CWordForm * pClone = NULL;
                CloneWordForm (pWordForm, pClone);
                m_pLexeme->AddWordForm (pClone);
                pWordForm = pClone;
            }

            if (GENDER_M != pWordForm->m_eGender)
            {
                // Non-masculine: remove the single character that precedes
                // the final one.
                pWordForm->m_sWordForm.sErase (pWordForm->m_sWordForm.uiLength()-2, 1);
            }
            else
            {
                // Masculine: drop the last character, and one more if the
                // form has a fleeting vowel.
                pWordForm->m_sWordForm.sErase (pWordForm->m_sWordForm.uiLength()-1);
                if (m_bFleetingVowel)
                {
                    pWordForm->m_sWordForm.sErase (pWordForm->m_sWordForm.uiLength()-1);
                }
            }
        }   //  if (1 == iCd || 2 == iCd)

        //
        // B. Common deviations defined for verbs (7-8)
        //
        if (m_pLexeme->bHasCommonDeviation(7))
        {
            iCd = 7;
        }
        else
        {
            // TODO -- cd-8 is queried but currently handled exactly like
            // "no deviation"; the result is deliberately ignored.
            (void)m_pLexeme->bHasCommonDeviation(8);
            iCd = -1;
        }

        if (iCd > 0 && SUBPARADIGM_PART_PAST_PASS_SHORT == m_eSubparadigm)
        {
            if (m_pLexeme->bDeviationOptional(iCd))   // store both forms
            {
                CWordForm * pClone = NULL;
                CloneWordForm (pWordForm, pClone);
                m_pLexeme->AddWordForm (pClone);
                pWordForm = pClone;
            }

            // NOTE(review): the corrected map is filled in below but, in the
            // code visible here, never written back to the word form --
            // confirm whether an assignment to m_mapStress is missing.
            map<int, ET_StressType> mapCorrectedStress;
            map<int, ET_StressType>::iterator itStress = pWordForm->m_mapStress.begin();
            while (itStress != pWordForm->m_mapStress.end())
            {
                if (!itStress->second)
                {
                    // Zero-valued stress type: keep the position, mark secondary
                    mapCorrectedStress[itStress->first] = STRESS_SECONDARY;
                }
                else
                {
                    if (itStress->first < 1)
                    {
                        ASSERT(0);
                        ERROR_LOG (L"Unexpected stress position in cd-7 or cd-8 participle.");
                        return H_ERROR_UNEXPECTED;
                    }

                    // Any other stress is recorded as primary at index
                    // (number of syllables - 1).
                    CEString sWf (pWordForm->m_sWordForm);
                    mapCorrectedStress[sWf.uiNSyllables()-1] = STRESS_PRIMARY;
                }
                ++itStress;
            }
        }
    }
    catch (CException& ex)
    {
        CEString sMsg (L"Exception: ");
        sMsg += ex.szGetDescription();
        ERROR_LOG (sMsg);
        return H_EXCEPTION;
    }

    return H_NO_ERROR;

}   //  eHandleDeviations (...)

Ejemplo n.º 12

Mostrar archivo

//
// Self-test driver for CEString.
//
// Runs a linear sequence of checks against CEString (construction, indexing,
// comparison, searching, insert/erase/replace, case conversion, trimming,
// tokenizing, syllable counting, number-to-string conversion). A failed check
// is reported through ERROR_LOG and execution continues with the next check,
// so one run reports every failure. The std::wstring calls at the top are not
// verified; they exercise the standard replace/erase calls, apparently for
// comparison with CEString.
//
// argc/argv are not used. Always returns 0.
//
int _tmain(int argc, _TCHAR* argv[])
{
    //
    // std::wstring replace/erase calls (results are not checked)
    //
    wstring sReplaceableB (L"0123456789012345");
    wstring sReplacedB = sReplaceableB.replace (5, 6, L"abcd");

    CEString sReplacableC(L"0123456789012345678901234567890123567890");
    CEString sReplacedCC = sReplacableC.sReplace(L"567890", L"abcd");
    CEString sReplacedC = sReplacableC.sReplace(5, 6, L"abcd");

    sReplaceableB = L"0123456789";
    sReplacedB = sReplaceableB.replace (5, 3, L"a");

    sReplaceableB = L"0123456789";
    sReplacedB = sReplaceableB.replace (8, 2, L"ab");

    sReplaceableB = L"0123456789";
    sReplacedB = sReplaceableB.replace (5, 3, L"a");

    sReplaceableB = L"0123456789";
    sReplacedB = sReplaceableB.replace (8, 2, L"abc");

    wstring sEraseableB (L"0123456789");
    wstring sErasedB = sEraseableB.erase (5, 3);

    sEraseableB = L"0123456789";
    sErasedB = sEraseableB.erase (5, 5);

    sEraseableB = L"0123456789";
    sErasedB = sEraseableB.erase (5, 7);

    sEraseableB = L"0123456789";
    sErasedB = sEraseableB.erase (5);

    sEraseableB = L"0123456789";
    sErasedB = sEraseableB.erase();

    // Start offset past the end of the string: std::wstring::erase throws,
    // the exception is deliberately swallowed.
    try
    {
        sEraseableB = L"0123456789";
        sErasedB = sEraseableB.erase (12, 7);
    }
    catch (...)
    {
    }

    try
    {
        sEraseableB = L"0123456789";
        sErasedB = sEraseableB.erase (12);
    }
    catch (...)
    {
    }

    // Ctors
    CEString sEmptyString;
    if (0 != sEmptyString.uiLength() || 0 != sEmptyString.uiGetNumOfTokens() || 
        0 != sEmptyString.uiGetNumOfFields() || 0 != sEmptyString.uiGetVisibleLength())
    {
        ERROR_LOG (L"Initialization error");
    }


    CEString sCopy (sEmptyString);
    if (0 != sCopy.uiLength() || 0 != sCopy.uiGetNumOfTokens() || 
        0 != sCopy.uiGetNumOfFields() || 0 != sCopy.uiGetVisibleLength())
    {
        ERROR_LOG (L"Initialization error");
    }

    sCopy = L"0123456789";
    CEString sCopy2 (sCopy);

    sCopy2.SetBreakChars (L" -/");
    CEString sCopy3 (sCopy2);

    CEString sFromCString (L"0123456789");
    if (10 != sFromCString.uiLength() || 1 != sFromCString.uiGetNumOfTokens() || 
        1 != sFromCString.uiGetNumOfFields() || 10 != sFromCString.uiGetVisibleLength())
    {
        ERROR_LOG (L"Initialization error");
    }

// TODO: operator ()

    // operator[]: write access at an inner, the first and the last position,
    // then read access
    CEString sSquareBracketsTest (L"0123456789");
    sSquareBracketsTest[1] = L'a';
    if (sSquareBracketsTest != L"0a23456789")
    {
        ERROR_LOG (L"Square brackets operator error");
    }

    sSquareBracketsTest = L"0123456789";
    sSquareBracketsTest[0] = L'a';
    if (sSquareBracketsTest != L"a123456789")
    {
        ERROR_LOG (L"Square brackets operator error");
    }

    sSquareBracketsTest = L"0123456789";
    sSquareBracketsTest[9] = L'a';
    if (sSquareBracketsTest != L"012345678a")
    {
        ERROR_LOG (L"Square brackets operator error");
    }

    sSquareBracketsTest = L"0123456789";
    CEString sLetter = sSquareBracketsTest[1];
    if (L"1" != sLetter)
    {
        ERROR_LOG (L"Square brackets operator error");
    }

    // Comparison
    ERelation eRet = CEString::eCompare (L"1234567", L"1234567");
    if (ecEqual != eRet)
    {
        ERROR_LOG (L"Comparison error");
    }

    eRet = CEString::eCompare (L"1234567", L"1234566");
    if (eRet != ecGreater)
    {
        ERROR_LOG (L"Comparison error");
    }

    eRet = CEString::eCompare (L"1234566", L"1234567");
    if (eRet != ecLess)
    {
        ERROR_LOG (L"Comparison error");
    }

    eRet = CEString::eCompare (L"123456", L"1234567");
    if (eRet != ecLess)
    {
        ERROR_LOG (L"Comparison error");
    }

    eRet = CEString::eCompare (L"1234567", L"123456");
    if (eRet != ecGreater)
    {
        ERROR_LOG (L"Comparison error");
    }

    eRet = CEString::eCompareNoCase (L"AbCdEfG", L"ABCDEFg");
    if (ecEqual != eRet)
    {
        ERROR_LOG (L"Comparison error");
    }

    eRet = CEString::eCompareNoCase (L"АбВгДЕ", L"АБВГДе");
    if (ecEqual != eRet)
    {
        ERROR_LOG (L"Comparison error");
    }

    // Character membership
    bool bRet = CEString::bIn (L'2', L"0123456789");
    if (!bRet)
    {
        ERROR_LOG (L"bIn() failed.");
    }

    bRet = CEString::bIn (L'a', L"0123456789");
    if (bRet)
    {
        ERROR_LOG (L"bIn() failed.");
    }

    // Searching
    CEString sSearcheable (L"0123456789");
    unsigned int uiFindRet = sSearcheable.uiFind (L"123");
    if (1 != uiFindRet)
    {
        ERROR_LOG (L"uiFind() failed.");
    }

    uiFindRet = sSearcheable.uiFind (L"abc");
    if (ecNotFound != uiFindRet)
    {
        ERROR_LOG (L"uiFind() failed.");
    }

    sSearcheable = L"aBcDeFgHiJ";
    uiFindRet = sSearcheable.uiFindNoCase (L"bCDEF");
    if (ecNotFound == uiFindRet)
    {
        ERROR_LOG (L"uiFindNoCase() failed.");
    }

    sSearcheable = L"012345543210";
    uiFindRet = sSearcheable.uiRFind (L"5");
    if (6 != uiFindRet)
    {
        ERROR_LOG (L"uiRFind() failed.");
    }

//    unsigned int uiRFindNoCase (const wchar_t * szRhs) const

    sSearcheable = L"0123456789";
    uiFindRet = sSearcheable.uiFindFirstOf (L"234");
    if (2 != uiFindRet)
    {
        ERROR_LOG (L"uiFindFirstOf() failed.");
    }

//    unsigned int uiFindFirstOfNoCase (const wchar_t * szSet) const
    sSearcheable = L"0120120123456789";
    uiFindRet = sSearcheable.uiFindOneOf (3, L"234");
    if (5 != uiFindRet)
    {
        ERROR_LOG (L"uiFindOneOf() failed.");
    }

    sSearcheable = L"0123456789";
    uiFindRet = sSearcheable.uiFindLastOf (L"234");
    if (4 != uiFindRet)
    {
        ERROR_LOG (L"uiFindLastOf() failed.");
    }

//    unsigned int uiFindLastOfNoCase (const wchar_t * szSet) const

    // Prefix tests
    bRet = sSearcheable.bStartsWith (L"012");
    if (!bRet)
    {
        ERROR_LOG (L"bStartsWith() failed.");
    }

    bRet = sSearcheable.bStartsWith (L"234");
    if (bRet)
    {
        ERROR_LOG (L"bStartsWith() failed.");
    }

    sSearcheable = L"aBcDeFgHiJ";
    bRet = sSearcheable.bStartsWithNoCase (L"abcd");
    if (!bRet)
    {
        ERROR_LOG (L"bStartsWithNoCase() failed.");
    }

    sSearcheable = L"0123456789";
    bRet = sSearcheable.bStartsWithOneOf (L"012");
    if (!bRet)
    {
        ERROR_LOG (L"bStartsWithOneOf() failed.");
    }

    bRet = sSearcheable.bStartsWithOneOf (L"123");
    if (bRet)
    {
        ERROR_LOG (L"bStartsWithOneOf() failed.");
    }

    sSearcheable = L"aBcDeFgHiJ";
    bRet = sSearcheable.bStartsWithOneOfNoCase (L"abc");
    if (!bRet)
    {
        ERROR_LOG (L"bStartsWithOneOfNoCase() failed.");
    }

    bRet = sSearcheable.bStartsWithOneOfNoCase (L"bc");
    if (bRet)
    {
        ERROR_LOG (L"bStartsWithOneOfNoCase() failed.");
    }

    sSearcheable = L"аБвГдЕёжзи";
    bRet = sSearcheable.bStartsWithOneOfNoCase (L"абв");
    if (!bRet)
    {
        ERROR_LOG (L"bStartsWithOneOfNoCase() failed.");
    }

    bRet = sSearcheable.bStartsWithOneOfNoCase (L"бв");
    if (bRet)
    {
        ERROR_LOG (L"bStartsWithOneOfNoCase() failed.");
    }

    // Suffix tests
    sSearcheable = L"0123456789";
    bRet = sSearcheable.bEndsWith (L"789");
    if (!bRet)
    {
        ERROR_LOG (L"bEndsWith() failed.");
    }

    bRet = sSearcheable.bEndsWith (L"123");
    if (bRet)
    {
        ERROR_LOG (L"bEndsWith() failed.");
    }

    sSearcheable = L"abcdeFgHiJ";
    bRet = sSearcheable.bEndsWithNoCase (L"hij");
    if (!bRet)
    {
        ERROR_LOG (L"bEndsWithNoCase() failed.");
    }

    bRet = sSearcheable.bEndsWithNoCase (L"ghi");
    if (bRet)
    {
        ERROR_LOG (L"bEndsWithNoCase() failed.");
    }

    sSearcheable = L"абвгдЕёЖзИ";
    bRet = sSearcheable.bEndsWithNoCase (L"жзи");
    if (!bRet)
    {
        ERROR_LOG (L"bEndsWithNoCase() failed.");
    }

    bRet = sSearcheable.bEndsWithNoCase (L"ёжз");
    if (bRet)
    {
        ERROR_LOG (L"bEndsWithNoCase() failed.");
    }

    sSearcheable = L"0123456789";
    bRet = sSearcheable.bEndsWithOneOf (L"ab9");
    if (!bRet)
    {
        ERROR_LOG (L"bEndsWithOneOf() failed.");
    }

    bRet = sSearcheable.bEndsWithOneOf (L"ab8");
    if (bRet)
    {
        ERROR_LOG (L"bEndsWithOneOf() failed.");
    }

    sSearcheable = L"aBcDeFgHiJ";
    bRet = sSearcheable.bEndsWithOneOfNoCase (L"abj");
    if (!bRet)
    {
        ERROR_LOG (L"bEndsWithOneOfNoCase failed.");
    }

    bRet = sSearcheable.bEndsWithOneOfNoCase (L"abc");
    if (bRet)
    {
        ERROR_LOG (L"bEndsWithOneOfNoCase failed.");
    }

    sSearcheable = L"абвгдЕёЖзИ";
    bRet = sSearcheable.bEndsWithOneOfNoCase (L"abи");
    if (!bRet)
    {
        ERROR_LOG (L"bEndsWithOneOfNoCase failed.");
    }

    bRet = sSearcheable.bEndsWithOneOfNoCase (L"abc");
    if (bRet)
    {
        ERROR_LOG (L"bEndsWithOneOfNoCase failed.");
    }

    // Operators
    CEString sLhs (L"01234");
    CEString sRhs (L"56789");
    
    bRet = (sLhs == sRhs);
    if (bRet)
    {
        ERROR_LOG (L"Comparison error");
    }

    bRet = (sLhs == L"01234");
    if (!bRet)
    {
        ERROR_LOG (L"Comparison error");
    }

    bRet = (L"01234" == sLhs);
    if (!bRet)
    {
        ERROR_LOG (L"Comparison error");
    }

//    CString csLhs (L"01234");
//    CString csRhs (L"56789");
//    bRet = (L"01234" == csLhs);
//    if (!bRet)
//    {
//        ERROR_LOG (L"CString behavior does not match CEString behavior");
//    }

    bRet = (sLhs < sRhs);
    if (!bRet)
    {
        ERROR_LOG (L"Comparison error");
    }

    bRet = (sLhs > sRhs);
    if (bRet)
    {
        ERROR_LOG (L"Comparison error");
    }

    bRet = (sLhs <= sRhs);
    if (!bRet)
    {
        ERROR_LOG (L"Comparison error");
    }

    bRet = (sLhs >= sRhs);
    if (bRet)
    {
        ERROR_LOG (L"Comparison error");
    }

    bRet = (sLhs >= sLhs);
    if (!bRet)
    {
        ERROR_LOG (L"Comparison error");
    }


    // Assignment (the variable name is historical; it is no longer empty)
    sEmptyString = L"0123456";
    if (sEmptyString != L"0123456")
    {
        ERROR_LOG (L"Assignemnt or comparison error");
    }

    sEmptyString = sRhs;
    if (sEmptyString != sRhs)
    {
        ERROR_LOG (L"Assignemnt or comparison error");
    }

    //CEString sResult = sLhs + sRhs;
    //if (sResult != L"0123456789")
    //{
    //    ERROR_LOG (L"Concatenation or comparison error");
    //}
    //sResult += L"<--Concatenated";
    //if (sResult != L"0123456789<--Concatenated")
    //{
    //    ERROR_LOG (L"Concatenation or comparison error");
    //}

    // Insert / erase / replace: each check expects the receiver to be
    // modified in place AND the returned string to equal the receiver.
    CEString sInsertable (L"0123789");
    CEString sInserted = sInsertable.sInsert (4, L"456");
    if (sInserted != sInsertable || sInsertable != L"0123456789")
    {
        ERROR_LOG (L"Insertion error");
    }

    sInsertable = L"012456789";
    sInserted = sInsertable.sInsert (3, L'3');
    if (sInserted != sInsertable || sInsertable != L"0123456789")
    {
        ERROR_LOG (L"Insertion error");
    }

    CEString sErasable (L"012abcd3456789");
    CEString sErased = sErasable.sErase (3, 4);
    if (sErased != sErasable || sErasable != L"0123456789")
    {
        ERROR_LOG (L"Erase error");
    }
    
    sErasable = L"0123456789";
    sErased = sErasable.sErase (3, 7);
    if (sErased != sErasable || sErasable != L"012")
    {
        ERROR_LOG (L"Erase error");
    }

    // A length running past the end is expected to erase up to the end
    sErasable = L"0123456789";
    sErased = sErasable.sErase (3, 40);
    if (sErased != sErasable || sErasable != L"012")
    {
        ERROR_LOG (L"Erase error");
    }

    sErasable = L"0123456789";
    sErased = sErasable.sErase (3);
    if (sErased != sErasable || sErasable != L"012")
    {
        ERROR_LOG (L"Erase error");
    }

    sErasable = L"0123456789a";
    sErased = sErasable.sErase (10);
    if (sErased != sErasable || sErasable != L"0123456789")
    {
        ERROR_LOG (L"Erase error");
    }

    sErasable.Erase();
    if (!sErasable.bIsEmpty() || sErasable.uiLength() != 0)
    {
        ERROR_LOG (L"Erase error");
    }

    sErasable = L"0123456789";

    // Case conversion (Latin and Cyrillic, member and static variants)
    CEString sConvertToUppercase(L"aAbBcC");
    sConvertToUppercase.ToUpper();
    if (sConvertToUppercase != L"AABBCC")
    {
        ERROR_LOG(L"ToUpper error");
    }

    sConvertToUppercase = CEString::sToUpper(L"aAbBcC");
    if (sConvertToUppercase != L"AABBCC")
    {
        ERROR_LOG(L"ToUpper error");
    }

    CEString sConvertToUppercaseCyr(L"aABbcCаАбБвВ");
    sConvertToUppercaseCyr.ToUpper();
    if (sConvertToUppercaseCyr != L"AABBCCААББВВ")
    {
        ERROR_LOG(L"ToUpper error for Cyrillic");
    }

    sConvertToUppercaseCyr = CEString::sToUpper(L"aAbBcCаАбБвВ");
    if (sConvertToUppercaseCyr != L"AABBCCААББВВ")
    {
        ERROR_LOG(L"sToUpper error for Cyrillic");
    }

    CEString sConvertToLowercase(L"aABbcC");
    sConvertToLowercase.ToLower();
    if (sConvertToLowercase != L"aabbcc")
    {
        ERROR_LOG(L"ToLower error");
    }

    sConvertToLowercase = CEString::sToLower(L"aAbBcC");
    if (sConvertToLowercase != L"aabbcc")
    {
        ERROR_LOG(L"ToLower error");
    }

    CEString sConvertToLowercaseCyr(L"aABbcCаАбБвВ");
    sConvertToLowercaseCyr.ToLower();
    if (sConvertToLowercaseCyr != L"aabbccааббвв")
    {
        ERROR_LOG(L"ToLower error for Cyrillic");
    }

    sConvertToLowercaseCyr = CEString::sToLower(L"aAbBcCаАбБвВ");
    if (sConvertToLowercaseCyr != L"aabbccааббвв")
    {
        ERROR_LOG(L"sToLower error for Cyrillic");
    }

    CEString sFromAscii = CEString::sToString("abcdefgxyzABCDEFGXYZ01234567890.,!");
    if (sFromAscii != L"abcdefgxyzABCDEFGXYZ01234567890.,!")
    {
        ERROR_LOG(L"sToString error for ascii conversion");
    }

    // Replace
    CEString sReplaceable(L"01abcd6789");
    CEString sReplaced = sReplaceable.sReplace (2, L"2345");
    if (sReplaced != sReplaceable || sReplaceable != L"0123456789")
    {
        ERROR_LOG (L"Replace error");
    }

    sReplaceable = L"0123456abc";
    sReplaced = sReplaceable.sReplace (7, L"789");
    if (sReplaced != sReplaceable || sReplaceable != L"0123456789")
    {
        ERROR_LOG (L"Replace error");
    }

    sReplaceable = L"0123456a89";
    sReplaced = sReplaceable.sReplace (7, L'7');
    if (sReplaced != sReplaceable || sReplaceable != L"0123456789")
    {
        ERROR_LOG (L"Replace error");
    }

    sReplaceable = L"012345678a";
    sReplaced = sReplaceable.sReplace (9, L'9');
    if (sReplaced != sReplaceable || sReplaceable != L"0123456789")
    {
        ERROR_LOG (L"Replace error");
    }


    sReplaceable = L"01234abc89";
    sReplaced = sReplaceable.sReplace (5, 3, L"567");
    if (sReplaced != sReplaceable || sReplaceable != L"0123456789")
    {
        ERROR_LOG (L"Replace error");
    }

    sReplaceable = L"01234aaa6789";
    sReplaced = sReplaceable.sReplace (5, 3, L"5");
    if (sReplaced != sReplaceable || sReplaceable != L"0123456789")
    {
        ERROR_LOG (L"Replace error");
    }

    // The result must go into sReplaced (not sErased); otherwise the check
    // below compares against the stale value left by the previous test.
    sReplaceable = L"01234567ab";
    sReplaced = sReplaceable.sReplace (8, 2, L"89");
    if (sReplaced != sReplaceable || sReplaceable != L"0123456789")
    {
        ERROR_LOG (L"Replace error");
    }

    sReplaceable = L"01234567a9";
    sReplaced = sReplaceable.sReplace (8, 2, L"8");
    if (sReplaced != sReplaceable || sReplaceable != L"012345678")
    {
        ERROR_LOG (L"Replace error");
    }

    sReplaceable = L"01234567ab";
    sReplaced = sReplaceable.sReplace (8, 2, L"890");
    if (sReplaced != sReplaceable || sReplaceable != L"01234567890")
    {
        ERROR_LOG (L"Replace error");
    }

    // Character-for-character replacement
    // NOTE(review): the string is 11 characters long while the second
    // argument is 10 -- confirm the intended meaning of that argument.
    sReplaceable = L"0ё2345ё78ёё";
    sReplaceable.Replace (0, 10, L'ё', L'е');
    if (sReplaceable != L"0е2345е78ее")
    {
        ERROR_LOG (L"Replace error");
    }


    // Trimming: default (whitespace) and with an explicit character set
    CEString sTrimmable (L"     01234     ");
    sTrimmable.TrimLeft();
    if (sTrimmable != L"01234     ")
    {
        ERROR_LOG (L"Trim or comparison error");
    }

    sTrimmable.TrimRight();
    if (sTrimmable != L"01234")
    {
        ERROR_LOG (L"Trim or comparison error");
    }

    sTrimmable = L"     01234     ";
    sTrimmable.Trim();
    if (sTrimmable != L"01234")
    {
        ERROR_LOG (L"Trim or comparison error");
    }

    sTrimmable = L"=&=&=01234&&&==";
    sTrimmable.TrimLeft (L"=&");
    if (sTrimmable != L"01234&&&==")
    {
        ERROR_LOG (L"Trim or comparison error");
    }

    sTrimmable.Trim (L"=&");
    if (sTrimmable != L"01234")
    {
        ERROR_LOG (L"Trim or comparison error");
    }

    sTrimmable = L"=&=&=01234&&&==";
    sTrimmable.Trim (L"=&");
    if (sTrimmable != L"01234")
    {
        ERROR_LOG (L"Trim or comparison error");
    }

    CEString sReversable (L"0123456789");
    sReversable.Reverse();
    if (sReversable != L"9876543210")
    {
        ERROR_LOG (L"Reversing error");
    }

    // Substrings
    CEString sWhole (L"0123456789");
    CEString sSubstr = sWhole.sSubstr (1, 3);
    if (sSubstr != L"123")
    {
        ERROR_LOG (L"Substring or comparison error");
    }

    sSubstr = sWhole.sSubstr (7);
    if (sSubstr != L"789")
    {
        ERROR_LOG (L"Substring or comparison error");
    }

    // Tokenizer: "123 456 789" with a single space as the break character
    CEString sFields (L"123 456 789");
    sFields.SetBreakChars (L" ");
    CEString sField = sFields.sGetField (1);
    if (sField != L"456")
    {
        ERROR_LOG (L"Tokenizer or comparison error");
    }

    StToken stToken = sFields.stGetField (0);
    if (3 != stToken.uiLength || 0 != stToken.uiOffset || ecTokenText != stToken.eType)
    {
        ERROR_LOG (L"Tokenizer or comparison error");
    }

    stToken = sFields.stGetField (0, ecTokenSpace);
    if (1 != stToken.uiLength || 3 != stToken.uiOffset || ecTokenSpace != stToken.eType)
    {
        ERROR_LOG (L"Tokenizer or comparison error");
    }

    // Out-of-range field access test, currently disabled
    try
    {
//        stToken = sFields.stGetField (99);
//        ERROR_LOG (L"Tokenizer or comparison error");   // Exception expected
    }
    catch (CException& ex)
    {
//        ::MessageBox (NULL, ex.sGetDescription().c_str(), L"Kai Exception", MB_ICONWARNING);
    }

//    ST_Token st_GetFieldFromOffset (int i_offset,
//                                    et_TokenType eo_type = ec_TokenText);

    stToken = sFields.stGetTokenFromOffset (6);
    if (3 != stToken.uiLength || 4 != stToken.uiOffset || ecTokenText != stToken.eType)
    {
        ERROR_LOG (L"Tokenizer or comparison error");
    }

    ETokenType eType = sFields.eGetTokenType (1);
//    et_TokenType eo_GetTokenType (int i_offset, int i_at);
    if (ecTokenBreakChars != eType)
    {
        ERROR_LOG (L"Tokenizer or comparison error");
    }

    stToken = sFields.stGetToken (1);
    if (1 != stToken.uiLength || 3 != stToken.uiOffset || ecTokenSpace != stToken.eType)
    {
        ERROR_LOG (L"Tokenizer or comparison error");
    }

    // Validate the reference returned by rstGetToken() itself, not the copy
    // obtained from stGetToken() above.
    const StToken& rstToken = sFields.rstGetToken (1);
    if (1 != rstToken.uiLength || 3 != rstToken.uiOffset || ecTokenSpace != rstToken.eType)
    {
        ERROR_LOG (L"Tokenizer or comparison error");
    }

    CEString sToken = sFields.sGetToken (1);
    if (sToken != L" ")
    {
        ERROR_LOG (L"Tokenizer or comparison error");
    }

    // Token index far out of range: a CException, if thrown, is logged
    try
    {
        CEString sToken1 = sFields.sGetToken(999);
        if (sToken1 != L" ")
        {
            ERROR_LOG(L"Tokenizer or comparison error");
        }
    }
    catch (CException& ex)
    {
        CEString sMsg(L"Exception: ");
        sMsg += ex.szGetDescription();
        ERROR_LOG(sMsg);
    }

    // Step forward, then back, from the token currently held in stToken
    bool b_ = sFields.bGetNextToken(stToken);
    if (!b_ || ecTokenText != stToken.eType || 4 != stToken.uiOffset || 3 != stToken.uiLength)
    {
        ERROR_LOG (L"Tokenizer or comparison error");
    }

    b_ = sFields.bGetPrevToken (stToken);
    if (!b_ || ecTokenBreakChars != stToken.eType || 3 != stToken.uiOffset || 1 != stToken.uiLength)
    {
        ERROR_LOG (L"Tokenizer or comparison error");
    }

    unsigned int uiTokenNum = sFields.uiGetTokenNum (stToken);
    if (1 != uiTokenNum)
    {
        ERROR_LOG (L"Tokenizer or comparison error");
    }

    unsigned int uiFields = sFields.uiGetNumOfFields();
    if (3 != uiFields)
    {
        ERROR_LOG (L"Tokenizer or comparison error");
    }

    uiFields = sFields.uiGetNumOfFields (ecTokenSpace);
    if (2 != uiFields)
    {
        ERROR_LOG (L"Tokenizer or comparison error");
    }

//    uiFields = sFields.uiGetNumOfFields (3, 6);

    uiFields = sFields.uiNFields();
    if (3 != uiFields)
    {
        ERROR_LOG (L"Tokenizer or comparison error");
    }

//    uiFields = sFields.uiNFields (3, 6); 

    unsigned int uiTokens = sFields.uiGetNumOfTokens();
    if (5 != uiTokens)
    {
        ERROR_LOG (L"Tokenizer or comparison error");
    }

    uiTokens = sFields.uiNTokens();
    if (5 != uiTokens)
    {
        ERROR_LOG (L"Tokenizer or comparison error");
    }

    unsigned int uiVLength = sFields.uiGetVisibleLength();
    if (11 != uiVLength)
    {
        ERROR_LOG (L"Tokenizer or comparison error");
    }

    unsigned int uiFLength = sFields.uiGetFieldLength (1);
    if (3 != uiFLength)
    {
        ERROR_LOG (L"Tokenizer or comparison error");
    }

//    CEString s (L"abcdefg");
//    wchar_t * szData = (wchar_t*)s;

    // Syllables: five vowels in the test word
    CEString sSyllables (L"бавогузюы");
    sSyllables.SetVowels (L"аеёиоуыэюя");
    unsigned int uiSyllables = sSyllables.uiGetNumOfSyllables();
    if (5 != uiSyllables)
    {
        ERROR_LOG (L"Syllable count error");
    }

    uiSyllables = sSyllables.uiNSyllables();
    if (5 != uiSyllables)
    {
        ERROR_LOG (L"Syllable count error");
    }


    // Vowels & consonants
    unsigned int uiVowelPos = sSyllables.uiGetVowelPos (3);
    if (7 != uiVowelPos)
    {
        ERROR_LOG (L"Vowel position error");
    }
    
    unsigned int uiSyllPos = sSyllables.uiGetSyllableFromVowelPos (7);
    if (3 != uiSyllPos)
    {
        ERROR_LOG (L"Syllable position error");
    }

    // Integer to string conversion
    {
        CEString sConvert = CEString::sToString (9999999999999);
        if (L"9999999999999" != sConvert)
        {
            ERROR_LOG(L"Large int conversion error");
        }
        int i_ = 999999;
        sConvert = CEString::sToString (i_);
        if (L"999999" != sConvert)
        {
            ERROR_LOG(L"Int conversion error");
        }
    }

    // Floating point to string conversion (results are not checked)
    {
        CEString sConvert = CEString::sToString(999999999.9999);
        double d_ = 999999.999;
        sConvert = CEString::sToString(d_);
    }

    //
    // Done!
    //
    CLogger::pGetInstance()->Flush();

    // NOTE(review): the strings declared above are still in scope at this
    // point, so any heap memory they own may show up in the dump -- confirm.
    _CrtDumpMemoryLeaks();

    return 0;
}

Ejemplo n.º 13

Mostrar archivo

Archivo: LexPreprocessor.cpp Proyecto: kbogatyrev/Zal-Windows

int CT_LexPreprocessor::iClassifyStems()
// For every endings subtable, looks for the stems usable with it and
// stores up to NUM_SFX their longest common suffixes in the database
{
    if (m_pDb == NULL)
    {
        return -1;
    }
    const int MIN_NUMBER_OF_STEMS = 70;
    const int NUM_SFX = 5;
    const int MAX_NUM_SFX = 24;

    CEString sQuery, sStem;
    CEString *arr_sStems;
    CEString **parr_sSfx;
    vector<CEString> vecStems;
    int iLastSubtable = 0, iStem;

    iLastSubtable = m_pDb->iLastID(L"endings_meta");
    for (int iSubtable = 0; iSubtable <= iLastSubtable; ++iSubtable)
    {
        vecStems.clear();
        CEString sFirstLemma = L"";
        int iCutRight = 0;
        CEString sLemmaEnding = L"";

        vLongStemsBySubtable(iSubtable, 2, vecStems, sFirstLemma);
        if (vecStems.size() < MIN_NUMBER_OF_STEMS)
        {
            continue;
        }

        // Find the longest common prefix of the first stem and the corresponding lemma
        CEString* arr_sStemAndLemma;
        CEString** parr_sPfx;
        arr_sStemAndLemma = new CEString[2];
        arr_sStemAndLemma[0] = vecStems[0];
        arr_sStemAndLemma[1] = sFirstLemma;
        parr_sPfx = new CEString*;
        *parr_sPfx = new CEString[1];
        int iPfx = iLCP(arr_sStemAndLemma, parr_sPfx, 2, 1);
        if (iPfx <= 0)
        {
            continue;
        }
        CEString sCommonPfx = (*parr_sPfx)[0];
        iCutRight = vecStems[0].uiLength() - sCommonPfx.uiLength();
        if (iCutRight >= 4)
        {
            continue;
        }
        sLemmaEnding = sFirstLemma.sSubstr(sCommonPfx.uiLength(), sFirstLemma.uiLength() - sCommonPfx.uiLength());

        // Find longest common suffixes of the stems found
        iStem = 0;
        arr_sStems = new CEString[vecStems.size()];
        parr_sSfx = new CEString*;
        *parr_sSfx = new CEString[1];
        for (vector<CEString>::iterator iterStems = vecStems.begin();
            iterStems != vecStems.end();
            ++iterStems, ++iStem)
        {
            // We reverse the stem so that i_LCP could find suffixes
            // instead of prefixes
//            reverse((*iter_stems).begin(), (*iter_stems).end());
            (*iterStems).Reverse();
            arr_sStems[iStem] = *iterStems;
        }

        // several attemps
        int iSfx = 0;
        int iMaxSfx = NUM_SFX;
        while (iSfx <= 0 && iMaxSfx <= MAX_NUM_SFX)
        {
            delete[] *parr_sSfx;
            delete parr_sSfx;
            parr_sSfx = new CEString*;
            *parr_sSfx = new CEString[1];
            iSfx = iLCP(arr_sStems, parr_sSfx, vecStems.size(), iMaxSfx);
            if (iSfx == 1 && (*parr_sSfx)[0].uiLength() <= 0)
            {
                iSfx = 0;
            }
            iMaxSfx += 2;
        }
        vInsertCommonSfx(parr_sSfx, iSfx, iSubtable, vecStems.size(), iCutRight, sLemmaEnding);

        delete[] arr_sStems;
        delete[] *parr_sSfx;
        delete parr_sSfx;

        // TEST
        //if (i_subtable > 100)
        //{
        //    break;
        //}
    }
    return 0;
}

Ejemplo n.º 14

Mostrar archivo

Archivo: Lexeme.cpp Proyecto: kbogatyrev/Zal-Windows

//
// Reloads m_mmapIrregularForms from the irregular_forms table for this
// lexeme (selected by m_stProperties.iDbKey), attaching to every form the
// stress positions found in irregular_stress.
//
// Returns H_FALSE when the lexeme is not flagged as having irregular forms,
// H_EXCEPTION when anything throws while reading the database (the error is
// logged), and H_NO_ERROR otherwise. m_stProperties.bHasIrregularVariants is
// reset and then set again if at least one row is marked is_alternative.
//
ET_ReturnCode CLexeme::eLoadIrregularForms()
{
    ET_ReturnCode rc = H_NO_ERROR;

    if (!m_stProperties.bHasIrregularForms)
    {
        return H_FALSE;
    }

    m_stProperties.bHasIrregularVariants = false;

    CEString sQuery 
        (L"SELECT id, gram_hash, wordform, is_alternative FROM irregular_forms WHERE descriptor_id = ");
    sQuery += CEString::sToString (m_stProperties.iDbKey);
    sQuery += L";";

    CSqlite * pDb = NULL;

    // Word form currently being built. It is non-NULL only between allocation
    // and insertion into the map, so that it can be released if an exception
    // interrupts the processing of a row.
    CWordForm * pWf = NULL;

    m_mmapIrregularForms.clear();

    try
    {
        pDb = m_pDictionary->pGetDbHandle();            
        unsigned int uiQueryHandle = pDb->uiPrepareForSelect (sQuery);
        while (pDb->bGetRow(uiQueryHandle))
        {
            int iId = -1;
            int iHash = -1;
            CEString sForm;
            bool bIsVariant = false;
            pDb->GetData (0, iId, uiQueryHandle);
            pDb->GetData (1, iHash, uiQueryHandle);
            pDb->GetData (2, sForm, uiQueryHandle);
            pDb->GetData (3, bIsVariant, uiQueryHandle);

            if (bIsVariant)
            {
                m_stProperties.bHasIrregularVariants = true;
            }

            CEString sStressQuery (L"SELECT position, is_primary FROM irregular_stress WHERE form_id = ");
            sStressQuery += CEString::sToString (iId);
            sStressQuery += L";";

            pWf = new CWordForm(iHash);
            pWf->m_pLexeme = this;
            pWf->m_bIrregular = true;
            pWf->m_sWordForm = sForm;

            // Nested query: one row per stress mark of this form.
            unsigned int uiStressHandle = pDb->uiPrepareForSelect (sStressQuery);
            while (pDb->bGetRow (uiStressHandle))
            {
                int iPos = -1;
                bool bPrimary = false;
                pDb->GetData (0, iPos, uiStressHandle);
                pDb->GetData (1, bPrimary, uiStressHandle);
                // The stress map is keyed by the value returned for the stored position.
                int iStressedSyll = sForm.uiGetSyllableFromVowelPos (iPos);
                pWf->m_mapStress[iStressedSyll] = bPrimary ? STRESS_PRIMARY : STRESS_SECONDARY;
            }
            pDb->Finalize (uiStressHandle);

            StIrregularForm stIf(pWf, bIsVariant);
            pair<int, StIrregularForm> pairHashToWordForm (iHash, stIf);
            m_mmapIrregularForms.insert (pairHashToWordForm);
            pWf = NULL;     // the map entry now refers to the word form

        }   //  while (pDb->bGetRow())

        pDb->Finalize(uiQueryHandle);
    }
    catch (CException& ex)      // by reference: no copy, no slicing of derived exceptions
    {
        ERROR_LOG (ex.szGetDescription());
        rc = H_EXCEPTION;
    }
    catch (...)
    {
        CEString sMsg;
        if (NULL == pDb)
        {
            // pGetDbHandle() itself failed: there is no handle to query for
            // error details, and dereferencing it would crash.
            sMsg = L"Apparent DB error: no database handle";
        }
        else
        {
            CEString sError;
            try
            {
                pDb->GetLastError (sError);
                // NOTE(review): the "%d" below is logged literally; it is never
                // substituted. Left untouched to keep the log output unchanged.
                sMsg += CEString (L", error %d: ");
                sMsg += sError;
            }
            catch (...)
            {
                sMsg = L"Apparent DB error ";
            }

            sMsg += CEString::sToString(pDb->iGetLastError());
        }
        ERROR_LOG (sMsg);
        rc = H_EXCEPTION;
    }

    // No-op on the normal path (pWf is NULL); releases a half-built word form
    // if an exception was thrown before it reached the map.
    delete pWf;

    return rc;

}   //  eLoadIrregularForms()