Ejemplo n.º 1
0
// Compute checksum of rectangular region specified within an image
void ComputeChecksum(char checkSumString[64], vx_image image, vx_rectangle_t * rectRegion)
{
	// get number of planes
	vx_df_image format = VX_DF_IMAGE_VIRT;
	vx_size num_planes = 0;
	ERROR_CHECK(vxQueryImage(image, VX_IMAGE_ATTRIBUTE_FORMAT, &format, sizeof(format)));
	ERROR_CHECK(vxQueryImage(image, VX_IMAGE_ATTRIBUTE_PLANES, &num_planes, sizeof(num_planes)));
	// compute checksum
	CHasher checksum; checksum.Initialize();
	for (vx_uint32 plane = 0; plane < (vx_uint32)num_planes; plane++) {
		vx_imagepatch_addressing_t addr;
		vx_uint8 * base_ptr = nullptr;
		ERROR_CHECK(vxAccessImagePatch(image, rectRegion, plane, &addr, (void **)&base_ptr, VX_READ_ONLY));
		vx_uint32 width = ((addr.dim_x * addr.scale_x) / VX_SCALE_UNITY);
		vx_uint32 height = ((addr.dim_y * addr.scale_y) / VX_SCALE_UNITY);
		vx_uint32 width_in_bytes = (format == VX_DF_IMAGE_U1_AMD) ? ((width + 7) >> 3) : (width * addr.stride_x);
		for (vx_uint32 y = 0; y < height; y++) {
			checksum.Process(base_ptr + y * addr.stride_y, width_in_bytes);
		}
		ERROR_CHECK(vxCommitImagePatch(image, rectRegion, plane, &addr, base_ptr));
	}
	// copy the checksum string
	strcpy(checkSumString, checksum.GetCheckSum());
}
Ejemplo n.º 2
0
int CAnalyzer::iCheckEndings(vector<CHasher>& vecPossibleWordforms,
                             vector<stStemLinks>& vecStems,
                             CEString sLeft,
                             CEString sRight,
                             int iStressPosEnding)
// If pvec_stems_id IS NOT empty:
// For every stem in pvec_stems_id, take the corresponding endings table
// and look whether it contains an ending equal to sRight;
// for every such ending, add a wordform to vecPossibleWordforms.
//
// If pvec_stems_id IS empty:
// Look for an ending equal to sRight; for every such ending,
// build a wordform and store it in vecPossibleWordforms.
// (Identical wordforms are stored as one wordform.)
{
    if (m_pDb == NULL) // || vecStems == NULL)
    {
        return -1;
    }
    static vector<int> vecGram;
    CEString str_query, sLemma;
    vector<CEString> vecLemma;

    for (vector<stStemLinks>::iterator itStems = vecStems.begin();
        itStems != vecStems.end(); itStems++)
    {
        // For each *itStems look up the endings table ID in DB, then in this table try to find
        // endings which are equal to sRight. For each ending found, write the parameters
        // to tmpWf and then push_back tmpWf to vecPossibleWordforms:
        vecGram.clear();
        vecGram = arr_freq_endings[(*itStems).iEndingsLink].m_vecFind (sRight, iStressPosEnding);
        if (vecGram.empty())
        {
            continue;
        }
        for (vector<int>::iterator iter_endings = vecGram.begin();
            iter_endings != vecGram.end(); iter_endings++)
        {
            CHasher tmpWf;
            tmpWf.hDecodeHash(*iter_endings);
            tmpWf.m_llLexemeId = (*itStems).llLexemeId;
            tmpWf.m_sLemma = (*itStems).sLemma;
            //tmpWf.str_WordForm = sLeft + sRight;
            //h_AddClassifyingCategories(&tmpWf);
            vecPossibleWordforms.push_back (tmpWf);
        }
        vecLemma.clear(); // that vector is different for every stem found
    }

    if (vecStems.empty())
    // Try to guess the lexeme
    {
        if (sLeft.uiLength() <= 2)
        {
            return 0;
        }
        vector<int> vec_i_possible_ETs;
//        pair<unordered_multimap<wstring, int>::iterator,
//             unordered_multimap<wstring, int>::iterator> pair_search_result = umap_endings2subtbl.equal_range((wstring)sRight);
        pair<multimap<CEString, int>::iterator,
             multimap<CEString, int>::iterator> pair_search_result = umap_endings2subtbl.equal_range(sRight);
        for (; pair_search_result.first != pair_search_result.second; ++pair_search_result.first)
        {
            vec_i_possible_ETs.push_back(pair_search_result.first->second);
        }
        for (vector<int>::iterator iter_ET = vec_i_possible_ETs.begin();
             iter_ET != vec_i_possible_ETs.end();
             ++iter_ET)
        {
            if (arr_freq_endings[*iter_ET].m_sStemFinal.uiLength() > 0 &&
//                !regex_match(sLeft, (const wregex)(L"^.*(" + arr_freq_endings[*iter_ET].m_sStemFinale + L")$")))
                !sLeft.bRegexMatch (L"^.*(" + arr_freq_endings[*iter_ET].m_sStemFinal + L")$"))
            {
                continue;
            }
            if (sLeft.uiLength() <= arr_freq_endings[*iter_ET].m_iCutRight)
            {
                continue;
            }
            vecGram.clear();
            vecGram = arr_freq_endings[*iter_ET].m_vecFind(sRight, -2);
            if (vecGram.empty())
            {
                continue;
            }
            for (vector<int>::iterator itHash = vecGram.begin();
                 itHash != vecGram.end(); ++itHash)
            {
                CHasher tmpWf;
                tmpWf.m_sLemma = sLeft.sSubstr(0, sLeft.uiLength() - arr_freq_endings[*iter_ET].m_iCutRight) + arr_freq_endings[*iter_ET].m_sLemmaFinal;
                if (!bIsValidLemma (tmpWf.m_sLemma))
                {
                    continue;
                }
                // Check if what we've found is a new wordform
                bool bExists = false;
                for (vector<CHasher>::iterator itWf = vecPossibleWordforms.begin();
                     itWf != vecPossibleWordforms.end();
                     ++itWf)
                {
                    if ((*itWf).m_sLemma == tmpWf.m_sLemma && (*itWf).iGramHash() == *itHash)
                    {
                        bExists = true;
                    }
                }
                if (!bExists)
                {
                    tmpWf.hDecodeHash(*itHash);
                    tmpWf.m_llLexemeId = 0;
                    vecPossibleWordforms.push_back(tmpWf);
                }
            }
        }
    }
    vecGram.clear();
    return 0;
}
Ejemplo n.º 3
0
ET_ReturnCode CFormBuilderNouns::eBuild()
{
    ASSERT(m_pLexeme);   // we assume base class ctor took care of this

    ET_ReturnCode rc = H_NO_ERROR;

    m_pEndings = new CNounEndings(m_pLexeme);
    if (NULL == m_pEndings)
    {
        return H_ERROR_POINTER;
    }

    if (rc != H_NO_ERROR)
    {
        return rc;
    }

    ET_Animacy eAnimacy = m_pLexeme->eAnimacy();
    ET_Gender eoGender = m_pLexeme->eGender();

    CHasher gramIterator;
    gramIterator.Initialize(eoGender, eAnimacy);
    do
    {
        if ((L"мн." == m_pLexeme->sMainSymbol()) && (gramIterator.m_eNumber == NUM_SG))
        {
            continue;
        }

        if (NUM_PL == gramIterator.m_eNumber && 
            (CASE_PART == gramIterator.m_eCase || CASE_LOC == gramIterator.m_eCase))
        {
            continue;
        }

        if (CASE_PART == gramIterator.m_eCase && !m_pLexeme->bSecondGenitive())
        {
            continue;
        }

        if (CASE_LOC == gramIterator.m_eCase && !m_pLexeme->bSecondLocative())
        {
            continue;
        }

        // Handle acc ending
        ET_Case eEndingCase = gramIterator.m_eCase;
        if (CASE_ACC == gramIterator.m_eCase)
        {
            rc = eHandleAccEnding (gramIterator.m_eNumber, eEndingCase);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }
        }
        if (CASE_PART == gramIterator.m_eCase || CASE_LOC == gramIterator.m_eCase)
        {
            eEndingCase = CASE_DAT;
        }

        CEString sLemma (m_pLexeme->sGraphicStem());
        if (m_pLexeme->bHasIrregularForms())
        {
            bool bSkipRegular = false;
            rc = eCheckIrregularForms (gramIterator.m_eGender, 
                                       gramIterator.m_eAnimacy,
                                       gramIterator.m_eCase,
                                       eEndingCase,
                                       gramIterator.m_eNumber,
                                       bSkipRegular);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }
            if (bSkipRegular)
            {
                // Workaround for lack of "исх. форма иррег." mark in current source
                if (GENDER_M == gramIterator.m_eGender && NUM_SG == gramIterator.m_eNumber && CASE_NOM == gramIterator.m_eCase)
                {
                    m_bIrregularSourceForm = true;
                }
                continue;
            }
        }

        rc = eHandleStemAugment (sLemma, gramIterator.m_eNumber, gramIterator.m_eCase);
        if (rc != H_NO_ERROR)
        {
            return rc;
        }

        ET_StressLocation eStress = STRESS_LOCATION_UNDEFINED;
        if (CASE_LOC == gramIterator.m_eCase)
        {
            eStress = STRESS_LOCATION_ENDING;
        }
        else
        {
            rc = eGetStressType (gramIterator.m_eNumber, eEndingCase, eStress);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }
        }

        ((CNounEndings *)m_pEndings)->eSelect(gramIterator.m_eNumber, eEndingCase, eStress);
        int iNumEndings = m_pEndings->iCount();
        if (iNumEndings < 1)
        {
            if (m_pLexeme->iType() != 0)
            {
                ASSERT(0);
                ERROR_LOG(L"No endings");
            }
            continue;
        }

        CEString sSavedLemma (sLemma);      // lemma can change, e.g. because of a fleetimg vowel
        for (int iEnding = 0; iEnding < iNumEndings; ++iEnding, sLemma = sSavedLemma)
        {
            // Get ending and modify as necessary
            CEString sEnding;
            unsigned __int64 llEndingKey = -1;
            rc = m_pEndings->eGetEnding(iEnding, sEnding, llEndingKey);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }

            if (8 == m_pLexeme->iType() && GENDER_N != m_pLexeme->eGender())
            {
                if (sLemma.bEndsWithOneOf (L"шжчщц"))
                {
                    if (sEnding.bStartsWith (L"я"))
                    {
                        continue;
                    }
                }
                else
                {
                    if (sEnding.bStartsWith (L"а"))
                    {
                        continue;
                    }
                }
            }

            bool bHasFleetingVowel = false;
            rc = eFleetingVowelCheck (gramIterator.m_eNumber, 
                                      eEndingCase,
                                      gramIterator.m_eGender, 
                                      eStress,
                                      SUBPARADIGM_NOUN,
                                      sEnding,
                                      sLemma);
            if (rc != H_NO_ERROR)
            {
                continue;
            }

            vector<int> vecStress;
            rc = eGetStressPositions (sLemma, sEnding, eStress, vecStress);
            if (rc != H_NO_ERROR)
            {
                continue;
            }

            CWordForm * pWordForm = NULL;
            rc = eCreateFormTemplate (gramIterator.m_eNumber, gramIterator.m_eCase, sLemma, pWordForm);
            if (rc != H_NO_ERROR)
            {
                continue;
            }

            if (1 == vecStress.size() || m_pLexeme->bIsMultistressedCompound())
            {
                vector<int>::iterator itStressPos = vecStress.begin();
                for (; itStressPos != vecStress.end(); ++itStressPos)
                {
                    pWordForm->m_mapStress[*itStressPos] = STRESS_PRIMARY;  // primary
                    rc = eHandleYoAlternation (eStress, *itStressPos, pWordForm->m_sLemma, sEnding);
                    if (rc != H_NO_ERROR)
                    {
                        continue;
                    }
                    pWordForm->m_sEnding = sEnding;
                    pWordForm->m_llEndingDataId = llEndingKey;
                    pWordForm->m_sWordForm = pWordForm->m_sLemma + sEnding;
                }
                m_pLexeme->AddWordForm (pWordForm);                        
            }
            else
            {
                vector<int>::iterator itStressPos = vecStress.begin();
                for (; itStressPos != vecStress.end(); ++itStressPos)
                {
                    if (itStressPos != vecStress.begin())
                    {
                        CWordForm * pwfVariant = NULL;
                        CloneWordForm (pWordForm, pwfVariant);
                        pwfVariant->m_mapStress.clear();
                        pWordForm = pwfVariant;
                    }
                    pWordForm->m_mapStress[*itStressPos] = STRESS_PRIMARY;  // primary
                    rc = eHandleYoAlternation (eStress, *itStressPos, pWordForm->m_sLemma, sEnding);
                    if (rc != H_NO_ERROR)
                    {
                        continue;
                    }
                    pWordForm->m_sWordForm = pWordForm->m_sLemma + sEnding;
                    pWordForm->m_sEnding = sEnding;
                    pWordForm->m_llEndingDataId = llEndingKey;

                    m_pLexeme->AddWordForm (pWordForm);
                }
            }
        }   //  for (int iEnding = 0; ... )
    
    } while (gramIterator.bIncrement());

    return H_NO_ERROR;

}    //  eBuildNounForms()
ET_ReturnCode CFormBuilderPronounAdj::eBuild()
{
	ASSERT(m_pLexeme);   // we assume base class ctor took care of this

    ET_ReturnCode rc = H_NO_ERROR;

    m_pEndings = new CAdjPronounEndings(m_pLexeme);
    if (NULL == m_pEndings)
    {
        return H_ERROR_POINTER;
    }

    CHasher gramTmp;
    gramTmp.Initialize (POS_PRONOUN_ADJ);
    gramTmp.SetParadigm (SUBPARADIGM_PRONOUN_ADJ);
    do
    {
        bool bSkipRegular = false;
        if (m_pLexeme->bHasIrregularForms())
        {
            int iPronAdj = iGramHash (m_pLexeme->ePartOfSpeech(),
                                      gramTmp.m_eCase, gramTmp.m_eNumber,
                                      gramTmp.m_eGender, gramTmp.m_eAnimacy);

            if (m_pLexeme->bHasIrregularForm(iPronAdj))
            {
                map<CWordForm *, bool> mapPronAdj;
                rc = m_pLexeme->eGetIrregularForms(iPronAdj, mapPronAdj);
                if (rc != H_NO_ERROR)
                {
                    return rc;
                }

                map<CWordForm *, bool>::iterator it = mapPronAdj.begin();
                for (; it != mapPronAdj.end(); ++it)
                {
                    CWordForm * pWordForm = NULL;
                    rc = eCreateFormTemplate(gramTmp.m_eGender,
                                             gramTmp.m_eNumber,
                                             gramTmp.m_eCase,
                                             gramTmp.m_eAnimacy,
                                             L"",
                                             -1,
                                             pWordForm);
                    if (rc != H_NO_ERROR)
                    {
                        continue;
                    }

                    pWordForm->m_sWordForm = (*it).first->m_sWordForm;
                    pWordForm->m_mapStress = (*it).first->m_mapStress;
                    m_pLexeme->AddWordForm(pWordForm);

                    bSkipRegular = !(*it).second;  // i.e. no regular variant
                }
            }
        }       //  if (m_pLexeme->m_bHasIrregularForms)

        if (bSkipRegular)
        {
            continue;
        }

        ET_StressLocation eStressType = STRESS_LOCATION_UNDEFINED;
        switch (m_pLexeme->eAccentType1())
        {
            case AT_A:
            {
                eStressType = STRESS_LOCATION_STEM;
                break;
            }
            case AT_B:
            {
                eStressType = STRESS_LOCATION_ENDING;
                break;
            }
            case AT_F:
            {
                if (NUM_SG == gramTmp.m_eNumber)
                {
                    eStressType = STRESS_LOCATION_ENDING;
                }
                else if (CASE_NOM == gramTmp.m_eCase)
                {
                    eStressType = STRESS_LOCATION_STEM;
                }
                else if (CASE_ACC == gramTmp.m_eCase && ANIM_NO == gramTmp.m_eAnimacy)
                {
                    eStressType = STRESS_LOCATION_STEM;
                }
                else
                {
                    eStressType = STRESS_LOCATION_ENDING;
                }
                break;
            }
            default:
            {
                ASSERT(0);
                ERROR_LOG (L"Illegal accent type.");
                return H_ERROR_UNEXPECTED;
            }

        }   //  switch (m_pLexeme->eAccentType1())

        rc = ((CAdjPronounEndings *)m_pEndings)->eSelect(gramTmp.m_eGender, gramTmp.m_eNumber, gramTmp.m_eCase, gramTmp.m_eAnimacy);
        int iNumEndings = m_pEndings->iCount();
        if (iNumEndings < 1)
        {
            ERROR_LOG (L"No endings");
            continue;
        }

        for (int iEnding = 0; iEnding < iNumEndings; ++iEnding)
        {
            CEString sEnding;
            unsigned __int64 llEndingKey = -1;
            rc = m_pEndings->eGetEnding(iEnding, sEnding, llEndingKey);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }

            if (m_pLexeme->bFleetingVowel() &&
                (gramTmp.m_eCase != CASE_NOM || gramTmp.m_eGender != GENDER_M) &&
                (gramTmp.m_eCase != CASE_ACC || gramTmp.m_eGender != GENDER_M ||
                                                    gramTmp.m_eAnimacy != ANIM_NO))
            {
                rc = eFleetingVowelCheck (gramTmp.m_eNumber, gramTmp.m_eCase,
                                          gramTmp.m_eGender, eStressType,
                                          SUBPARADIGM_LONG_ADJ, sEnding, m_sLemma);
                if (rc != H_NO_ERROR)
                {
                    return rc;
                }
            }

            CWordForm * pWordForm = NULL;
            rc = eCreateFormTemplate(gramTmp.m_eGender, 
                                     gramTmp.m_eNumber, 
                                     gramTmp.m_eCase, 
                                     gramTmp.m_eAnimacy, 
                                     sEnding, 
                                     llEndingKey,
                                     pWordForm);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }

            vector<int> vecStress;
            rc = eGetStressPositions (sEnding, eStressType, vecStress);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }

            if (1 == vecStress.size() || m_pLexeme->bIsMultistressedCompound())
            {
                vector<int>::iterator itStressPos = vecStress.begin();
                for (; itStressPos != vecStress.end(); ++itStressPos)
                {
                    pWordForm->m_mapStress[*itStressPos] = STRESS_PRIMARY;
                }
                m_pLexeme->AddWordForm (pWordForm);
            }
            else
            {
                vector<int>::iterator itStressPos = vecStress.begin();
                for (; itStressPos != vecStress.end(); ++itStressPos)
                {
                    if (itStressPos != vecStress.begin())
                    {
                        CWordForm * pWfVariant = NULL;
                        CloneWordForm (pWordForm, pWfVariant);
                        pWordForm = pWfVariant;
                    }
                    pWordForm->m_mapStress[*itStressPos] = STRESS_PRIMARY;
                    m_pLexeme->AddWordForm (pWordForm);
                }
            }
        }
    } while (gramTmp.bIncrement());

    return H_NO_ERROR;

}   //  eBuild (...)