// Compute checksum of rectangular region specified within an image void ComputeChecksum(char checkSumString[64], vx_image image, vx_rectangle_t * rectRegion) { // get number of planes vx_df_image format = VX_DF_IMAGE_VIRT; vx_size num_planes = 0; ERROR_CHECK(vxQueryImage(image, VX_IMAGE_ATTRIBUTE_FORMAT, &format, sizeof(format))); ERROR_CHECK(vxQueryImage(image, VX_IMAGE_ATTRIBUTE_PLANES, &num_planes, sizeof(num_planes))); // compute checksum CHasher checksum; checksum.Initialize(); for (vx_uint32 plane = 0; plane < (vx_uint32)num_planes; plane++) { vx_imagepatch_addressing_t addr; vx_uint8 * base_ptr = nullptr; ERROR_CHECK(vxAccessImagePatch(image, rectRegion, plane, &addr, (void **)&base_ptr, VX_READ_ONLY)); vx_uint32 width = ((addr.dim_x * addr.scale_x) / VX_SCALE_UNITY); vx_uint32 height = ((addr.dim_y * addr.scale_y) / VX_SCALE_UNITY); vx_uint32 width_in_bytes = (format == VX_DF_IMAGE_U1_AMD) ? ((width + 7) >> 3) : (width * addr.stride_x); for (vx_uint32 y = 0; y < height; y++) { checksum.Process(base_ptr + y * addr.stride_y, width_in_bytes); } ERROR_CHECK(vxCommitImagePatch(image, rectRegion, plane, &addr, base_ptr)); } // copy the checksum string strcpy(checkSumString, checksum.GetCheckSum()); }
int CAnalyzer::iCheckEndings(vector<CHasher>& vecPossibleWordforms, vector<stStemLinks>& vecStems, CEString sLeft, CEString sRight, int iStressPosEnding) // If pvec_stems_id IS NOT empty: // For every stem in pvec_stems_id, take the corresponding endings table // and look whether it contains an ending equal to sRight; // for every such ending, add a wordform to vecPossibleWordforms. // // If pvec_stems_id IS empty: // Look for an ending equal to sRight; for every such ending, // build a wordform and store it in vecPossibleWordforms. // (Identical wordforms are stored as one wordform.) { if (m_pDb == NULL) // || vecStems == NULL) { return -1; } static vector<int> vecGram; CEString str_query, sLemma; vector<CEString> vecLemma; for (vector<stStemLinks>::iterator itStems = vecStems.begin(); itStems != vecStems.end(); itStems++) { // For each *itStems look up the endings table ID in DB, then in this table try to find // endings which are equal to sRight. For each ending found, write the parameters // to tmpWf and then push_back tmpWf to vecPossibleWordforms: vecGram.clear(); vecGram = arr_freq_endings[(*itStems).iEndingsLink].m_vecFind (sRight, iStressPosEnding); if (vecGram.empty()) { continue; } for (vector<int>::iterator iter_endings = vecGram.begin(); iter_endings != vecGram.end(); iter_endings++) { CHasher tmpWf; tmpWf.hDecodeHash(*iter_endings); tmpWf.m_llLexemeId = (*itStems).llLexemeId; tmpWf.m_sLemma = (*itStems).sLemma; //tmpWf.str_WordForm = sLeft + sRight; //h_AddClassifyingCategories(&tmpWf); vecPossibleWordforms.push_back (tmpWf); } vecLemma.clear(); // that vector is different for every stem found } if (vecStems.empty()) // Try to guess the lexeme { if (sLeft.uiLength() <= 2) { return 0; } vector<int> vec_i_possible_ETs; // pair<unordered_multimap<wstring, int>::iterator, // unordered_multimap<wstring, int>::iterator> pair_search_result = umap_endings2subtbl.equal_range((wstring)sRight); pair<multimap<CEString, int>::iterator, multimap<CEString, 
int>::iterator> pair_search_result = umap_endings2subtbl.equal_range(sRight); for (; pair_search_result.first != pair_search_result.second; ++pair_search_result.first) { vec_i_possible_ETs.push_back(pair_search_result.first->second); } for (vector<int>::iterator iter_ET = vec_i_possible_ETs.begin(); iter_ET != vec_i_possible_ETs.end(); ++iter_ET) { if (arr_freq_endings[*iter_ET].m_sStemFinal.uiLength() > 0 && // !regex_match(sLeft, (const wregex)(L"^.*(" + arr_freq_endings[*iter_ET].m_sStemFinale + L")$"))) !sLeft.bRegexMatch (L"^.*(" + arr_freq_endings[*iter_ET].m_sStemFinal + L")$")) { continue; } if (sLeft.uiLength() <= arr_freq_endings[*iter_ET].m_iCutRight) { continue; } vecGram.clear(); vecGram = arr_freq_endings[*iter_ET].m_vecFind(sRight, -2); if (vecGram.empty()) { continue; } for (vector<int>::iterator itHash = vecGram.begin(); itHash != vecGram.end(); ++itHash) { CHasher tmpWf; tmpWf.m_sLemma = sLeft.sSubstr(0, sLeft.uiLength() - arr_freq_endings[*iter_ET].m_iCutRight) + arr_freq_endings[*iter_ET].m_sLemmaFinal; if (!bIsValidLemma (tmpWf.m_sLemma)) { continue; } // Check if what we've found is a new wordform bool bExists = false; for (vector<CHasher>::iterator itWf = vecPossibleWordforms.begin(); itWf != vecPossibleWordforms.end(); ++itWf) { if ((*itWf).m_sLemma == tmpWf.m_sLemma && (*itWf).iGramHash() == *itHash) { bExists = true; } } if (!bExists) { tmpWf.hDecodeHash(*itHash); tmpWf.m_llLexemeId = 0; vecPossibleWordforms.push_back(tmpWf); } } } } vecGram.clear(); return 0; }
// Builds the noun wordforms of m_pLexeme and registers each of them through
// m_pLexeme->AddWordForm().
//
// The gram iterator is initialized with the lexeme's gender and animacy and
// advanced with bIncrement(); for every combination that is not filtered out,
// the endings are selected and one wordform per ending (and, where there are
// several stress positions, per stress variant) is produced.
//
// Returns:
//   H_ERROR_POINTER if the endings object pointer is NULL after allocation;
//   the error code of eHandleAccEnding, eCheckIrregularForms,
//   eHandleStemAugment, eGetStressType or eGetEnding if one of them fails;
//   H_NO_ERROR otherwise. Failures of eFleetingVowelCheck, eGetStressPositions,
//   eCreateFormTemplate and eHandleYoAlternation only skip the current item
//   and are not reported to the caller.
ET_ReturnCode CFormBuilderNouns::eBuild()
{
    ASSERT(m_pLexeme);   // we assume base class ctor took care of this

    ET_ReturnCode rc = H_NO_ERROR;

    m_pEndings = new CNounEndings(m_pLexeme);
    if (NULL == m_pEndings)
    {
        return H_ERROR_POINTER;
    }

    ET_Animacy eAnimacy = m_pLexeme->eAnimacy();
    ET_Gender eoGender = m_pLexeme->eGender();

    CHasher gramIterator;
    gramIterator.Initialize(eoGender, eAnimacy);

    // Note: every `continue` directly inside this do/while jumps to the loop
    // condition, i.e. it still advances the iterator via bIncrement().
    do
    {
        // Lexemes whose main symbol is the plural-only mark have no singular forms.
        if ((L"мн." == m_pLexeme->sMainSymbol()) && (gramIterator.m_eNumber == NUM_SG))
        {
            continue;
        }

        // CASE_PART and CASE_LOC are only generated for the singular ...
        if (NUM_PL == gramIterator.m_eNumber &&
            (CASE_PART == gramIterator.m_eCase || CASE_LOC == gramIterator.m_eCase))
        {
            continue;
        }

        // ... and only when the lexeme reports a second genitive / second locative.
        if (CASE_PART == gramIterator.m_eCase && !m_pLexeme->bSecondGenitive())
        {
            continue;
        }

        if (CASE_LOC == gramIterator.m_eCase && !m_pLexeme->bSecondLocative())
        {
            continue;
        }

        // Handle acc ending
        ET_Case eEndingCase = gramIterator.m_eCase;
        if (CASE_ACC == gramIterator.m_eCase)
        {
            rc = eHandleAccEnding (gramIterator.m_eNumber, eEndingCase);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }
        }

        // CASE_PART and CASE_LOC take their endings from CASE_DAT.
        if (CASE_PART == gramIterator.m_eCase || CASE_LOC == gramIterator.m_eCase)
        {
            eEndingCase = CASE_DAT;
        }

        CEString sLemma (m_pLexeme->sGraphicStem());

        if (m_pLexeme->bHasIrregularForms())
        {
            bool bSkipRegular = false;
            rc = eCheckIrregularForms (gramIterator.m_eGender,
                                       gramIterator.m_eAnimacy,
                                       gramIterator.m_eCase,
                                       eEndingCase,
                                       gramIterator.m_eNumber,
                                       bSkipRegular);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }

            if (bSkipRegular)
            {
                // Workaround for lack of the "исх. форма иррег." (irregular
                // source form) mark in current source
                if (GENDER_M == gramIterator.m_eGender &&
                    NUM_SG == gramIterator.m_eNumber &&
                    CASE_NOM == gramIterator.m_eCase)
                {
                    m_bIrregularSourceForm = true;
                }
                continue;
            }
        }

        rc = eHandleStemAugment (sLemma, gramIterator.m_eNumber, gramIterator.m_eCase);
        if (rc != H_NO_ERROR)
        {
            return rc;
        }

        ET_StressLocation eStress = STRESS_LOCATION_UNDEFINED;
        if (CASE_LOC == gramIterator.m_eCase)
        {
            eStress = STRESS_LOCATION_ENDING;
        }
        else
        {
            rc = eGetStressType (gramIterator.m_eNumber, eEndingCase, eStress);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }
        }

        // NOTE(review): the return value of eSelect is not inspected; an empty
        // selection is detected through iCount() below.
        ((CNounEndings *)m_pEndings)->eSelect(gramIterator.m_eNumber, eEndingCase, eStress);
        int iNumEndings = m_pEndings->iCount();
        if (iNumEndings < 1)
        {
            // Only lexemes with a non-zero type are reported as missing endings.
            if (m_pLexeme->iType() != 0)
            {
                ASSERT(0);
                ERROR_LOG(L"No endings");
            }
            continue;
        }

        CEString sSavedLemma (sLemma);      // lemma can change, e.g. because of a fleeting vowel
        // The loop increment restores sLemma, so every ending (including the
        // ones skipped with `continue`) starts from the saved lemma.
        for (int iEnding = 0; iEnding < iNumEndings; ++iEnding, sLemma = sSavedLemma)
        {
            // Get ending and modify as necessary
            CEString sEnding;
            unsigned __int64 llEndingKey = -1;
            rc = m_pEndings->eGetEnding(iEnding, sEnding, llEndingKey);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }

            // Type 8, non-neuter: after a stem ending in one of "шжчщц" drop the
            // endings that start with "я", otherwise drop those that start with "а".
            if (8 == m_pLexeme->iType() && GENDER_N != m_pLexeme->eGender())
            {
                if (sLemma.bEndsWithOneOf (L"шжчщц"))
                {
                    if (sEnding.bStartsWith (L"я"))
                    {
                        continue;
                    }
                }
                else
                {
                    if (sEnding.bStartsWith (L"а"))
                    {
                        continue;
                    }
                }
            }

            rc = eFleetingVowelCheck (gramIterator.m_eNumber,
                                      eEndingCase,
                                      gramIterator.m_eGender,
                                      eStress,
                                      SUBPARADIGM_NOUN,
                                      sEnding,
                                      sLemma);
            if (rc != H_NO_ERROR)
            {
                continue;
            }

            vector<int> vecStress;
            rc = eGetStressPositions (sLemma, sEnding, eStress, vecStress);
            if (rc != H_NO_ERROR)
            {
                continue;
            }

            CWordForm * pWordForm = NULL;
            rc = eCreateFormTemplate (gramIterator.m_eNumber, gramIterator.m_eCase, sLemma, pWordForm);
            if (rc != H_NO_ERROR)
            {
                continue;
            }

            if (1 == vecStress.size() || m_pLexeme->bIsMultistressedCompound())
            {
                // One wordform carries all stress positions.
                vector<int>::iterator itStressPos = vecStress.begin();
                for (; itStressPos != vecStress.end(); ++itStressPos)
                {
                    pWordForm->m_mapStress[*itStressPos] = STRESS_PRIMARY;  // primary
                    rc = eHandleYoAlternation (eStress, *itStressPos, pWordForm->m_sLemma, sEnding);
                    if (rc != H_NO_ERROR)
                    {
                        continue;
                    }
                    pWordForm->m_sEnding = sEnding;
                    pWordForm->m_llEndingDataId = llEndingKey;
                    pWordForm->m_sWordForm = pWordForm->m_sLemma + sEnding;
                }
                // The form is added even if eHandleYoAlternation failed above.
                m_pLexeme->AddWordForm (pWordForm);
            }
            else
            {
                // One wordform per stress position: the first position uses the
                // template itself, each further one a clone of the previous form
                // with its stress map emptied.
                // NOTE(review): when vecStress is empty, or eHandleYoAlternation
                // fails, pWordForm is never handed to AddWordForm on this path;
                // if eCreateFormTemplate/CloneWordForm allocate it, it is leaked
                // -- confirm ownership before changing.
                vector<int>::iterator itStressPos = vecStress.begin();
                for (; itStressPos != vecStress.end(); ++itStressPos)
                {
                    if (itStressPos != vecStress.begin())
                    {
                        CWordForm * pwfVariant = NULL;
                        CloneWordForm (pWordForm, pwfVariant);
                        pwfVariant->m_mapStress.clear();
                        pWordForm = pwfVariant;
                    }
                    pWordForm->m_mapStress[*itStressPos] = STRESS_PRIMARY;  // primary
                    rc = eHandleYoAlternation (eStress, *itStressPos, pWordForm->m_sLemma, sEnding);
                    if (rc != H_NO_ERROR)
                    {
                        continue;
                    }
                    pWordForm->m_sWordForm = pWordForm->m_sLemma + sEnding;
                    pWordForm->m_sEnding = sEnding;
                    pWordForm->m_llEndingDataId = llEndingKey;

                    m_pLexeme->AddWordForm (pWordForm);
                }
            }
        }   //  for (int iEnding = 0; ... )

    } while (gramIterator.bIncrement());

    return H_NO_ERROR;

}    //  eBuild()
// Builds the wordforms of a pronominal adjective lexeme and registers each of
// them through m_pLexeme->AddWordForm().
//
// The gram iterator is initialized for POS_PRONOUN_ADJ / SUBPARADIGM_PRONOUN_ADJ
// and advanced with bIncrement(). For every combination, irregular forms (if the
// lexeme has any for that gram hash) are copied first; regular forms are then
// generated unless the last irregular form processed is flagged as having no
// regular variant.
//
// Returns:
//   H_ERROR_POINTER if the endings object pointer is NULL after allocation;
//   H_ERROR_UNEXPECTED if the lexeme's accent type is not AT_A, AT_B or AT_F;
//   the error code of eGetIrregularForms, eGetEnding, eFleetingVowelCheck,
//   eCreateFormTemplate (regular forms) or eGetStressPositions if one fails;
//   H_NO_ERROR otherwise.
ET_ReturnCode CFormBuilderPronounAdj::eBuild()
{
    ASSERT(m_pLexeme);   // we assume base class ctor took care of this

    ET_ReturnCode rc = H_NO_ERROR;

    m_pEndings = new CAdjPronounEndings(m_pLexeme);
    if (NULL == m_pEndings)
    {
        return H_ERROR_POINTER;
    }

    CHasher gramTmp;
    gramTmp.Initialize (POS_PRONOUN_ADJ);
    gramTmp.SetParadigm (SUBPARADIGM_PRONOUN_ADJ);

    // Note: every `continue` directly inside this do/while jumps to the loop
    // condition, i.e. it still advances the iterator via bIncrement().
    do
    {
        bool bSkipRegular = false;

        if (m_pLexeme->bHasIrregularForms())
        {
            int iPronAdj = iGramHash (m_pLexeme->ePartOfSpeech(),
                                      gramTmp.m_eCase,
                                      gramTmp.m_eNumber,
                                      gramTmp.m_eGender,
                                      gramTmp.m_eAnimacy);

            if (m_pLexeme->bHasIrregularForm(iPronAdj))
            {
                map<CWordForm *, bool> mapPronAdj;
                rc = m_pLexeme->eGetIrregularForms(iPronAdj, mapPronAdj);
                if (rc != H_NO_ERROR)
                {
                    return rc;
                }

                // Copy spelling and stress of every irregular form into a fresh
                // template; a template that cannot be created is skipped.
                map<CWordForm *, bool>::iterator it = mapPronAdj.begin();
                for (; it != mapPronAdj.end(); ++it)
                {
                    CWordForm * pWordForm = NULL;
                    rc = eCreateFormTemplate(gramTmp.m_eGender,
                                             gramTmp.m_eNumber,
                                             gramTmp.m_eCase,
                                             gramTmp.m_eAnimacy,
                                             L"",
                                             -1,
                                             pWordForm);
                    if (rc != H_NO_ERROR)
                    {
                        continue;
                    }
                    pWordForm->m_sWordForm = (*it).first->m_sWordForm;
                    pWordForm->m_mapStress = (*it).first->m_mapStress;
                    m_pLexeme->AddWordForm(pWordForm);

                    // The flag is overwritten on each pass, so the last
                    // irregular form decides.
                    bSkipRegular = !(*it).second;  // i.e. no regular variant
                }
            }
        }       //  if (m_pLexeme->bHasIrregularForms())

        if (bSkipRegular)
        {
            continue;
        }

        // Map the lexeme's accent type to a stress location for this form.
        ET_StressLocation eStressType = STRESS_LOCATION_UNDEFINED;
        switch (m_pLexeme->eAccentType1())
        {
            case AT_A:
            {
                eStressType = STRESS_LOCATION_STEM;
                break;
            }
            case AT_B:
            {
                eStressType = STRESS_LOCATION_ENDING;
                break;
            }
            case AT_F:
            {
                if (NUM_SG == gramTmp.m_eNumber)
                {
                    eStressType = STRESS_LOCATION_ENDING;
                }
                else if (CASE_NOM == gramTmp.m_eCase)
                {
                    eStressType = STRESS_LOCATION_STEM;
                }
                else if (CASE_ACC == gramTmp.m_eCase && ANIM_NO == gramTmp.m_eAnimacy)
                {
                    eStressType = STRESS_LOCATION_STEM;
                }
                else
                {
                    eStressType = STRESS_LOCATION_ENDING;
                }
                break;
            }
            default:
            {
                ASSERT(0);
                ERROR_LOG (L"Illegal accent type.");
                return H_ERROR_UNEXPECTED;
            }
        }   //  switch (m_pLexeme->eAccentType1())

        // NOTE(review): the result of eSelect is stored but never inspected; an
        // empty selection is detected through iCount() below.
        rc = ((CAdjPronounEndings *)m_pEndings)->eSelect(gramTmp.m_eGender,
                                                         gramTmp.m_eNumber,
                                                         gramTmp.m_eCase,
                                                         gramTmp.m_eAnimacy);
        int iNumEndings = m_pEndings->iCount();
        if (iNumEndings < 1)
        {
            ERROR_LOG (L"No endings");
            continue;
        }

        for (int iEnding = 0; iEnding < iNumEndings; ++iEnding)
        {
            CEString sEnding;
            unsigned __int64 llEndingKey = -1;
            rc = m_pEndings->eGetEnding(iEnding, sEnding, llEndingKey);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }

            // The fleeting vowel check is skipped for masculine nominative and
            // for masculine inanimate accusative.
            // NOTE(review): m_sLemma is a member and is passed by name here; if
            // eFleetingVowelCheck modifies it, the change persists across
            // endings and gram combinations -- confirm this is intended.
            if (m_pLexeme->bFleetingVowel() &&
                (gramTmp.m_eCase != CASE_NOM || gramTmp.m_eGender != GENDER_M) &&
                (gramTmp.m_eCase != CASE_ACC || gramTmp.m_eGender != GENDER_M || gramTmp.m_eAnimacy != ANIM_NO))
            {
                rc = eFleetingVowelCheck (gramTmp.m_eNumber,
                                          gramTmp.m_eCase,
                                          gramTmp.m_eGender,
                                          eStressType,
                                          SUBPARADIGM_LONG_ADJ,
                                          sEnding,
                                          m_sLemma);
                if (rc != H_NO_ERROR)
                {
                    return rc;
                }
            }

            CWordForm * pWordForm = NULL;
            rc = eCreateFormTemplate(gramTmp.m_eGender,
                                     gramTmp.m_eNumber,
                                     gramTmp.m_eCase,
                                     gramTmp.m_eAnimacy,
                                     sEnding,
                                     llEndingKey,
                                     pWordForm);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }

            vector<int> vecStress;
            rc = eGetStressPositions (sEnding, eStressType, vecStress);
            if (rc != H_NO_ERROR)
            {
                return rc;
            }

            if (1 == vecStress.size() || m_pLexeme->bIsMultistressedCompound())
            {
                // One wordform carries all stress positions.
                vector<int>::iterator itStressPos = vecStress.begin();
                for (; itStressPos != vecStress.end(); ++itStressPos)
                {
                    pWordForm->m_mapStress[*itStressPos] = STRESS_PRIMARY;
                }
                m_pLexeme->AddWordForm (pWordForm);
            }
            else
            {
                // One wordform per stress position: the first position uses the
                // template itself, each further one a clone of the previous form.
                // NOTE(review): when vecStress is empty pWordForm is never handed
                // to AddWordForm; if eCreateFormTemplate allocates it, it is
                // leaked -- confirm ownership before changing.
                vector<int>::iterator itStressPos = vecStress.begin();
                for (; itStressPos != vecStress.end(); ++itStressPos)
                {
                    if (itStressPos != vecStress.begin())
                    {
                        CWordForm * pWfVariant = NULL;
                        CloneWordForm (pWordForm, pWfVariant);
                        // The clone is made from a form that already has a stress
                        // mark; empty the map so the variant carries only its own
                        // position (same as the noun form builder does).
                        pWfVariant->m_mapStress.clear();
                        pWordForm = pWfVariant;
                    }
                    pWordForm->m_mapStress[*itStressPos] = STRESS_PRIMARY;
                    m_pLexeme->AddWordForm (pWordForm);
                }
            }
        }

    } while (gramTmp.bIncrement());

    return H_NO_ERROR;

}   //  eBuild (...)