void CMultiWordCreator::CreateWordIndexes(const yset<SArtPointer>& artPointers, yvector<SWordHomonymNum>& WordIndexes, bool bAddEndOfStreamSymbol) { for (yset<SArtPointer>::const_iterator it = artPointers.begin(); it != artPointers.end(); ++it) FindKWWords(*it, KW_DICT); InitWordIndexes(WordIndexes); //использую алгоритм SolveAmbiguity и соответственно структуру COccurrence //COccurrence.m_GrammarRuleNo - номер multiWord yvector<COccurrence> multiWordsOccurrences; for (size_t i = 0; i < m_Words.GetMultiWordsCount(); ++i) { const CWord& pW = m_Words.GetMultiWord(i); yset<SArtPointer>::const_iterator it; for (it = artPointers.begin(); it != artPointers.end(); ++it) if (pW.HasArticle(*it)) break; if (it == artPointers.end()) continue; COccurrence o(pW.GetSourcePair().FirstWord(), pW.GetSourcePair().LastWord() + 1, i); multiWordsOccurrences.push_back(o); } SolveAmbiguity(multiWordsOccurrences); yvector<SWordHomonymNum> multiWords; for (size_t i = 0; i < multiWordsOccurrences.size(); i++) { SWordHomonymNum tmp(multiWordsOccurrences[i].m_GrammarRuleNo,-1, false); SubstituteByMultiWord(tmp, WordIndexes); } if (bAddEndOfStreamSymbol) WordIndexes.push_back(m_Words.GetEndWordWH()); }
// Reduces Occurrences in place to the entries selected by the index-based
// SolveAmbiguity(Occurrences, indexes) overload.
//
// The selected entries are copied in the order in which their indexes were
// reported. If the number of reported indexes equals the number of
// occurrences, Occurrences is left untouched.
void SolveAmbiguity(yvector<COccurrence>& Occurrences)
{
    yvector<size_t> keptIndexes;
    SolveAmbiguity(Occurrences, keptIndexes);

    const size_t keptCount = keptIndexes.size();
    if (keptCount == Occurrences.size())
        return; // nothing was filtered out

    yvector<COccurrence> kept(keptCount);
    size_t pos = 0;
    while (pos < keptCount) {
        kept[pos] = Occurrences[keptIndexes[pos]];
        ++pos;
    }

    Occurrences.swap(kept);
}
// Reduces Occurrences in place to the entries selected by the index-based
// SolveAmbiguity(Occurrences, indexes) overload and appends every entry that
// was not selected to DroppedOccurrences.
//
// The selected indexes are sorted first, so both the kept and the dropped
// entries keep their original relative order. DroppedOccurrences is only
// appended to, never cleared. If the number of reported indexes equals the
// number of occurrences, neither vector is modified.
void SolveAmbiguity(yvector<COccurrence>& Occurrences, yvector<COccurrence>& DroppedOccurrences)
{
    yvector<size_t> keptIndexes;
    SolveAmbiguity(Occurrences, keptIndexes);

    if (keptIndexes.size() == Occurrences.size())
        return; // nothing was filtered out

    std::sort(keptIndexes.begin(), keptIndexes.end());

    // Copy the survivors in ascending index order.
    const size_t keptCount = keptIndexes.size();
    yvector<COccurrence> kept(keptCount);
    for (size_t k = 0; k < keptCount; ++k)
        kept[k] = Occurrences[keptIndexes[k]];

    // Sentinel: an index no position can match, so keptIndexes[cursor]
    // stays valid after the last real index has been consumed.
    keptIndexes.push_back(Occurrences.size());

    size_t cursor = 0;
    for (size_t pos = 0; pos < Occurrences.size(); ++pos) {
        if (pos != keptIndexes[cursor])
            DroppedOccurrences.push_back(Occurrences[pos]);
        else
            ++cursor;
    }

    Occurrences.swap(kept);
}