void insertIntoCombSet(QSet<TextNposNmorphDiaTriplet> &alreadyProceesed, CombSet &allPossibleComb, VocalizedCombination &comb, int index, AmbiguitySolution &sol) { QString s = comb.getString(); if (comb.hasSelfInconsistency()) { qDebug() << "Found inconsistency in generated lexicon entries:\t" << s << "\tpos: " << sol.voc; return; } MorphemeDiacritics mD = sol.getMorphemeDiacriticSummary(comb); QString pos = sol.stemPOS; //QString voc=sol.voc; TextNposNmorphDiaTriplet p(s, pos/*,voc*/, mD); if (alreadyProceesed.contains(p)) { return; } alreadyProceesed.insert(p); CombSet::iterator itr = allPossibleComb.find(comb); if (itr == allPossibleComb.end()) { VocCombIndexListPair p(comb); p.indicies.insert(index); allPossibleComb.insert(p); } else { VocCombIndexListPair &p = (VocCombIndexListPair &)( *itr); //safe since changing p by adding index does not disrupt its position in set p.indicies.insert(index); } }
// Convenience overload: collects an iterator to every element of `global_state`, then
// forwards to the overload taking (iterator list, std::set<int>, requirement), starting
// from an empty integer set. Returns the `first` member of that overload's result.
CombSet::iterator ReqSelector::operator()(CombSet &global_state, const Requirement &req) {
    std::list<CombSet::iterator> candidates;

    CombSet::iterator cursor = global_state.begin();
    while (cursor != global_state.end()) {
        candidates.push_back(cursor);
        ++cursor;
    }

    return (*this)(candidates, std::set<int>(), req).first;
}
// Analyzes one entry together with its candidate solutions.
//
// currEntry : the entry text; passed to printDiacritics() and tested for emptiness when
//             counting entries that yielded no diacritized combination.
// currSol   : all candidate solutions for the entry.
//
// Steps:
//   1. Builds one solution list per ambiguity kind (the All_Ambiguity slot is `currSol`
//      itself, every other slot is what getAmbiguityUnique() returns).
//   2. Generates the vocalized combinations of every solution and groups them in a CombSet.
//   3. For each combination with at least one diacritic, counts per ambiguity kind how many
//      solutions match it (via equal()), updates the per-entry and per-run counters, and
//      writes one tab-separated row per contributing solution to the output streams.
//   4. Folds the per-entry counters into the class-level accumulators.
// Returns early, without step 4, when no combination was generated at all.
void DiacriticDisambiguationBase::analyzeOne(QString currEntry, const AmbiguitySolutionList &currSol) {
    AmbiguitySolutionList currSolutions[ambiguitySize];
    for (int kind = 0; kind < ambiguitySize; ++kind) {
        if (static_cast<Ambiguity>(kind) == All_Ambiguity) {
            currSolutions[kind] = currSol;
        } else {
            currSolutions[kind] = getAmbiguityUnique(currSol, static_cast<Ambiguity>(kind));
        }
    }

    // Per-entry counters, indexed by ambiguity kind.
    int entryTotal[ambiguitySize] = {0};
    int entryLeft[ambiguitySize] = {0};
    int entryBest[ambiguitySize] = {0};
    int entryWorst[ambiguitySize] = {0};
    for (int kind = 0; kind < ambiguitySize; ++kind) {
        // The "best" value is minimized below, so it starts at the largest possible count.
        entryBest[kind] = currSolutions[kind].size();
    }

    {
        CombSet allPossibleComb;
        QSet<TextNposNmorphDiaTriplet> alreadyProceesed;

        // The All_Ambiguity slot contains all solutions.
        for (int solIdx = 0; solIdx < currSolutions[All_Ambiguity].size(); ++solIdx) {
            AmbiguitySolution &solution = currSolutions[All_Ambiguity][solIdx];
            QString vocalized = solution.voc;

            if (diacriticsCount <= 0) {
                // i.e. all diacritics
                VocalizedCombination full = VocalizedCombination::deduceCombination(vocalized);
                insertIntoCombSet(alreadyProceesed, allPossibleComb, full, solIdx, solution);
                continue;
            }

            VocalizedCombinationsGenerator generator(vocalized, diacriticsCount);
            if (generator.isUnderVocalized()) {
                VocalizedCombination full = VocalizedCombination::deduceCombination(vocalized);
                insertIntoCombSet(alreadyProceesed, allPossibleComb, full, solIdx, solution);
                continue;
            }

            for (generator.begin(); !generator.isFinished(); ++generator) {
                VocalizedCombination partial = generator.getCombination();
                insertIntoCombSet(alreadyProceesed, allPossibleComb, partial, solIdx, solution);
            }
        }

        for (CombSet::iterator combIt = allPossibleComb.begin(); combIt != allPossibleComb.end(); ++combIt) {
            VocCombIndexListPair &combIndexList = const_cast<VocCombIndexListPair &>(*combIt);
            VocalizedCombination &combination = combIndexList.comb;
            QString combText = combination.getString();
            const QList<Diacritics> &combDiacritics = combination.getDiacritics();
            int numDia = combination.getNumDiacritics();

            // Combinations carrying no diacritic at all contribute nothing.
            if (numDia == 0) {
                continue;
            }

            double valid_ratio[ambiguitySize];
            for (int kind = 0; kind < ambiguitySize; ++kind) {
                // Number of solutions of this kind that match the combination text.
                int matching = 0;
                for (int k = 0; k < currSolutions[kind].size(); ++k) {
                    if (equal(combText, currSolutions[kind][k].voc, true)) {
                        ++matching;
                    }
                }

                valid_ratio[kind] = static_cast<double>(matching) / (currSolutions[kind].size());
                entryLeft[kind] += matching;
                entryWorst[kind] = max(entryWorst[kind], matching);
                entryBest[kind] = min(entryBest[kind], matching);
                entryTotal[kind] += currSolutions[kind].size();

                if (valid_ratio[kind] < 1) {
                    reducingCombinations[kind]++;
                }
                totalCombinations[kind]++;
            }

            const bool countMatches = (numDia == diacriticsCount || diacriticsCount == -1);
            const bool reduced = currSolutions[Vocalization].size() > 1;
            const bool display = countMatches && !suppressOutput && reduced;

            // Pass 0 targets theSarf->out (only when `display` holds);
            // pass 1 targets hadith_out and is always written.
            for (int pass = 0; pass < 2; ++pass) {
                QTextStream *stream = (pass == 0 ? &(theSarf->out) : &hadith_out);
                if (!display && pass != 1) {
                    continue;
                }

                int total = combIndexList.indicies.size();
                for (QSet<int>::iterator idxIt = combIndexList.indicies.begin();
                     idxIt != combIndexList.indicies.end(); ++idxIt) {
                    int index = *idxIt;
                    // Indices refer to positions in the All_Ambiguity solution list.
                    AmbiguitySolution &indexed = currSolutions[All_Ambiguity][index];

                    (*stream) << combText << "\t";
                    printDiacritics(currEntry, combDiacritics, indexed, stream);

                    int vocLeft = currSolutions[Vocalization].size();
                    (*stream) << "\t" << vocLeft;
                    (*stream) << "\t" << (1.0 / (static_cast<double>(total)));
                    for (int kind = 0; kind < ambiguitySize; ++kind) {
                        (*stream) << "\t" << valid_ratio[kind];
                    }
                    (*stream) << "\n";
                }
            }
        }

        if (allPossibleComb.size() == 0) {
            return;
        }
    }

    // Fold the per-entry counters into the class-level accumulators.
    for (int kind = 0; kind < ambiguitySize; ++kind) {
        if (entryTotal[kind] != 0) {
            left[kind] += (static_cast<double>(entryLeft[kind])) / entryTotal[kind] * currSolutions[kind].size();
            worstLeft[kind] += entryWorst[kind];
            bestLeft[kind] += entryBest[kind];
            leftBranching[kind] += entryLeft[kind];
            totalBranching[kind] += entryTotal[kind];
            total[kind] += currSolutions[kind].size();
        } else if (kind == static_cast<int>(All_Ambiguity) && !currEntry.isEmpty()) {
            countWithoutDiacritics++;
        }

        // NOTE(review): this check also runs when the branch above left totalBranching
        // untouched; it looks like it could fire if the very first analyzed entry produced
        // no diacritized combination - confirm whether that is intended.
        assert(totalBranching[kind] > 0);

        if (currSolutions[kind].size() != 0) {
            countAmbiguity[kind]++;
            if (entryLeft[kind] < entryTotal[kind]) {
                countReduced[kind]++;
            }
        }
    }
}