예제 #1
0
/**
 * Registers one vocalized combination on behalf of the solution at `index`.
 *
 * A combination that reports a self-inconsistency is logged through qDebug
 * and dropped. Otherwise a (text, stem POS, morpheme-diacritic summary)
 * triplet is built; if that triplet is already in `alreadyProceesed` nothing
 * else happens. If it is new, it is recorded and `index` is added to the
 * index set stored with `comb` in `allPossibleComb`, creating that entry
 * when `comb` is not present yet.
 *
 * @param alreadyProceesed triplets handled so far; updated in place
 * @param allPossibleComb  collection of combination/index-set pairs; updated in place
 * @param comb             combination to register
 * @param index            value inserted into the combination's index set
 * @param sol              solution supplying `voc`, `stemPOS` and the morpheme summary
 */
void insertIntoCombSet(QSet<TextNposNmorphDiaTriplet> &alreadyProceesed, CombSet &allPossibleComb,
                       VocalizedCombination  &comb, int index, AmbiguitySolution &sol) {
    QString combText = comb.getString();

    // Inconsistent combinations are reported and never stored.
    if (comb.hasSelfInconsistency()) {
        qDebug() << "Found inconsistency in generated lexicon entries:\t" << combText << "\tpos: " << sol.voc;
        return;
    }

    MorphemeDiacritics morphSummary = sol.getMorphemeDiacriticSummary(comb);
    QString stemPos = sol.stemPOS;
    TextNposNmorphDiaTriplet seenKey(combText, stemPos, morphSummary);

    // Each distinct triplet contributes at most once.
    if (alreadyProceesed.contains(seenKey)) {
        return;
    }

    alreadyProceesed.insert(seenKey);

    CombSet::iterator existing = allPossibleComb.find(comb);

    if (existing != allPossibleComb.end()) {
        // Mutating the stored element in place is safe: adding an index does
        // not disrupt the element's position in the set.
        VocCombIndexListPair &storedEntry = (VocCombIndexListPair &)(*existing);
        storedEntry.indicies.insert(index);
        return;
    }

    // First time this combination is seen: start a new entry holding `index`.
    VocCombIndexListPair freshEntry(comb);
    freshEntry.indicies.insert(index);
    allPossibleComb.insert(freshEntry);
}
예제 #2
0
	/**
	 * Convenience overload: gathers an iterator to every element of
	 * `global_state` into a list, forwards that list together with an empty
	 * `std::set<int>` and `req` to the list-taking overload, and returns the
	 * `first` member of that overload's result.
	 */
	CombSet::iterator ReqSelector::operator() (CombSet &global_state,
											   const Requirement &req)
	{
		std::list<CombSet::iterator> candidates;

		CombSet::iterator cursor = global_state.begin();
		while (cursor != global_state.end())
		{
			candidates.push_back(cursor);
			++cursor;
		}

		return ((*this)(candidates, std::set<int>(), req)).first;
	}
예제 #3
0
/**
 * Folds the statistics for one entry into the per-ambiguity-kind counters of
 * this object and writes one tab-separated line per (combination, solution
 * index) pair to the output streams.
 *
 * Steps:
 *  1. Build one solution list per ambiguity kind: `currSol` itself for
 *     All_Ambiguity, `getAmbiguityUnique(currSol, kind)` for every other kind.
 *  2. Feed vocalized combinations of every solution into a combination set
 *     through insertIntoCombSet.
 *  3. For every stored combination with at least one diacritic, count per
 *     kind how many solutions satisfy `equal(text, voc, true)`, update the
 *     running counters and emit the report lines.
 *  4. Unless the combination set was empty, merge the local sums into the
 *     member accumulators.
 *
 * @param currEntry entry text; passed to printDiacritics and tested for
 *                  emptiness when counting entries without diacritics
 * @param currSol   all solutions available for the entry
 */
void DiacriticDisambiguationBase::analyzeOne(QString currEntry, const AmbiguitySolutionList &currSol) {
    AmbiguitySolutionList solutionsByKind[ambiguitySize];

    for (int kind = 0; kind < ambiguitySize; ++kind) {
        if (static_cast<Ambiguity>(kind) == All_Ambiguity) {
            solutionsByKind[kind] = currSol;
        } else {
            solutionsByKind[kind] = getAmbiguityUnique(currSol, static_cast<Ambiguity>(kind));
        }
    }

    // Per-kind sums over the combinations examined for this entry.
    int poolSum[ambiguitySize] = {0};        // sum of list sizes
    int matchSum[ambiguitySize] = {0};       // sum of matching-solution counts
    int fewestMatches[ambiguitySize] = {0};  // minimum match count, seeded below
    int mostMatches[ambiguitySize] = {0};    // maximum match count

    for (int kind = 0; kind < ambiguitySize; ++kind) {
        fewestMatches[kind] = solutionsByKind[kind].size();
    }

    {
        CombSet allCombos;
        QSet<TextNposNmorphDiaTriplet> seenTriplets;

        // The All_Ambiguity list contains all solutions.
        for (int solIdx = 0; solIdx < solutionsByKind[All_Ambiguity].size(); ++solIdx) {
            AmbiguitySolution &solution = solutionsByKind[All_Ambiguity][solIdx];
            QString vocalized = solution.voc;

            if (diacriticsCount <= 0) {
                // i.e. all diacritics
                VocalizedCombination fullComb = VocalizedCombination::deduceCombination(vocalized);
                insertIntoCombSet(seenTriplets, allCombos, fullComb, solIdx, solution);
                continue;
            }

            VocalizedCombinationsGenerator generator(vocalized, diacriticsCount);

            if (!generator.isUnderVocalized()) {
                for (generator.begin(); !generator.isFinished(); ++generator) {
                    VocalizedCombination partialComb = generator.getCombination();
                    insertIntoCombSet(seenTriplets, allCombos, partialComb, solIdx, solution);
                }

                continue;
            }

            // Under-vocalized: fall back to the combination deduced from the text.
            VocalizedCombination deducedComb = VocalizedCombination::deduceCombination(vocalized);
            insertIntoCombSet(seenTriplets, allCombos, deducedComb, solIdx, solution);
        }

        for (CombSet::iterator comboIt = allCombos.begin(); comboIt != allCombos.end(); ++comboIt) {
            VocCombIndexListPair &entry = (VocCombIndexListPair &)(*comboIt);
            VocalizedCombination &combo = entry.comb;
            QString comboText = combo.getString();
            const QList<Diacritics> &comboDiacritics = combo.getDiacritics();
            int diaCount = combo.getNumDiacritics();

            // Combinations without any diacritic are not analysed.
            if (diaCount == 0) {
                continue;
            }

            double matchRatio[ambiguitySize];

            for (int kind = 0; kind < ambiguitySize; ++kind) {
                AmbiguitySolutionList &pool = solutionsByKind[kind];
                const int poolSize = pool.size();
                int matches = 0;

                for (int k = 0; k < poolSize; ++k) {
                    if (equal(comboText, pool[k].voc, true)) {
                        ++matches;
                    }
                }

                // NOTE(review): an empty pool would make this a division by
                // zero; presumably pools are never empty here - confirm.
                matchRatio[kind] = static_cast<double>(matches) / poolSize;
                matchSum[kind] += matches;
                mostMatches[kind] = max(mostMatches[kind], matches);
                fewestMatches[kind] = min(fewestMatches[kind], matches);
                poolSum[kind] += poolSize;

                if (matchRatio[kind] < 1) {
                    ++reducingCombinations[kind];
                }

                ++totalCombinations[kind];
            }

            const int vocLeft = solutionsByKind[Vocalization].size();
            const bool countRequested = (diaCount == diacriticsCount) || (diacriticsCount == -1);
            const bool showOnMain = countRequested && !suppressOutput && (vocLeft > 1);

            // Sink 0 is theSarf->out, sink 1 is hadith_out.
            for (int sink = 0; sink < 2; ++sink) {
                QTextStream *stream = (sink == 0) ? &(theSarf->out) : &hadith_out;

                // hadith_out always receives the lines; the other stream only when showOnMain holds.
                if (!showOnMain && sink != 1) {
                    continue;
                }

                int sharers = entry.indicies.size();

                for (QSet<int>::iterator idxIt = entry.indicies.begin(); idxIt != entry.indicies.end(); ++idxIt) {
                    int solIndex = *idxIt;
                    // Original author's note: indices currently refer to All_Ambiguity in
                    // general; they could instead refer to the unique ones out of it.
                    AmbiguitySolution &solution = solutionsByKind[All_Ambiguity][solIndex];

                    (*stream) << comboText << "\t";
                    printDiacritics(currEntry, comboDiacritics, solution, stream);
                    (*stream) << "\t" << vocLeft << "\t" << (1.0 / static_cast<double>(sharers));

                    for (int kind = 0; kind < ambiguitySize; ++kind) {
                        (*stream) << "\t" << matchRatio[kind];
                    }

                    (*stream) << "\n";
                }
            }
        }

        // Nothing was generated for this entry: leave the accumulators untouched.
        if (allCombos.size() == 0) {
            return;
        }
    }

    for (int kind = 0; kind < ambiguitySize; ++kind) {
        if (poolSum[kind] != 0) {
            // A stricter check (poolSum equal to the list size when
            // diacriticsCount < 0) was disabled by the original author.
            left[kind] += static_cast<double>(matchSum[kind]) / poolSum[kind] * solutionsByKind[kind].size();
            worstLeft[kind] += mostMatches[kind];
            bestLeft[kind] += fewestMatches[kind];
            leftBranching[kind] += matchSum[kind];
            totalBranching[kind] += poolSum[kind];
            total[kind] += solutionsByKind[kind].size();
        } else if (kind == static_cast<int>(All_Ambiguity) && !currEntry.isEmpty()) {
            ++countWithoutDiacritics;
        }

        // NOTE(review): this is checked even when poolSum[kind] is 0, so it
        // relies on totalBranching already being positive - confirm intent.
        assert(totalBranching[kind] > 0);

        if (solutionsByKind[kind].size() != 0) {
            ++countAmbiguity[kind];

            if (matchSum[kind] < poolSum[kind]) {
                ++countReduced[kind];
            }
        }
    }
}