예제 #1
0
파일: main.cpp 프로젝트: congpp/Qt_Proj
/*
 * Input:   An English sentence and the position it occupies in the article
 * Return:  A list of positions (startpos + offset of the word inside the
 *          sentence) of every word for which findWord() returns an empty string
 * Mark:    The sentence should not contain any punctuation; words are
 *          separated by one or more whitespace characters
 */
QList<int> QMyHashMap::correctSentence(QString s, int startpos)
{
    QList<int> pos;
    const int len = s.length();
    int i = 0;
    while (i < len)
    {
        // Skip any run of whitespace (leading, trailing or repeated) so that
        // no empty token is produced and the offsets stay exact.
        while (i < len && s.at(i).isSpace())
            ++i;
        if (i >= len)
            break;

        // Collect the word that starts here.
        const int wordStart = i;
        while (i < len && !s.at(i).isSpace())
            ++i;
        QString wordstofind = s.mid(wordStart, i - wordStart);

        qDebug()<<wordstofind<<" --->";
        if(findWord(wordstofind)=="")
        {
            correctWord(wordstofind);
            // Report the real offset of the word rather than assuming a
            // single separating space between consecutive words.
            pos.push_back(startpos + wordStart);
        }
    }
    return pos;
}
예제 #2
0
/**
 * Looks up spelling corrections for `word`, filters them according to the
 * configured strictness flags and records the result in `typos`.
 *
 * Nothing is recorded when the word is empty, shorter than `minimumWordSize`,
 * already present in `typos`, or when the en_US lookup yields no corrections.
 * `onFoundTypo` is invoked only when at least one correction survives the
 * filtering; `addTypo` is called in either case.
 */
void TypoMan::computeFromWord(const std::string& word)
{
    if (word.empty()) {
        return;
    }
    if (static_cast<int>(word.size()) < minimumWordSize) {
        return;
    }
    if (exists(typos, word)) {
        return;
    }
    auto corrections = correctWord(word, LanguageLocale::en_US);
    if (corrections.empty()) {
        return;
    }
    if (ignoreBritishEnglish) {
        // Keep only the corrections suggested by both the US and UK lookups.
        std::vector<std::string> correctionsInUS;
        std::swap(corrections, correctionsInUS);
        auto correctionsInUK = correctWord(word, LanguageLocale::en_UK);
        // std::set_intersection requires both input ranges to be sorted.
        // Sorting here is harmless because the final list is sorted again
        // further down before it is ranked.
        std::sort(std::begin(correctionsInUS), std::end(correctionsInUS));
        std::sort(std::begin(correctionsInUK), std::end(correctionsInUK));
        std::set_intersection(
            std::begin(correctionsInUS),
            std::end(correctionsInUS),
            std::begin(correctionsInUK),
            std::end(correctionsInUK),
            std::back_inserter(corrections));
    }

    // Rebuilds every correction from its diff against `word`, leaving out
    // non-equal hunks whose text is exactly `token` as well as all deletion
    // hunks, then drops corrections that became empty.
    const auto dropChangedToken = [&](const char* token) {
        for (auto & correction : corrections) {
            auto hunks = somera::computeDiff(word, correction);
            std::string filtered;
            for (auto & hunk : hunks) {
                if (hunk.operation != DiffOperation::Equality
                    && hunk.text == token) {
                    continue;
                }
                if (hunk.operation != DiffOperation::Deletion) {
                    filtered += hunk.text;
                }
            }
            correction = filtered;
        }
        eraseIf(corrections, [](const std::string& correction) {
            return correction.empty();
        });
    };

    if (!isStrictWhiteSpace) {
        dropChangedToken(" ");
    }
    if (!isStrictHyphen) {
        dropChangedToken("-");
    }
    if (!isStrictLetterCase) {
        // Discard corrections that differ from the word only by letter case.
        // The lowered word does not change between candidates, so compute it once.
        const auto lowerWord = StringHelper::toLower(word);
        eraseIf(corrections, [&](const std::string& correction) {
            return lowerWord == StringHelper::toLower(correction);
        });
    }

    // Remove duplicates (the filters above can make corrections identical),
    // then let sortNearly order the remaining ones relative to `word`.
    std::sort(std::begin(corrections), std::end(corrections));
    corrections.erase(
        std::unique(std::begin(corrections), std::end(corrections)),
        std::end(corrections));
    sortNearly(word, corrections);

    assert(maxCorrectWordCount > 0);
    if (static_cast<int>(corrections.size()) > maxCorrectWordCount) {
        corrections.resize(maxCorrectWordCount);
    }

    Typo typo;
    typo.typo = word;
    typo.corrections = std::move(corrections);
    if (onFoundTypo && !typo.corrections.empty()) {
        onFoundTypo(typo);
    }
    addTypo(typos, std::move(typo));
}