/**
 * Builds a textual representation of the analyser by concatenating the
 * toString() output of every entry of `affectations`, in order.
 *
 * When DEBUG_KMERS is defined, each entry is additionally followed by
 * ": ", the result of spaced() applied to the slice of `seq` that starts at
 * the entry's index and spans kms.getS() characters (with kms.getSeed() as
 * second argument), and a newline.
 *
 * @return the concatenated representation (empty if there are no entries).
 */
string KmerAffectAnalyser::toString() const
{
  string repr;
  const size_t total = affectations.size();

  for (size_t pos = 0; pos < total; ++pos) {
    repr += affectations[pos].toString();
#ifdef DEBUG_KMERS
    // Debug builds annotate every entry with the sequence window it refers to.
    const string annotation =
        ": " + spaced(seq.substr(pos, kms.getS()), kms.getSeed()) + "\n";
    repr += annotation;
#endif
  }

  return repr;
}
void FuzzySearchImpl::query(const QString &req, QVector<Service::Item *> *res) const { QVector<QString> words; for (QString &word : req.split(QRegExp("\\W+"), QString::SkipEmptyParts)) words.append(word.toLower()); QVector<QMap<Service::Item *, unsigned int>> resultsPerWord; // Quit if there are no words in query if (words.empty()) return; // Split the query into words for (QString &word : words) { unsigned int delta = word.size()/3; // Get qGrams with counts of this word QMap<QString, unsigned int> qGrams; QString spaced(_q-1,' '); spaced.append(word.toLower()); for (unsigned int i = 0 ; i < static_cast<unsigned int>(word.size()); ++i) ++qGrams[spaced.mid(i,_q)]; // Get the words referenced by each qGram an increment their // reference counter QMap<QString, unsigned int> wordMatches; // Iterate over the set of qgrams in the word for (QMap<QString, unsigned int>::const_iterator it = qGrams.cbegin(); it != qGrams.end(); ++it) { // Iterate over the set of words referenced by this qGram for (QMap<QString, unsigned int>::const_iterator wit = _qGramIndex[it.key()].begin(); wit != _qGramIndex[it.key()].cend(); ++wit) { // CRUCIAL: The match can contain only the commom amount of qGrams wordMatches[wit.key()] += (it.value() < wit.value()) ? 
it.value() : wit.value(); } } // Allocate a new set resultsPerWord.push_back(QMap<Service::Item *, unsigned int>()); QMap<Service::Item *, unsigned int>& resultsRef = resultsPerWord.back(); // Unite the items referenced by the words accumulating their #matches for (QMap<QString, unsigned int>::const_iterator wm = wordMatches.begin(); wm != wordMatches.cend(); ++wm) { // // Do some kind of (cheap) preselection by mathematical bound // if (wm.value() < qGrams.size()-delta*_q) // continue; // Now check the (expensive) prefix edit distance if (!checkPrefixEditDistance(word, wm.key(), delta)) continue; for(Service::Item * item: _invertedIndex[wm.key()]) { resultsRef[item] += wm.value(); } } } // Intersect the set of items references by the (referenced) words // This assusmes that there is at least one word (the query would not have // been started elsewise) QVector<QPair<Service::Item *, unsigned int>> finalResult; if (resultsPerWord.size() > 1) { // Get the smallest list for intersection (performance) unsigned int smallest=0; for (unsigned int i = 1; i < static_cast<unsigned int>(resultsPerWord.size()); ++i) if (resultsPerWord[i].size() < resultsPerWord[smallest].size()) smallest = i; bool allResultsContainEntry; for (QMap<Service::Item *, unsigned int>::const_iterator r = resultsPerWord[smallest].begin(); r != resultsPerWord[smallest].cend(); ++r) { // Check if all results contain this entry allResultsContainEntry=true; unsigned int accMatches = resultsPerWord[smallest][r.key()]; for (unsigned int i = 0; i < static_cast<unsigned int>(resultsPerWord.size()); ++i) { // Ignore itself if (i==smallest) continue; // If it is in: check next relutlist if (resultsPerWord[i].contains(r.key())) { // Accumulate matches accMatches += resultsPerWord[i][r.key()]; continue; } allResultsContainEntry = false; break; } // If this is not common, check the next entry if (!allResultsContainEntry) continue; // Finally this match is common an can be put into the results 
finalResult.append(QPair<Service::Item *, unsigned int>(r.key(), accMatches)); } } else // Else do it without intersction { for (QMap<Service::Item *, unsigned int>::const_iterator r = resultsPerWord[0].begin(); r != resultsPerWord[0].cend(); ++r) finalResult.append(QPair<Service::Item *, unsigned int>(r.key(), r.value())); } // Sort em by relevance std::sort(finalResult.begin(), finalResult.end(), [&](QPair<Service::Item *, unsigned int> x, QPair<Service::Item *, unsigned int> y) {return x.second > y.second;}); for (QPair<Service::Item *, unsigned int> pair : finalResult){ res->append(pair.first); } }