Пример #1
0
/**
 * Renders the analyser as text by concatenating the string form of every
 * entry of `affectations`, in index order.
 *
 * When DEBUG_KMERS is defined, each entry is additionally followed by ": ",
 * the result of spaced() applied to the substring of `seq` that starts at the
 * entry's index and has length kms.getS(), and a newline.
 *
 * @return The concatenated text; empty when `affectations` is empty.
 */
string KmerAffectAnalyser::toString() const{
  string result;
  const size_t count = affectations.size();
  size_t pos = 0;
  while (pos < count) {
    result += affectations[pos].toString();
#ifdef DEBUG_KMERS
    // Debug builds also show the piece of the sequence behind this entry.
    result += ": "+spaced(seq.substr(pos,kms.getS()), kms.getSeed())+"\n";
#endif
    ++pos;
  }
  return result;
}
Пример #2
0
void FuzzySearchImpl::query(const QString &req, QVector<Service::Item *> *res) const
{
	QVector<QString> words;
	for (QString &word : req.split(QRegExp("\\W+"), QString::SkipEmptyParts))
		words.append(word.toLower());
	QVector<QMap<Service::Item *, unsigned int>> resultsPerWord;

	// Quit if there are no words in query
	if (words.empty())
		return;

	// Split the query into words
	for (QString &word : words)
	{
		unsigned int delta = word.size()/3;

		// Get qGrams with counts of this word
		QMap<QString, unsigned int> qGrams;
		QString spaced(_q-1,' ');
		spaced.append(word.toLower());
		for (unsigned int i = 0 ; i < static_cast<unsigned int>(word.size()); ++i)
			++qGrams[spaced.mid(i,_q)];

		// Get the words referenced by each qGram an increment their
		// reference counter
		QMap<QString, unsigned int> wordMatches;
		// Iterate over the set of qgrams in the word
		for (QMap<QString, unsigned int>::const_iterator it = qGrams.cbegin(); it != qGrams.end(); ++it)
		{
			// Iterate over the set of words referenced by this qGram
			for (QMap<QString, unsigned int>::const_iterator wit = _qGramIndex[it.key()].begin(); wit != _qGramIndex[it.key()].cend(); ++wit)
			{
				// CRUCIAL: The match can contain only the commom amount of qGrams
				wordMatches[wit.key()] += (it.value() < wit.value()) ? it.value() : wit.value();
			}
		}

		// Allocate a new set
		resultsPerWord.push_back(QMap<Service::Item *, unsigned int>());
		QMap<Service::Item *, unsigned int>& resultsRef = resultsPerWord.back();

		// Unite the items referenced by the words accumulating their #matches
		for (QMap<QString, unsigned int>::const_iterator wm = wordMatches.begin(); wm != wordMatches.cend(); ++wm)
		{
//			// Do some kind of (cheap) preselection by mathematical bound
//			if (wm.value() < qGrams.size()-delta*_q)
//				continue;

			// Now check the (expensive) prefix edit distance
			if (!checkPrefixEditDistance(word, wm.key(), delta))
				continue;


			for(Service::Item * item: _invertedIndex[wm.key()])
			{
				resultsRef[item] += wm.value();
			}
		}
	}

	// Intersect the set of items references by the (referenced) words
	// This assusmes that there is at least one word (the query would not have
	// been started elsewise)
	QVector<QPair<Service::Item *, unsigned int>> finalResult;
	if (resultsPerWord.size() > 1)
	{
		// Get the smallest list for intersection (performance)
		unsigned int smallest=0;
		for (unsigned int i = 1; i < static_cast<unsigned int>(resultsPerWord.size()); ++i)
			if (resultsPerWord[i].size() < resultsPerWord[smallest].size())
				smallest = i;

		bool allResultsContainEntry;
		for (QMap<Service::Item *, unsigned int>::const_iterator r = resultsPerWord[smallest].begin(); r != resultsPerWord[smallest].cend(); ++r)
		{
			// Check if all results contain this entry
			allResultsContainEntry=true;
			unsigned int accMatches = resultsPerWord[smallest][r.key()];
			for (unsigned int i = 0; i < static_cast<unsigned int>(resultsPerWord.size()); ++i)
			{
				// Ignore itself
				if (i==smallest)
					continue;

				// If it is in: check next relutlist
				if (resultsPerWord[i].contains(r.key()))
				{
					// Accumulate matches
					accMatches += resultsPerWord[i][r.key()];
					continue;
				}

				allResultsContainEntry = false;
				break;
			}

			// If this is not common, check the next entry
			if (!allResultsContainEntry)
				continue;

			// Finally this match is common an can be put into the results
			finalResult.append(QPair<Service::Item *, unsigned int>(r.key(), accMatches));
		}
	}
	else // Else do it without intersction
	{
		for (QMap<Service::Item *, unsigned int>::const_iterator r = resultsPerWord[0].begin(); r != resultsPerWord[0].cend(); ++r)
			finalResult.append(QPair<Service::Item *, unsigned int>(r.key(), r.value()));
	}

	// Sort em by relevance
	std::sort(finalResult.begin(), finalResult.end(),
			  [&](QPair<Service::Item *, unsigned int> x, QPair<Service::Item *, unsigned int> y)
				{return x.second > y.second;});

	for (QPair<Service::Item *, unsigned int> pair : finalResult){
		res->append(pair.first);
	}
}