Example #1
0
void FuzzySearchImpl::buildIndex()
{
	_invertedIndex.clear();
	_qGramIndex.clear();

	// Build inverted index
	for (Service::Item *item : _indexRef) {
		QStringList words = item->title().split(QRegExp("\\W+"), QString::SkipEmptyParts);
		for (QString &w : words)
			_invertedIndex[w.toLower()].insert(item);
	}

	// Build qGramIndex
	for (InvertedIndex::const_iterator it = _invertedIndex.cbegin(); it != _invertedIndex.cend(); ++it)
	{
		//Split the word into lowercase qGrams
		QString spaced = QString(_q-1,' ').append(it.key().toLower());
		for (unsigned int i = 0 ; i < static_cast<unsigned int>(it.key().size()); ++i)
			// Increment #occurences of this qGram in this word
			++_qGramIndex[spaced.mid(i,_q)][it.key()];
	}
}
	/**
	 * Load centroids and centroid labels from a binary file and rebuild the
	 * inverted index over the loaded centroids.
	 *
	 * Layout read from the file (raw, native-sized values, so the file is
	 * only readable on a platform with the same size_t/int/float
	 * representation as the one that produced it):
	 *   size_t            number of centroids
	 *   per centroid:
	 *     size_t          number of words
	 *     per word:       int word id, float word weight
	 *   size_t            number of labels
	 *   int[labels]       label values
	 *
	 * The label count is read independently of the centroid count; the two
	 * are not compared here.
	 *
	 * @param file  path of the file to read
	 * @return true on success. false if the file cannot be opened (members
	 *         are left untouched) or if any read comes up short (a
	 *         diagnostic "invalid format N" is written to stderr; the
	 *         members have already been cleared and may hold the centroids
	 *         read so far).
	 */
	bool
	load(const char *file)
	{
		FILE *fp = std::fopen(file, "rb");
		
		if (fp == 0) {
			return false;
		}
		m_centroids.clear();
		m_centroid_labels.clear();
		m_inverted_index.clear();
		
		// --- centroids ---
		size_t centroid_num = 0;
		size_t ret = std::fread(&centroid_num, sizeof(centroid_num), 1, fp);
		if (ret != 1) {
			std::fprintf(stderr, "%s: invalid format 1\n", file);
			std::fclose(fp);
			return false;
		}
		for (size_t i = 0; i < centroid_num; ++i) {
			fv_t centroid;
			size_t word_num = 0;
			ret = std::fread(&word_num, sizeof(word_num), 1, fp);
			if (ret != 1) {
				std::fprintf(stderr, "%s: invalid format 2\n", file);
				std::fclose(fp);
				return false;
			}
			for (size_t j = 0; j < word_num; ++j) {
				int word_id = 0;
				float word_weight = 0.0f;
				ret = std::fread(&word_id, sizeof(word_id), 1, fp);
				if (ret != 1) {
					std::fprintf(stderr, "%s: invalid format 3\n", file);
					std::fclose(fp);
					return false;
				}
				ret = std::fread(&word_weight, sizeof(word_weight), 1, fp);
				if (ret != 1) {
					std::fprintf(stderr, "%s: invalid format 4\n", file);
					std::fclose(fp);
					return false;
				}
				centroid.insert(std::make_pair(word_id, word_weight));
			}
			m_centroids.push_back(centroid);
		}
		
		// --- labels ---
		size_t label_num = 0;
		ret = std::fread(&label_num, sizeof(label_num), 1, fp);
		if (ret != 1) {
			std::fprintf(stderr, "%s: invalid format 5\n", file);
			std::fclose(fp);
			return false;
		}
		// Labels are read in one call into a scratch array and appended only
		// after the whole array was read, so a short read adds no labels.
		int *buffer = new int[label_num];
		ret = std::fread(buffer, sizeof(int), label_num, fp);
		if (ret != label_num) {
			std::fprintf(stderr, "%s: invalid format 6\n", file);
			delete[] buffer; // array form must match new[]
			std::fclose(fp);
			return false;
		}
		std::copy(buffer, buffer + label_num,
				  std::back_inserter(m_centroid_labels));
		delete[] buffer; // array form must match new[]
		
		std::fclose(fp);
		
		m_inverted_index.build(&m_centroids);
		
		return true;
	}