void FuzzySearchImpl::buildIndex() { _invertedIndex.clear(); _qGramIndex.clear(); // Build inverted index for (Service::Item *item : _indexRef) { QStringList words = item->title().split(QRegExp("\\W+"), QString::SkipEmptyParts); for (QString &w : words) _invertedIndex[w.toLower()].insert(item); } // Build qGramIndex for (InvertedIndex::const_iterator it = _invertedIndex.cbegin(); it != _invertedIndex.cend(); ++it) { //Split the word into lowercase qGrams QString spaced = QString(_q-1,' ').append(it.key().toLower()); for (unsigned int i = 0 ; i < static_cast<unsigned int>(it.key().size()); ++i) // Increment #occurences of this qGram in this word ++_qGramIndex[spaced.mid(i,_q)][it.key()]; } }
bool load(const char *file) { FILE *fp = std::fopen(file, "rb"); if (fp == 0) { return false; } m_centroids.clear(); m_centroid_labels.clear(); m_inverted_index.clear(); size_t centroid_num = 0; size_t ret = std::fread(¢roid_num, sizeof(centroid_num), 1, fp); if (ret != 1) { std::fprintf(stderr, "%s: invalid format 1\n", file); fclose(fp); return false; } for (size_t i = 0; i < centroid_num; ++i) { fv_t centroid; size_t word_num = 0; ret = fread(&word_num, sizeof(word_num), 1, fp); if (ret != 1) { std::fprintf(stderr, "%s: invalid format 2\n", file); fclose(fp); return false; } for (size_t j = 0; j < word_num; ++j) { int word_id; float word_weight; ret = std::fread(&word_id, sizeof(word_id), 1, fp); if (ret != 1) { std::fprintf(stderr, "%s: invalid format 3\n", file); fclose(fp); return false; } ret = std::fread(&word_weight, sizeof(word_weight), 1, fp); if (ret != 1) { std::fprintf(stderr, "%s: invalid format 4\n", file); fclose(fp); return false; } centroid.insert(std::make_pair(word_id, word_weight)); } m_centroids.push_back(centroid); } ret = std::fread(¢roid_num, sizeof(centroid_num), 1, fp); if (ret != 1) { std::fprintf(stderr, "%s: invalid format 5\n", file); fclose(fp); return false; } int *buffer = new int[centroid_num]; ret = std::fread(buffer, sizeof(int), centroid_num, fp); if (ret != centroid_num) { std::fprintf(stderr, "%s: invalid format 6\n", file); delete buffer; fclose(fp); return false; } std::copy(buffer, buffer + centroid_num, std::back_inserter(m_centroid_labels)); delete buffer; fclose(fp); m_inverted_index.build(&m_centroids); return true; }