// Returns the similarity of two sparse feature vectors: the sum of products
// of the values stored under identical keys, divided by the calc_l2norm()
// result of each vector.
//
// Returns 0 when calc_l2norm() yields 0 for either vector.
//
// NOTE: unless the early return is taken, both q1 and q2 are sorted in place,
// so callers observe their arguments reordered.
float recommender_base::calc_similality(common::sfv_t& q1, common::sfv_t& q2) {
  const float norm1 = calc_l2norm(q1);
  const float norm2 = calc_l2norm(q2);
  if (norm1 == 0.f || norm2 == 0.f) {
    return 0.f;
  }

  // Sorting lets the loop below match keys with a single linear merge pass.
  sort(q1.begin(), q1.end());
  sort(q2.begin(), q2.end());

  common::sfv_t::const_iterator lhs = q1.begin();
  common::sfv_t::const_iterator rhs = q2.begin();
  const common::sfv_t::const_iterator lhs_end = q1.end();
  const common::sfv_t::const_iterator rhs_end = q2.end();

  float dot = 0.f;
  while (lhs != lhs_end && rhs != rhs_end) {
    // One three-way comparison decides which cursor is behind.
    const int order = lhs->first.compare(rhs->first);
    if (order < 0) {
      ++lhs;
    } else if (order > 0) {
      ++rhs;
    } else {
      // Same key on both sides: it contributes to the dot product.
      dot += lhs->second * rhs->second;
      ++lhs;
      ++rhs;
    }
  }

  return dot / norm1 / norm2;
}
// Scores stored columns against `query` and appends the best matches to
// `scores`.
//
// query   - sparse feature vector to score against.
// scores  - output; (key from column2id_, score) pairs are appended in
//           descending score order. Existing contents are kept.
// ret_num - maximum number of entries to append.
//
// Nothing is appended when calc_l2norm(query) is 0.
void inverted_index_storage::calc_scores(
    const common::sfv_t& query,
    vector<pair<string, float> >& scores,
    size_t ret_num) const {
  typedef jubatus::util::data::unordered_map<uint64_t, float> score_map_t;

  const float query_norm = calc_l2norm(query);
  if (query_norm == 0.f) {
    return;
  }

  // Let add_inp_scores() accumulate a raw score per column id, one query
  // feature at a time.
  score_map_t raw_scores;
  for (common::sfv_t::const_iterator feature = query.begin();
       feature != query.end(); ++feature) {
    const string& feature_id = feature->first;
    const float weight = feature->second;
    add_inp_scores(feature_id, weight, raw_scores);
  }

  // Normalize each raw score by the column norm and the query norm. Columns
  // whose norm is 0 get a score of 0 rather than a division by zero.
  vector<pair<float, uint64_t> > ranked;
  for (score_map_t::const_iterator entry = raw_scores.begin();
       entry != raw_scores.end(); ++entry) {
    const float column_norm = calc_columnl2norm(entry->first);
    float normalized = 0.f;
    if (column_norm != 0.f) {
      normalized = entry->second / column_norm / query_norm;
    }
    ranked.push_back(make_pair(normalized, entry->first));
  }

  // Sorting through reverse iterators leaves the vector in descending order
  // (by score, then by column id).
  sort(ranked.rbegin(), ranked.rend());

  const size_t limit = ranked.size() < ret_num ? ranked.size() : ret_num;
  for (size_t rank = 0; rank < limit; ++rank) {
    const pair<float, uint64_t>& hit = ranked[rank];
    scores.push_back(make_pair(column2id_.get_key(hit.second), hit.first));
  }
}