/**
 * Estimate a user's factor vector from the factor rows of its neighbors.
 *
 * Accumulates `weights[i] * P.row(neighbors[i])` over every entry of
 * `neighbors` and returns the result of `normalize_2()` on that sum.
 *
 * @param neighbors Row indices into `P`, one per neighbor.
 * @param weights   Per-neighbor weights; `weights[i]` scales the row selected
 *                  by `neighbors[i]`. Must hold at least
 *                  `neighbors.get_len()` entries (not checked here).
 * @param P         Factor matrix; the accumulator has `P.get_cols()` entries.
 * @return The weighted sum after `normalize_2()` (presumably an L2
 *         normalization -- confirm against vector_t).
 */
vector_t /* user_factor */
est_factor(
    vector_ll_t const & neighbors,
    vector_t const & weights,
    matrix_t const & P)
{
    stack::fe_asserter dummy1{};
    // Same construction form as most_similar(). The previous spelling,
    // `scoped_timer dummy(std::string(__func__));`, has the shape of the
    // "most vexing parse" and is avoided so the timer is unambiguously an object.
    scoped_timer dummy(__func__);

    // NOTE(review): assumes this constructor yields a zero-filled vector of
    // length P.get_cols() -- confirm against vector_t.
    vector_t user_factor{P.get_cols()};

    // Use the length's own type for the counter instead of `int`, so the
    // comparison is not a signed/width mismatch and cannot overflow on long
    // neighbor lists.
    auto const n_neighbors = neighbors.get_len();
    for (decltype(neighbors.get_len()) i = 0; i < n_neighbors; ++i)
        user_factor += weights[i] * P.get_row_clone(neighbors[i]);

    return user_factor.normalize_2();
}
/**
 * Find the users most similar to `user` and their normalized similarities.
 *
 * Takes row `user` of the similarity matrix, selects the
 * `neighbors.get_len()` other users with the largest similarity, stores their
 * indices (in ascending index order) in `neighbors` and the matching
 * similarities in `weights`, then calls `weights.normalize_1()`.
 *
 * @param user      Index of the user whose neighbors are wanted;
 *                  `0 <= user < sim.get_rows()`.
 * @param neighbors Output: indices of the selected neighbors. Its length on
 *                  entry fixes how many neighbors are selected and must be
 *                  strictly less than `sim.get_rows()` because `user` itself
 *                  is excluded from the candidates.
 * @param weights   Output: similarity of each selected neighbor, normalized
 *                  by `normalize_1()`. Must have the same length as
 *                  `neighbors`.
 * @param sim       Square similarity matrix; `sim[user][user]` is required to
 *                  be (within 2 ULPs of) 1.
 *
 * NOTE(review): `neighbors` and `weights` are received by value yet written
 * as outputs. The results only reach the caller if copies of these types
 * share storage with the original -- confirm against vector_t / vector_ll_t.
 */
void
most_similar(
    index_t const user,
    vector_ll_t /*out*/ neighbors,
    vector_t /*out*/ weights,
    matrix_t const sim)
{
    // `user` is removed from the candidate list below, so only rows - 1
    // candidates exist; requesting `rows` neighbors would run past the end.
    stack_assert(neighbors.get_len() < sim.get_rows());
    stack_assert(weights.get_len() <= sim.get_rows());
    stack_assert(neighbors.get_len() == weights.get_len());
    stack_assert(sim.get_rows() == sim.get_cols());
    // assert symmetric matrix; how ?
    stack_assert(sim.get_rows() > user);
    stack_assert(user >= 0); // always true if typeof(user) is unsigned.
    stack::fe_asserter dummy1{};

    // sort users by their similarity to this user
    scoped_timer dummy(__func__);
    vector_t sim_vec = sim.get_row_clone(user);
    stack_assert(std::abs(boost::math::float_distance(sim_vec[user], 1.0)) <= 2);
    // The previous assert won't hold when using perturbed similarity,
    // However, we require it is set artificially to 1.

    // Candidate list: every user index except `user` itself.
    std::vector<index_t> all_others(sim.get_rows());
    std::iota(std::begin(all_others), std::end(all_others), static_cast<index_t>(0));
    all_others.erase(std::begin(all_others) + user);
    // TODO could be optimized by doing iota in two steps

    // top-k
    // heapify(all_others.data(), sim_vec, neighbors.get_len());
    // for (index_t i = 0; i < sim.get_rows(); i++)
    // {
    //     // only consider unseen neighbors
    //     // this loop could be unnecessary given the next conditional, but be safe first then verify later
    //     if(i == user || std::find(std::begin(all_others), std::begin(all_others) + i, i) != std::begin(all_others) + i)
    //         continue;
    //     if (sim_vec[i] > sim_vec[all_others[0]] /*top of the heap: the min value of all similarities we have*/)
    //     {
    //         all_others[0] = i; // discards the old value
    //         sift_down(all_others.data(), sim_vec.get_data(), static_cast<index_t>(0), static_cast<index_t>(neighbors.get_len() - 1) /*end of heap - inclusive*/);
    //     }
    // }
    // I have to test my heap first. Be safe for now.

    // Capture a const reference rather than copying the similarity row into
    // the comparator (standard algorithms may copy the comparator freely).
    vector_t const & sims = sim_vec;
    std::partial_sort(
        std::begin(all_others),
        std::begin(all_others) + neighbors.get_len(),
        std::end(all_others),
        [&sims](size_t a, size_t b) -> bool
        {
            stack_assert(a < sims.get_len());
            stack_assert(b < sims.get_len());
            // Descending order. The comparison must be strict (`>`, not `>=`):
            // the standard requires a strict weak ordering, i.e. comp(x, x)
            // must be false, otherwise the sort has undefined behaviour.
            return sims[a] > sims[b];
        });

    // get weights of neighbors
    all_others.resize(neighbors.get_len());
    // Report the selected neighbors in ascending index order.
    std::sort(std::begin(all_others), std::end(all_others));
    {
        size_t i = 0;
        for (auto n : all_others)
        {
            neighbors[i] = n;
            weights[i] = sim_vec[n];
            stack_assert(weights[i] >= 0);
            ++i;
        }
    }

    // normalize weights
    // weights must be positive and sum to 1, normalize_1 will make them sum to 1
    // and we have already verified they are positive
    weights.normalize_1();
}