Exemple #1
0
// Picks the neighbors of `user` that are most similar according to `sim`
// (via most_similar) and forwards them, together with their normalized
// weights, to svd_hint.
//
// user : index of the user whose neighborhood is built.
// sim  : similarity matrix; most_similar requires it to be square.
// P, Q : passed through unchanged to svd_hint.
//
// Returns whatever svd_hint returns for the selected neighborhood.
vector_t
svd_hint_from_most_similar(
    index_t  const user, 
    matrix_t const & sim, 
    matrix_t const & P, 
    matrix_t const & Q)
{
    stack::fe_asserter dummy{};

    // Upper bound on the neighborhood size.
    index_t const max_neighbors = 10;

    // most_similar removes `user` from the candidate set, so a matrix with
    // `rows` users offers at most rows - 1 neighbors. Asking for more would
    // make its partial_sort range run past the end of the candidate list.
    index_t const n_users  = sim.get_rows();
    index_t const n_others = n_users > 0 ? n_users - 1 : 0;
    index_t const k        = std::min<index_t>(max_neighbors, n_others);

    vector_ll_t neighbors{k};
    vector_t    weights{k};
    most_similar(user, neighbors, weights, sim);
    return svd_hint(neighbors, weights, P, Q);
}
// Solves the linear system (*this) * x = rhs with LAPACKE_dgesv (LU
// factorization with partial pivoting) and returns x.
//
// rhs : right-hand side(s); must have as many rows as this matrix. Each
//       column is treated as a separate right-hand side (nrhs = rhs.get_cols()).
//
// Neither *this nor rhs is modified: both are cloned first because dgesv
// overwrites its inputs in place.
//
// Asserts that this matrix is square, that the row counts match, and that
// dgesv reports success (info == 0).
matrix_t matrix_t::solve(matrix_t const &rhs) const
{
	stack::fe_asserter dummy{};
	// dgesv only handles square coefficient matrices.
	// --> if the matrix were rectangular, the system would be over/under determined
	// and we would need least squares instead (LAPACKE_dgels)
	stack_assert(get_rows() == get_cols());

	stack_assert(this->get_rows() == rhs.get_rows());

	// TODO assert that this matrix is not singular
	matrix_t A = this->clone(); // will be overwritten by the LU factorization
	matrix_t b = rhs.clone();   // will be overwritten by the solution

	// pivot indices produced by the factorization
	vector_ll_t ipiv{A.get_rows()};

	// The call must NOT live inside stack_assert(): if the assertion macro is
	// ever compiled out, the solve itself would silently disappear and `b`
	// would be returned still holding a copy of rhs.
	auto const info = LAPACKE_dgesv(LAPACK_COL_MAJOR, 
		A.get_rows(), rhs.get_cols()/*nrhs*/,
		A.get_data(), A.ld(),  
		ipiv.get_data(), 
		b.get_data(), b.ld());
	// info < 0: illegal argument; info > 0: a zero pivot was hit, i.e. the
	// matrix is exactly singular and no solution was computed.
	stack_assert(0 == info);
	(void)info; // keeps builds without assertions warning-free

	return b;
}
Exemple #3
0
// Returns X + D, where D is a diagonal matrix with the same dimensions as X.
// X itself is left untouched; the sum is built in a clone of X by adding D's
// diagonal entries onto the clone's diagonal.
matrix_t operator+(matrix_t const & X, diag_t const & D)
{
    stack::fe_asserter fe{};
    stack_assert(X.get_rows() == D.get_rows());
    stack_assert(X.get_cols() == D.get_cols());

    matrix_t sum = X.clone();

    // The matrix is treated as square here, so its diagonal has `n` entries.
    auto const n = sum.get_rows();

    // Strided view over the diagonal of `sum`: start at the first element and
    // step n + 1 elements to get from one diagonal entry to the next.
    // NOTE(review): a stride of n + 1 presumes the clone is stored densely
    // (leading dimension == row count) -- confirm; sum.ld() + 1 would not
    // depend on that.
    vector_t diagonal_view{
        sum.get_data(),
        n,
        n + 1,
    };
    diagonal_view += diag_clone(D);

    return sum;
}
Exemple #4
0
// Computes D * X, where D is this diagonal matrix.
//
// X must have as many rows as the diagonal has entries. X is not modified;
// the product is built in a clone of X and returned.
//
// Two equivalent strategies are used, chosen so that the loop runs over the
// shorter dimension:
//   - fewer rows than columns: scale each row r by the scalar diagonal[r];
//   - otherwise: multiply each column by the whole diagonal vector
//     (presumably element-wise -- depends on vector_t's operator*=).
matrix_t diag_t::right_mult(matrix_t const &X) const
{
    stack::fe_asserter fe{};
    stack_assert(diagonal.get_len() == X.get_rows());

    matrix_t scaled = X.clone();
    auto const n_rows = scaled.get_rows();
    auto const n_cols = scaled.get_cols();

    if(n_rows >= n_cols)
    {
        // column by column
        for(size_t col = 0; col < n_cols; col++)
        {
            scaled.get_col(col) *=  diagonal;
        }
    }
    else
    {
        // row by row
        for(size_t row = 0; row < n_rows; row++)
        {
            scaled[row] *= diagonal[row];
        }
    }

    return scaled;
}
Exemple #5
0
// Finds the users most similar to `user` and reports them with normalized
// weights.
//
// user      : row/column index of the user of interest; must be < sim.get_rows().
// neighbors : (out) receives the indices of the selected users, in ascending
//             index order. Its length decides how many neighbors are picked.
// weights   : (out) receives the similarity of each selected neighbor, then
//             normalized with normalize_1() so that the weights sum to 1.
//             Must have the same length as `neighbors`.
// sim       : square similarity matrix; sim(user, user) is required to be 1
//             (within 2 representable doubles).
//
// `user` itself is never a candidate, so at most sim.get_rows() - 1 neighbors
// can be requested.
//
// NOTE(review): `neighbors` and `weights` are taken by value; the results only
// reach the caller if these types are shallow views over shared storage --
// confirm against vector_t / vector_ll_t.
void most_similar(
    index_t     const user, 
    vector_ll_t /*out*/ neighbors, 
    vector_t    /*out*/ weights,
    matrix_t    const sim) 
{
    // Strictly fewer than rows: `user` is removed from the candidates below,
    // and requesting more neighbors than candidates would hand partial_sort a
    // middle iterator past the end of the range.
    stack_assert(neighbors.get_len() < sim.get_rows());
    stack_assert(weights.get_len() < sim.get_rows());
    stack_assert(neighbors.get_len() == weights.get_len());
    stack_assert(sim.get_rows() == sim.get_cols());
    // assert symmetric matrix; how ?
    stack_assert(sim.get_rows() > user);
    stack_assert(user >= 0); // always true if typeof(user) is unsigned.

    stack::fe_asserter dummy1{};
    // sort users by their similarity to this user 
    scoped_timer dummy(__func__);
    vector_t sim_vec = sim.get_row_clone(user);
    stack_assert(std::abs(boost::math::float_distance(sim_vec[user], 1.0)) <= 2); 
    // The previous assert won't hold when using perturbed similarity,
    // However, we require it is set artificially to 1.
    
    // candidate list: every user index except `user`
    std::vector<index_t> all_others(sim.get_rows());
    std::iota(std::begin(all_others), std::end(all_others), 0);
    all_others.erase(std::begin(all_others) + user); // TODO could be optimized by doing iota in two steps
    
// top-k
    // heapify(all_others.data(), sim_vec, neighbors.get_len());
    // for (index_t i = 0; i < sim.get_rows(); i++)
    // {   
    //     // only consider unseen neighbors
    //     // this loop could be unnecessary given the next conditional, but be safe first then verify later
    //     if(i == user || std::find(std::begin(all_others), std::begin(all_others) + i, i) != std::begin(all_others) + i)
    //         continue;
    //     if (sim_vec[i] > sim_vec[all_others[0]] /*top of the heap: the min value of all similarities we have*/)
    //     {
    //         all_others[0] = i; // discards the old value
    //         sift_down(all_others.data(), sim_vec.get_data(), static_cast<index_t>(0), static_cast<index_t>(neighbors.get_len() - 1) /*end of heap - inclusive*/);
    //     }
    // }
    // I have to test my heap first. Be safe for now.

    // Read-only alias so the comparator can refer to the similarities without
    // copying the vector into the closure.
    vector_t const & similarity = sim_vec;
    std::partial_sort(
        std::begin(all_others), 
        std::begin(all_others) + neighbors.get_len(),
        std::end(all_others),
        [&](size_t a, size_t b)->bool
        {
            stack_assert(a < similarity.get_len());
            stack_assert(b < similarity.get_len());
            // Descending order. Must be a strict comparison: `>=` is not a
            // strict weak ordering and makes the sort undefined on ties.
            return similarity[a] > similarity[b];
        });
// get weights of neighbors
    // keep only the selected top entries
    all_others.resize(neighbors.get_len());

    // report neighbors in ascending index order
    std::sort(std::begin(all_others), std::end(all_others));
    {
        size_t i = 0;
        for(auto n : all_others)
        {
            neighbors[i] = n;
            weights[i] = sim_vec[n];
            stack_assert(weights[i] >= 0);
            ++i;
        }
    }
// normalize weights
    // weights must be positive and sum to 1, normalize_1 will make them sum to 1 
    // and we have already verified they are positive
    weights.normalize_1();
}