// -------------------------------------------------------------------- static matrix_type _compute_rooted_fa(const matrix_type & fa) { const size_t K = fa.get_height(); const size_t J = fa.get_width(); assert(K > 1); matrix_type rooted_fa (K - 1, J); for (size_t k = 0; k + 1 < K; k++) for (size_t j = 0; j < J; j++) rooted_fa(k, j) = fa(k + 1, j) - fa(0, j); return rooted_fa; }
// ---------------------------------------------------------------- explicit q_data(const matrix_type & d) : d_ij () , d_ik () , d_jk () , i () , j () { static const auto k_0_5 = value_type(0.5); const auto n = d.get_height(); const auto k_n_2 = value_type(n - 2); // // Cache the row sums; column sums would work equally well // because the matrix is symmetric. // std::vector<value_type> sigma; for (size_t c = 0; c < n; c++) sigma.push_back(d.get_row_sum(c)); // // Compute the values of the Q matrix. // matrix_type q (n, n); for (size_t r = 0; r < n; r++) for (size_t c = 0; c < r; c++) q(r, c) = k_n_2 * d(r, c) - sigma[r] - sigma[c]; // // Find the cell with the minimum value. // i = 1, j = 0; for (size_t r = 2; r < n; r++) for (size_t c = 0; c < r; c++) if (q(r, c) < q(i, j)) i = r, j = c; // // Compute distances between the new nodes. // d_ij = d(i, j); d_ik = k_0_5 * (d_ij + ((sigma[i] - sigma[j]) / k_n_2)); d_jk = d_ij - d_ik; }
/// /// \return A new-and-improved F matrix. /// static matrix_type improve_f( const genotype_matrix_type & g, ///< The G matrix. const matrix_type & q, ///< The Q matrix. const matrix_type & fa, ///< The F matrix. const matrix_type & fb, ///< The 1-F matrix. const matrix_type & qfa, ///< The Q*F matrix. const matrix_type & qfb, ///< The Q*(1-F) matrix. const matrix_type * fif, ///< The Fin-force matrix. const bool frb) ///< Using frequency-bounds. { assert(verification_type::validate_gqf_sizes(g, q, fa)); assert(verification_type::validate_gqf_sizes(g, q, fb)); assert(verification_type::validate_q(q)); assert(verification_type::validate_f(fa)); assert(nullptr == fif || !frb); const auto I = g.get_height(); const auto K = fa.get_height(); const auto J = fa.get_width(); assert(nullptr == fif || verification_type:: validate_fif_size(*fif, K, J)); matrix_type f_dst (K, J); static const std::vector<size_t> fixed_active_set; matrix_type derivative_vec (K, 1); matrix_type hessian_mat (K, K); const auto frb_delta = value_type(1.0) / (value_type(2 * I) + value_type(1.0)); for (size_t j = 0; j < J; j++) { const auto f_column = fa.copy_column(j); g.compute_derivatives_f( q, fa, fb, qfa, qfb, j, derivative_vec, hessian_mat); const auto coefficients_mat = _create_coefficients_mat(K, 0); auto b_vec = _create_b_vec(f_column, 0); if (nullptr != fif) { for (size_t k = 0; k < fif->get_height(); k++) { b_vec[k + 0] = value_type(0); b_vec[k + K] = value_type(0); } } else if (frb) { for (size_t k = 0; k < K; k++) { b_vec[k + 0] -= frb_delta; b_vec[k + K] -= frb_delta; } } std::vector<size_t> active_set { 0 }; matrix_type delta_vec (K, 1); delta_vec[0] = -b_vec[0]; qpas_type::loop_over_active_set( b_vec, coefficients_mat, hessian_mat, derivative_vec, fixed_active_set, active_set, delta_vec); for (size_t k = 0; k < K; k++) f_dst(k, j) = f_column[k] + delta_vec[k]; } f_dst.clamp(min, max); return f_dst; }
/// /// \return A new-and-improved Q matrix. /// static matrix_type improve_q( const genotype_matrix_type & g, ///< The G matrix. const matrix_type & q, ///< The Q matrix. const matrix_type & fa, ///< The F matrix. const matrix_type & fb, ///< The 1-F matrix. const matrix_type & qfa, ///< The Q*F matrix. const matrix_type & qfb, ///< The Q*(1-F) matrix. const forced_grouping_type * fg) ///< The force-grouping. { assert(verification_type::validate_gqf_sizes(g, q, fa)); assert(verification_type::validate_gqf_sizes(g, q, fb)); assert(verification_type::validate_q(q)); assert(verification_type::validate_f(fa)); const auto I = q.get_height(); const auto K = q.get_width(); matrix_type q_dst (I, K); const std::vector<size_t> fixed_active_set { K + K }; matrix_type derivative_vec (K, 1); matrix_type hessian_mat (K, K); for (size_t i = 0; i < I; i++) { const auto q_row = q.copy_row(i); g.compute_derivatives_q( q, fa, fb, qfa, qfb, i, derivative_vec, hessian_mat); const auto coefficients_mat = _create_coefficients_mat(K, 1); auto b_vec = _create_b_vec(q_row, 1); if (nullptr != fg) { for (size_t k = 0; k < K; k++) { b_vec[k + 0] -= fg->get_min(i, k); b_vec[k + K] += fg->get_max(i, k) - value_type(1); } } std::vector<size_t> active_set { 0 }; matrix_type delta_vec (K, 1); delta_vec[0] = -b_vec[0]; qpas_type::loop_over_active_set( b_vec, coefficients_mat, hessian_mat, derivative_vec, fixed_active_set, active_set, delta_vec); for (size_t k = 0; k < K; k++) q_dst(i, k) = q_row[k] + delta_vec[k]; q_dst.clamp_row(i, min, max); const auto sum = q_dst.get_row_sum(i); q_dst.multiply_row(i, value_type(1) / sum); } return q_dst; }
/// /// Initializes a new instance of the class. /// explicit basic_neighbor_joining( const matrix_type & distances) ///< The distance matrix. : _children () , _lengths () , _names () , _root (invalid_id) { // // Allow empty matrices even though no tree will be produced from // the write method. // if (distances.is_empty()) return; // // Allow just one node even though the tree produced from the write // method will consist on only the node name ("0"). // assert(distances.is_square()); auto n = distances.get_height(); id_type next_id = 0; if (n == 1) { _root = _add_leaf(next_id); return; } // // Prepare a list of ids for the initial set of nodes. // typedef std::vector<id_type> vector_type; vector_type x; for (size_t i = 0; i < n; i++) x.push_back(_add_leaf(next_id)); // // Prepare the distance matrix that will be reduced by the // algorithm. // matrix_type d (distances); // // Loop until there are only two nodes remaining in the distance // matrix. // while (n > 2) { // // Find the minimum Q value in the matrix, and use it to find // the two nodes that will be joined. Join them by creating a // new parent node. // const q_data q (d); const id_type id (next_id++); _add_parent(id, x[q.i], q.d_ik); _add_parent(id, x[q.j], q.d_jk); // // Prepare the new, reduced distance matrix as well as the // corresponding id vector. // matrix_type dd (n - 1, n - 1); vector_type xx { id }; for (size_t r = 0, rr = 1; r < n; r++) { if (r == q.i || r == q.j) continue; xx.push_back(x[r]); dd(rr, 0) = value_type(0.5) * (d(r, q.i) + d(r, q.j) - q.d_ij); for (size_t c = 0, cc = 1; c < r; c++) if (c != q.i && c != q.j) dd(rr, cc++) = d(r, c); rr++; } // // Copy the lower triangle to the upper triangle so the data // in the next Q matrix matches the expected values. // dd.copy_lower_to_upper(); d.swap(dd); x.swap(xx); n--; } // // Connect the last two nodes; note the loop above places new nodes // at index zero, so here it is known that the leaf node must be at // index 1, and so the root note must be at index 0. // _root = x[0]; _add_parent(_root, x[1], d(1, 0)); }