/// \brief Report whether two alignment columns have a sequence in common.
///
/// \param c1 Index of the first column.
/// \param c2 Index of the second column.
/// \param A  The alignment to inspect.
/// \return true if at least one sequence is non-gap in both columns,
///         false otherwise.
bool intersect(int c1, int c2, const alignment& A)
{
  for (int seq = 0; seq < A.n_sequences(); ++seq)
  {
    // Present in both columns <=> gapped in neither.
    const bool in_both = not (A.gap(c1, seq) or A.gap(c2, seq));
    if (in_both)
      return true;
  }

  return false;
}
/// \brief Count transitions between consecutive pairwise states of two sequences.
///
/// Walks the alignment columns in order, classifying each column by which of
/// the two sequences is gapped, and tallies how often each state follows the
/// previous one.  Columns where both sequences are gapped are skipped.
///
/// \param A     The alignment.
/// \param node1 Index of the first sequence.
/// \param node2 Index of the second sequence.
/// \return A 5x5 matrix where entry (s,t) is the number of times state t
///         directly followed state s; the walk starts in states::S and a
///         final transition into states::E is always recorded.
ublas::matrix<int> get_path_counts(const alignment& A,int node1, int node2)
{
  using namespace A2;

  ublas::matrix<int> counts(5,5);
  counts.clear();

  int previous = states::S;

  for (int column = 0; column < A.length(); ++column)
  {
    const bool gap1 = A.gap(column, node1);
    const bool gap2 = A.gap(column, node2);

    // Neither sequence has a character here: contributes no state.
    if (gap1 and gap2)
      continue;

    int current;
    if (gap1)
      current = states::G1;   // only node2 is present
    else if (gap2)
      current = states::G2;   // only node1 is present
    else
      current = states::M;    // both are present

    counts(previous, current)++;
    previous = current;
  }

  // Close the path with the transition into the end state.
  counts(previous, states::E)++;

  return counts;
}
/// \brief Fraction of identical letters among columns where both sequences are present.
///
/// Only columns in which neither s1 nor s2 is gapped are compared.  This is
/// the same quantity that fraction_identical(A,s1,s2,false) computes.
///
/// \param A  The alignment.
/// \param s1 Index of the first sequence.
/// \param s2 Index of the second sequence.
/// \return matches / compared columns.  If there is no column where both
///         sequences are present, returns 1 (the same convention as
///         fraction_identical) instead of evaluating 0/0.
double getSimilarity(const alignment& A,int s1,int s2)
{
  int match = 0;
  int total = 0;

  for (int column = 0; column < A.length(); column++)
  {
    // Skip columns where either sequence is missing.
    if (A.gap(column,s1) or A.gap(column,s2))
      continue;

    total++;

    if (A(column,s1) == A(column,s2))
      match++;
  }

  // Nothing to compare: avoid 0/0, which would yield NaN.
  if (total == 0)
    return 1;

  return double(match)/total;
}
/// \brief Build an alignment from an index matrix and the letters of an existing alignment.
///
/// \param M  Index matrix with one row per output column and one column per
///           sequence.  A non-negative entry k selects the k-th ungapped
///           letter of that sequence in A1; a negative entry is written to
///           the output cell unchanged.
/// \param A1 Alignment supplying the sequences; it is copied, and the copy is
///           resized to M.size1() columns.
/// \return The re-aligned copy.
alignment get_alignment(const ublas::matrix<int>& M, alignment& A1)
{
  alignment result = A1;
  result.changelength(M.size1());

  // Extract the ungapped letters of every sequence in A1.
  vector<vector<int> > letters(A1.n_sequences());
  for (int seq = 0; seq < A1.n_sequences(); ++seq)
  {
    vector<int>& row = letters[seq];
    for (int col = 0; col < A1.length(); ++col)
      if (not A1.gap(col, seq))
        row.push_back(A1(col, seq));
  }

  // Fill the new alignment cell by cell from the index matrix.
  for (int seq = 0; seq < result.n_sequences(); ++seq)
  {
    for (int col = 0; col < result.length(); ++col)
    {
      const int entry = M(col, seq);
      const int value = (entry >= 0) ? letters[seq][entry] : entry;
      result.set_value(col, seq, value);
    }
  }

  return result;
}
/// \brief Count (column, branch) pairs where exactly one endpoint of the branch is gapped.
///
/// \param A The alignment; it is indexed by the node numbers returned by
///          target() and source() for each branch.
/// \param T The tree whose branches are examined.
/// \return The number of column/branch combinations in which the source and
///         target nodes differ in gap status.
unsigned total_length_indels2(const alignment& A,const Tree& T)
{
  unsigned n_changes = 0;

  for (int column = 0; column < A.length(); ++column)
  {
    for (int b = 0; b < T.n_branches(); ++b)
    {
      const int target = T.branch(b).target();
      const int source = T.branch(b).source();

      const bool target_gap = A.gap(column, target);
      const bool source_gap = A.gap(column, source);

      // A character appears or disappears across this branch.
      if (target_gap != source_gap)
        n_changes++;
    }
  }

  return n_changes;
}
/// Check that internal node states are consistent void check_internal_nodes_connected(const alignment& A,const Tree& T,const vector<int>& ignore) { for(int column=0;column<A.length();column++) { dynamic_bitset<> present(T.n_nodes()); for(int i=0;i<T.n_nodes();i++) present[i] = not A.gap(column,i); if (not all_characters_connected(T,present,ignore)) { cerr<<"Internal node states are inconsistent in column "<<column<<endl; cerr<<A<<endl; throw myexception()<<"Internal node states are inconsistent in column "<<column; } } }
/// \brief Encode the pairwise alignment of two sequences as a list of state codes.
///
/// Columns where both sequences are gapped are skipped.  Each remaining
/// column contributes one code:
///   0 - both sequences present,
///   1 - node1 gapped, node2 present,
///   2 - node1 present, node2 gapped.
/// A trailing 3 is always appended as the final element.
///
/// \param A     The alignment.
/// \param node1 Index of the first sequence.
/// \param node2 Index of the second sequence.
/// \return The sequence of state codes, terminated by 3.
vector<int> get_path(const alignment& A,int node1, int node2)
{
  vector<int> path;
  path.reserve(A.length()+1);

  for (int column = 0; column < A.length(); ++column)
  {
    const bool gap1 = A.gap(column, node1);
    const bool gap2 = A.gap(column, node2);

    if (gap1 and gap2)
      continue;

    int code = 0;
    if (gap1)
      code = 1;
    else if (gap2)
      code = 2;

    path.push_back(code);
  }

  path.push_back(3);

  return path;
}
bool after(int c1, int c2, const alignment& A,const vector<int>& nodes) { assert(nodes.size() == A.n_sequences()); for(int i=0;i<nodes.size();i++) { bool p1 = not A.gap(c1,nodes[i]); bool p2 = not A.gap(c2,nodes[i]); if (p2 and not p1) return true; if (p1 and not p2) return false; } return false; }
/// \brief Compute the presence pattern of two sequences after canonically ordering columns.
///
/// Adjacent columns that share no sequence (see intersect()) are swapped
/// whenever after() says the first belongs after the second, using the node
/// priority order n1, n2, then all remaining sequences in index order.
/// The reordered columns are then reduced to a bit pattern per column.
///
/// \param A  The alignment.
/// \param n1 Index of the first sequence (bit 0 in the result).
/// \param n2 Index of the second sequence (bit 1 in the result).
/// \return One entry per reordered column in which n1 or n2 is present:
///         bit 0 set if n1 is present, bit 1 set if n2 is present.
///         Empty for an alignment with no columns.
vector<int> getorder(const alignment& A,int n1,int n2)
{
  // Get node order
  vector<int> nodes;
  nodes.push_back(n1);
  nodes.push_back(n2);
  for(int i=0;i<A.n_sequences();i++)
    if (i != n1 and i != n2)
      nodes.push_back(i);

  // Get starting column arrangement
  vector<int> columns;
  columns.reserve(A.length());
  for(int column=0;column<A.length();column++)
    columns.push_back(column);

  //-------- Re-order unordered columns by AP order ---------//
  // Use a signed bound written as i+1 < n: the unsigned expression
  // columns.size()-1 wraps around when there are no columns, which would
  // send the loop reading past the end of an empty vector.
  const int n_columns = columns.size();
  for(int i=0;i+1<n_columns;)
  {
    if (not intersect(columns[i],columns[i+1],A) and after(columns[i],columns[i+1],A,nodes))
    {
      std::swap(columns[i],columns[i+1]);
      // Step back so the moved column is compared with its new predecessor.
      if (i>0) i--;
    }
    else
      i++;
  }

  // Reduce each column to the presence bits of n1 and n2, dropping columns
  // where neither is present.
  vector<int> bits;
  for(int i=0;i<n_columns;i++)
  {
    int column = columns[i];
    int b = 0;
    if (not A.gap(column,n1))
      b |= (1<<0);
    if (not A.gap(column,n2))
      b |= (1<<1);
    if (b)
      bits.push_back(b);
  }

  return bits;
}
/// \brief Fraction of compared columns in which two sequences hold equal values.
///
/// Columns where both sequences are gapped are never compared.  Columns
/// where exactly one sequence is gapped are compared only when gaps_count
/// is true; they are then compared like any other column, by equality of
/// the stored values.
///
/// \param A          The alignment.
/// \param s1         Index of the first sequence.
/// \param s2         Index of the second sequence.
/// \param gaps_count Whether columns with a single gap enter the comparison.
/// \return same / compared, or 1 if no column was compared.
double fraction_identical(const alignment& A,int s1,int s2,bool gaps_count)
{
  unsigned n_compared = 0;
  unsigned n_same = 0;

  for (int col = 0; col < A.length(); ++col)
  {
    const bool gap1 = A.gap(col, s1);
    const bool gap2 = A.gap(col, s2);

    if (gap1 and gap2)
      continue;

    if (not gaps_count and (gap1 or gap2))
      continue;

    n_compared++;

    if (A(col, s1) == A(col, s2))
      n_same++;
  }

  // With nothing to compare, the sequences are reported as fully identical.
  if (n_compared == 0)
    return 1;

  return double(n_same)/n_compared;
}
/// Force internal node states are consistent by connecting leaf characters void connect_leaf_characters(alignment& A,const Tree& T) { assert(A.n_sequences() == T.n_nodes()); for(int column=0;column<A.length();column++) { // construct leaf presence/absence mask dynamic_bitset<> present(T.n_nodes()); for(int i=0;i<T.n_nodes();i++) present[i] = not A.gap(column,i); // compute presence/absence for internal nodes connect_all_characters(T,present); // put present characters into the alignment. for(int i=T.n_leaves();i<T.n_nodes();i++) { if (present[i]) A(column,i) = alphabet::not_gap; } } }
/// \brief Check if internal node characters are only present between leaf charaters. /// /// \param A The alignment /// \param T The tree bool check_leaf_characters_minimally_connected(const alignment& A,const Tree& T) { assert(A.n_sequences() == T.n_nodes()); for(int column=0;column<A.length();column++) { // construct leaf presence/absence mask dynamic_bitset<> present(T.n_nodes()); for(int i=0;i<T.n_nodes();i++) present[i] = not A.gap(column,i); // compute presence/absence for internal nodes connect_all_characters(T,present); // put present characters into the alignment. for(int i=T.n_leaves();i<T.n_nodes();i++) if (present[i] != A.character(column,i)) return false; } return true; }
/// \brief Build per-branch column index tables for the sub-alignments of a tree.
///
/// The matrix has one row per alignment column and 2*T.n_branches() columns.
/// For each branch returned by branches_toward_from_node(T,T.n_leaves()),
/// taken in that order, every row receives either alphabet::gap or the next
/// value of a counter that starts at 0 for that branch:
///  - if the branch's source is a leaf, the row is numbered when the
///    alignment is non-gap at (column, branch index);
///  - otherwise the row is numbered when either of the two branches before
///    it has an entry different from -1 in that row.
/// Matrix columns for branches not in that list are not written here.
///
/// NOTE(review): the leaf case indexes the alignment by the branch number,
/// which assumes leaf branch b corresponds to sequence b -- confirm.
/// NOTE(review): cells are initialised with alphabet::gap but tested against
/// the literal -1; presumably these are the same value -- confirm.
///
/// \param A The alignment.
/// \param T The tree.
/// \return The filled index matrix.
ublas::matrix<int> get_SM(const alignment& A,const Tree& T)
{
  ublas::matrix<int> SM(A.length(),2*T.n_branches());

  vector<const_branchview> branches = branches_toward_from_node(T,T.n_leaves());

  // Scratch list for the branches feeding into an internal branch.
  vector<const_branchview> before;
  before.reserve(2);

  // Compute the sub-alignments
  for (int i = 0; i < branches.size(); i++)
  {
    const int b = branches[i];
    int next_index = 0;

    for (int c = 0; c < SM.size1(); c++)
    {
      SM(c,b) = alphabet::gap;

      bool occupied;
      if (branches[i].source().is_leaf_node())
      {
        // for leaf branches fill from the alignment
        occupied = not A.gap(c,b);
      }
      else
      {
        // for internal branches fill from the previous branches
        before.clear();
        append(T.directed_branch(b).branches_before(),before);
        assert(before.size() == 2);

        occupied = (SM(c,before[0]) != -1 or SM(c,before[1]) != -1);
      }

      if (occupied)
        SM(c,b) = next_index++;
    }
  }

  return SM;
}