/// Count the columns in which both sequences have a character.
///
/// \param A  The alignment
/// \param s1 Index of the first sequence
/// \param s2 Index of the second sequence
/// \return The number of columns where A.character() is true for both s1 and s2.
unsigned n_homologous(const alignment& A,int s1,int s2)
{
  unsigned n_shared = 0;

  for(int column=0; column<A.length(); ++column)
  {
    const bool both_present = A.character(column,s1) and A.character(column,s2);
    if (both_present)
      ++n_shared;
  }

  return n_shared;
}
unsigned n_with_identity(const alignment& A,int s1,int s2,double I) { // Get matches vector<int> F(A.length()+1); unsigned L=0; unsigned T = 0; F[0]=0; for(int i=0;i<A.length();i++) { if (not A.character(i,s1) and not A.character(i,s2)) continue; L++; if (A(i,s1) == A(i,s2)) T++; F[L] = T; } F.resize(L+1); // Get positions vector<int> FI(T+1); FI[0]=0; for(int i=0;i<L;i++) if (F[i+1] > F[i]) FI[F[i+1]] = i+1; // tag positions that vector<int> tagged(L,0); const unsigned w = 4; for(int i=1;i<=T;i++) { for(int j=20;j>=w;j--) { int i2 = i+j; if (i2 > T) continue; assert(FI[i] > 0 and FI[i] <=L); assert(FI[i2] > 0 and FI[i2] <=L); assert(FI[i2] > FI[i]); if (double(i2-i+1)/(FI[i2]-FI[i]+1) > I) { for(int k=FI[i];k<=FI[i2];k++) tagged[k-1]=1; break; } } } return sum(tagged); }
/// Collect the entries of one alignment row, skipping non-character columns.
///
/// \param A The alignment
/// \param i Index of the sequence (row) to extract
/// \return The values A(column,i), in column order, for every column where
///         A.character(column,i) is true.
vector<int> alignment_row_letters(const alignment& A, int i)
{
  vector<int> letters;

  for(int column=0; column<A.length(); column++)
  {
    if (not A.character(column,i))
      continue;

    letters.push_back(A(column,i));
  }

  return letters;
}
/// Count how many sequences have a character in the given column.
///
/// \param A      The alignment
/// \param column The column to inspect
/// \return The number of sequences for which A.character(column,seq) is true.
int n_characters(const alignment& A, int column)
{
  int n_present = 0;

  for(int seq=0; seq<A.n_sequences(); seq++)
    n_present += A.character(column,seq) ? 1 : 0;

  return n_present;
}
/// Fraction of the pairwise columns of s1 and s2 in which both have a character.
///
/// Columns where neither sequence has a character are ignored.
///
/// \param A  The alignment
/// \param s1 Index of the first sequence
/// \param s2 Index of the second sequence
/// \return (columns where both have a character) / (columns where at least
///         one has a character), or 1 if no column has a character in either.
double fraction_homologous(const alignment& A,int s1,int s2)
{
  unsigned n_either = 0;
  unsigned n_both = 0;

  for(int column=0; column<A.length(); column++)
  {
    const bool in_s1 = A.character(column,s1);
    const bool in_s2 = A.character(column,s2);

    if (in_s1 or in_s2)
    {
      n_either++;
      if (in_s1 and in_s2)
        n_both++;
    }
  }

  // With nothing to compare, the fraction is defined to be 1.
  if (n_either == 0)
    return 1;

  return double(n_both)/n_either;
}
/// Replace each letter with its position in its sequence ublas::matrix<int> M(const alignment& A1) { ublas::matrix<int> A2(A1.length(),A1.n_sequences()); for(int i=0;i<A2.size2();i++) { int pos=0; for(int column=0;column<A2.size1();column++) { if (A1.character(column,i)) { A2(column,i) = pos; pos++; } else A2(column,i) = A1(column,i); } assert(pos == A1.seqlength(i)); } return A2; }
/// Construct a mapping of letters to columns for each leaf sequence vector< vector<int> > column_lookup(const alignment& A,int nleaves) { if (nleaves == -1) nleaves = A.n_sequences(); vector< vector<int> > result(nleaves); for(int i=0;i<nleaves;i++) { vector<int>& columns = result[i]; columns.reserve(A.length()); for(int column=0;column<A.length();column++) { if (A.character(column,i)) columns.push_back(column); } } return result; }
/// \brief Check if internal node characters are only present between leaf charaters. /// /// \param A The alignment /// \param T The tree bool check_leaf_characters_minimally_connected(const alignment& A,const Tree& T) { assert(A.n_sequences() == T.n_nodes()); for(int column=0;column<A.length();column++) { // construct leaf presence/absence mask dynamic_bitset<> present(T.n_nodes()); for(int i=0;i<T.n_nodes();i++) present[i] = not A.gap(column,i); // compute presence/absence for internal nodes connect_all_characters(T,present); // put present characters into the alignment. for(int i=T.n_leaves();i<T.n_nodes();i++) if (present[i] != A.character(column,i)) return false; } return true; }
/// Check whether no sequence has a character in the given column.
///
/// \param A      The alignment
/// \param column The column to inspect
/// \return true if A.character(column,seq) is false for every sequence.
bool all_gaps(const alignment& A,int column)
{
  int seq = 0;

  // Advance past sequences without a character; stop at the first character.
  while (seq < A.n_sequences() and not A.character(column,seq))
    seq++;

  // We ran off the end only if no character was found.
  return seq >= A.n_sequences();
}
/// Check whether no masked-in sequence has a character in the given column.
///
/// \param A      The alignment
/// \param column The column to inspect
/// \param mask   Sequences whose bit is unset are ignored
/// \return true if A.character(column,seq) is false for every sequence whose
///         mask bit is set.
bool all_gaps(const alignment& A,int column,const boost::dynamic_bitset<>& mask)
{
  for(int seq=0; seq<A.n_sequences(); seq++)
  {
    // Sequences outside the mask do not count.
    if (not mask[seq])
      continue;

    if (A.character(column,seq))
      return false;
  }

  return true;
}