void peel_n_mutations(const alphabet& a, const vector<int>& letters, const SequenceTree& T, const ublas::matrix<B>& cost,ublas::matrix<B>& n_muts, const vector<const_branchview>& branches) { const int A = a.size(); assert(letters.size() == T.n_leaves()); assert(cost.size1() == A); assert(cost.size2() == A); // we need a scratch row in the matrix assert(n_muts.size1() == T.n_nodes()); assert(n_muts.size2() == A); // compute the max cost -- is this approach a good idea? // Well... this apparently doesn't work. B max_cost = 0; for(int i=0;i<A;i++) for(int j=0;j<A;j++) max_cost = std::max(cost(i,j)+1, max_cost); // clear the length matrix. for(int i=0;i<n_muts.size1();i++) for(int j=0;j<n_muts.size2();j++) n_muts(i,j)=0; // set the leaf costs for(int s=0;s<T.n_leaves();s++) { int L = letters[s]; if (a.is_letter_class(L)) for(int l=0;l<A;l++) if (a.matches(l,L)) n_muts(s,l) = 0; else n_muts(s,l) = max_cost; } // compute the costs for letters at each node for(int i=0;i<branches.size();i++) { int s = branches[i].source(); int t = branches[i].target(); // for each letter l of node target... for(int l=0;l<A;l++) { // compute minimum treelength for data behind source. B temp = n_muts(s,0)+cost(0,l); for(int k=1;k<A;k++) temp = min(temp, n_muts(s,k)+cost(k,l) ); // add it to treelengths for data behind target n_muts(t,l) += temp; } } }
/// For every node of `T`, collect all letters that are optimal under the
/// per-node costs computed by peel_n_mutations().
///
/// The root is taken to be the target of directed branch 0. Costs are first
/// peeled toward the root; then nodes are visited along the reversed list
/// returned by branches_from_node(T,root), and a letter is kept at a node if
/// it attains the minimum for at least one of the letters kept at the
/// branch's source node.
///
/// @return one vector of letter indices per node, indexed by node number.
vector<vector<int> > get_all_parsimony_letters(const alphabet& a, const vector<int>& letters, const SequenceTree& T, const ublas::matrix<int>& cost)
{
  int root = T.directed_branch(0).target();

  // Per-node, per-letter costs of the data behind each node (peeled toward the root).
  ublas::matrix<int> n_muts(T.n_nodes(), a.size());
  peel_n_mutations(a,letters,T,cost,n_muts, branches_toward_node(T,root) );

  // Branch list from branches_from_node(), reversed before the traversal below.
  vector<const_branchview> branches = branches_from_node(T,root);
  std::reverse(branches.begin(),branches.end());

  // One (initially empty) list of letters per node.
  vector<vector<int> > node_letters(T.n_nodes());

  const unsigned A = a.size();

  // At the root, keep every letter whose cost equals the row minimum.
  {
    double root_min = row_min(n_muts,root);
    for(int l=0; l<A; l++)
    {
      if (n_muts(root,l) <= root_min)
      {
        node_letters[root].push_back(l);
      }
    }
  }

  vector<double> temp(A);

  for(int i=0; i<branches.size(); i++)
  {
    int s = branches[i].source();
    int t = branches[i].target();

    // best[j]: cheapest total at t given that s carries its j-th kept letter.
    vector<double> best(node_letters[s].size());
    for(int j=0; j<node_letters[s].size(); j++)
    {
      const int source_letter = node_letters[s][j];
      for(int l=0; l<A; l++)
      {
        temp[l] = n_muts(t,l)+cost(l,source_letter);
      }
      best[j] = min(temp);
    }

    // Keep letter l at t if it achieves the minimum for some kept letter of s.
    for(int l=0; l<A; l++)
    {
      bool is_best = false;
      for(int j=0; j<node_letters[s].size(); j++)
      {
        if (n_muts(t,l)+cost(l,node_letters[s][j]) <= best[j])
        {
          is_best = true;
          break;
        }
      }

      if (is_best)
      {
        node_letters[t].push_back(l);
      }
    }
  }

  return node_letters;
}
/// Sum entries of row `s1` of `Q` over everything that matches letter (class) `l2`.
///
/// With DEBUG_SMAP defined: adds Q(s1,s) for every state s whose mapped
/// letter smap[s] matches l2 according to `a.matches`.
/// Otherwise: for every letter L in [0, a.n_letters()) that matches l2, adds
/// the result of the five-argument sum(Q,smap,n_letters,s1,L) overload, which
/// is defined elsewhere.
///
/// `Q` is taken by const reference so the matrix is not copied on every call.
///
/// @param Q     matrix indexed as Q(row, state).
/// @param smap  state -> letter map.
/// @param s1    row of Q to read.
/// @param l2    letter or letter class to match against.
/// @param a     alphabet supplying matches() and n_letters().
inline double sum(const Matrix& Q,const vector<unsigned>& smap, int s1, int l2, const alphabet& a)
{
  double total=0;

#ifdef DEBUG_SMAP
  for(int s=0;s<smap.size();s++)
    if (a.matches(smap[s],l2))
      total += Q(s1,s);
#else
  // Only this branch needs the letter count.
  const int n_letters = a.n_letters();
  for(int L=0;L<n_letters;L++)
    if (a.matches(L,l2))
      total += sum(Q,smap,n_letters,s1,L);
#endif

  return total;
}
/// Convenience overload: allocates the scratch cost matrix and the branch
/// order itself, then forwards to the six-argument n_mutations().
///
/// The root is the target of directed branch 0; branches are those returned
/// by branches_toward_node(T,root). `B` is the cost type, declared outside
/// this block.
B n_mutations(const alphabet& a, const vector<int>& letters, const SequenceTree& T,const ublas::matrix<B>& cost)
{
  const int root = T.directed_branch(0).target();

  // Scratch space: one row per node, one column per letter.
  ublas::matrix<B> n_muts(T.n_nodes(), a.size());

  // Branch order pointing toward the root.
  vector<const_branchview> branches = branches_toward_node(T,root);

  return n_mutations(a,letters,T,cost,n_muts,branches);
}
/// Pick one optimal letter for every node of `T`.
///
/// The root is the target of directed branch 0. Costs are peeled toward the
/// root with peel_n_mutations(); the root then takes the letter with the
/// smallest cost, and each remaining node takes the letter that minimises
/// its own cost plus the cost of changing to the letter already chosen at
/// the source of its branch.
///
/// @return letter index per node, indexed by node number.
vector<int> get_parsimony_letters(const alphabet& a, const vector<int>& letters, const SequenceTree& T, const ublas::matrix<int>& cost)
{
  int root = T.directed_branch(0).target();

  ublas::matrix<int> n_muts(T.n_nodes(),a.size());
  peel_n_mutations(a,letters,T,cost,n_muts, branches_toward_node(T,root) );

  // Branch list from branches_from_node(), reversed before the traversal below.
  vector<const_branchview> branches = branches_from_node(T,root);
  std::reverse(branches.begin(),branches.end());

  // Allocate space to store the letter for each node; -1 marks "not yet chosen".
  vector<int> node_letters(T.n_nodes(),-1);

  const unsigned A = a.size();
  vector<double> temp(A);

  // Choose the cheapest letter at the root.
  // The root entry must be a letter INDEX. get_all_parsimony_letters() uses
  // row_min() as the minimum cost VALUE of a row, so assigning row_min()
  // here would store a cost rather than a letter. Use argmin over the root
  // row instead, the same helper used for every other node below.
  for(unsigned l=0;l<A;l++)
    temp[l] = n_muts(root,l);
  node_letters[root] = argmin(temp);

  for(int i=0;i<branches.size();i++)
  {
    int s = branches[i].source();
    int t = branches[i].target();

    // The source node must already have its letter.
    int k = node_letters[s];
    assert(k != -1);

    // Cost of each letter l at t, given letter k at s.
    for(unsigned l=0;l<A;l++)
      temp[l] = n_muts(t,l)+cost(l,k);

    node_letters[t] = argmin(temp);
  }

  return node_letters;
}
std::vector<lstring> convert_split_words_allow_alphabet(const lstring <, const alphabet &allow) { lstring copy; lstring *ptr = (lstring *)< bool changed = false; static letter space(' '); for (size_t i = 0; i < lt.size(); ++i) { if (!allow.ok(lt[i])) { if (!changed) { copy = lt; ptr = © changed = true; } copy[i] = space; } } return convert_split_words(*ptr); }
/// Load the next alignment from `ifile` using a clone of alphabet `a`, then
/// return the result of reorder_sequences() applied to it with `names`.
alignment load_next_alignment(istream& ifile, const alphabet& a, const vector<string>& names)
{
  // The shared_ptr overload is handed a freshly cloned alphabet.
  shared_ptr<const alphabet> alphabet_copy( a.clone() );

  alignment loaded = load_next_alignment(ifile, alphabet_copy);

  return reorder_sequences(loaded, names);
}
/// Load the next alignment from `ifile`, forwarding to the shared_ptr
/// overload with a clone of alphabet `a`.
alignment load_next_alignment(istream& ifile, const alphabet& a)
{
  shared_ptr<const alphabet> alphabet_copy( a.clone() );

  return load_next_alignment(ifile, alphabet_copy);
}
/// Build a cost matrix for alphabet `a` by forwarding its size to the
/// size-based unit_cost_matrix() overload (defined elsewhere).
ublas::matrix<int> unit_cost_matrix(const alphabet& a)
{
  return unit_cost_matrix( a.size() );
}
/// Write one line per letter of `a` to `o`, in the form
/// "f<letter> = <frequency>", where <letter> is a.lookup(i) and
/// <frequency> is f[i].
///
/// `f` is indexed for every i in [0, a.size()); its length is not checked here.
void show_frequencies(std::ostream& o,const alphabet& a,const std::valarray<double>& f)
{
  int letter = 0;
  while (letter < a.size())
  {
    o << "f" << a.lookup(letter) << " = " << f[letter] << "\n";
    letter++;
  }
}
// Construct an alignment whose alphabet member `a` holds a clone of `a1`,
// then call load(filename).
// NOTE(review): load() is defined elsewhere; presumably it reads the
// alignment from the named file -- confirm.
alignment::alignment(const alphabet& a1,const string& filename) :a(a1.clone()) { load(filename); }
// Construct an alignment from a list of sequences: `sequences` is
// initialized from S, `array` is constructed with (0, S.size()), and `a`
// holds a clone of alphabet `a1`. The constructor body is empty.
// NOTE(review): members are initialized in class-declaration order, not in
// the order written in this list -- confirm the two agree.
// NOTE(review): presumably array(0, n) means "0 columns, n sequences" -- confirm.
alignment::alignment(const alphabet& a1, const vector<sequence>& S) :sequences(S),array(0,S.size()),a(a1.clone()) {}
// Construct an alignment with `n` default-constructed sequences: `sequences`
// is initialized from vector<sequence>(n), `array` is constructed with
// (0, n), and `a` holds a clone of alphabet `a1`. The constructor body is empty.
// NOTE(review): members are initialized in class-declaration order, not in
// the order written in this list -- confirm the two agree.
alignment::alignment(const alphabet& a1,int n) :sequences(vector<sequence>(n)),array(0,n),a(a1.clone()) { }
// Construct an alignment whose alphabet member `a` holds a clone of `a1`.
// No other member is listed in the initializer list, and the body is empty.
alignment::alignment(const alphabet& a1) :a(a1.clone()) {}
/// Construct an alignment from a list of sequences and a clone of alphabet `a1`.
///
/// `array` is constructed with (0, S.size()) and `sequences` is initialized from S.
alignment::alignment(const alphabet& a1, const vector<sequence>& S)
  :array(0,S.size()),
   sequences(S),
   a(a1.clone())
{
  // Deliberately empty: the sequences must NOT be loaded into the matrix
  // here. This constructor is used (for some reason) to build new
  // alignment matrices during MCMC.
}
// Construct an alignment with `n` default-constructed sequences: `array` is
// constructed with (L, n), `sequences` is initialized from
// vector<sequence>(n), and `a` holds a clone of alphabet `a1`. The
// constructor body is empty.
// NOTE(review): presumably L is the alignment length (number of columns) and
// n the number of sequences -- confirm against the type of `array`.
alignment::alignment(const alphabet& a1,int n,int L) :array(L,n),sequences(vector<sequence>(n)),a(a1.clone()) { }