boost::shared_ptr<DParrayConstrained> sample_node_base(data_partition& P,const vector<int>& nodes) { default_timer_stack.push_timer("alignment::DP1/3-way"); const Tree& T = *P.T; assert(P.variable_alignment()); alignment old = *P.A; // std::cerr<<"old = "<<old<<endl; /*------------- Compute sequence properties --------------*/ int n0 = nodes[0]; int n1 = nodes[1]; int n2 = nodes[2]; int n3 = nodes[3]; vector<int> columns = getorder(old,n0,n1,n2,n3); // std::cerr<<"n0 = "<<n0<<" n1 = "<<n1<<" n2 = "<<n2<<" n3 = "<<n3<<std::endl; // std::cerr<<"old (reordered) = "<<project(old,n0,n1,n2,n3)<<endl; // Find sub-alignments and sequences vector<int> seq1; vector<int> seq2; vector<int> seq3; vector<int> seq123; for(int i=0; i<columns.size(); i++) { int column = columns[i]; if (not old.gap(column,n1)) seq1.push_back(column); if (not old.gap(column,n2)) seq2.push_back(column); if (not old.gap(column,n3)) seq3.push_back(column); if (not old.gap(column,n1) or not old.gap(column,n2) or not old.gap(column,n3)) seq123.push_back(column); } // Map columns with n2 or n3 to single index 'c' vector<int> icol(seq123.size()+1); vector<int> jcol(seq123.size()+1); vector<int> kcol(seq123.size()+1); icol[0] = 0; jcol[0] = 0; kcol[0] = 0; for(int c=1,i=0,j=0,k=0; c<seq123.size()+1; c++) { if (not old.gap(seq123[c-1],n1)) i++; if (not old.gap(seq123[c-1],n2)) j++; if (not old.gap(seq123[c-1],n3)) k++; icol[c] = i; jcol[c] = j; kcol[c] = k; } /*-------------- Create alignment matrices ---------------*/ // Cache which states emit which sequences vector<int> state_emit(nstates+1); for(int S2=0; S2<state_emit.size(); S2++) { state_emit[S2] = 0; if (di(S2) or dj(S2) or dk(S2)) state_emit[S2] |= (1<<0); } vector<int> branches; for(int i=1; i<nodes.size(); i++) branches.push_back(T.branch(nodes[0],nodes[i]) ); const Matrix Q = createQ(P.branch_HMMs,branches); vector<double> start_P = get_start_P(P.branch_HMMs,branches); // Actually create the Matrices & Chain boost::shared_ptr<DParrayConstrained> Matrices( new DParrayConstrained(seq123.size(),state_emit,start_P,Q, P.get_beta()) ); // Determine which states are allowed to match (c2) for(int c2=0; c2<Matrices->size(); c2++) { int i2 = icol[c2]; int j2 = jcol[c2]; int k2 = kcol[c2]; Matrices->states(c2).reserve(Matrices->nstates()); for(int i=0; i<Matrices->nstates(); i++) { int S2 = Matrices->order(i); //---------- Get (i1,j1,k1) ---------- int i1 = i2; if (di(S2)) i1--; int j1 = j2; if (dj(S2)) j1--; int k1 = k2; if (dk(S2)) k1--; //------ Get c1, check if valid ------ if (c2==0 or (i1 == i2 and j1 == j2 and k1 == k2) or (i1 == icol[c2-1] and j1 == jcol[c2-1] and k1 == kcol[c2-1]) ) Matrices->states(c2).push_back(S2); else { } // this state not allowed here } } /*------------------ Compute the DP matrix ---------------------*/ // Matrices.prune(); prune is broken! Matrices->forward(); //------------- Sample a path from the matrix -------------------// vector<int> path_g = Matrices->sample_path(); vector<int> path = Matrices->ungeneralize(path_g); *P.A = construct(old,path,n0,n1,n2,n3,T,seq1,seq2,seq3); for(int i=1; i<4; i++) { int b = T.branch(nodes[0],nodes[i]); P.note_alignment_changed_on_branch(b); } #ifndef NDEBUG vector<int> path_new = get_path_3way(project(*P.A,n0,n1,n2,n3),0,1,2,3); vector<int> path_new2 = get_path_3way(*P.A,n0,n1,n2,n3); assert(path_new == path_new2); // <- current implementation probably guarantees this // but its not a NECESSARY effect of the routine. // get the generalized paths - no sequential silent states that can loop vector<int> path_new_g = Matrices->generalize(path_new); assert(path_new_g == path_g); assert(valid(*P.A)); #endif default_timer_stack.pop_timer(); return Matrices; }
boost::shared_ptr<DPmatrixSimple> sample_alignment_base(data_partition& P,int b) { default_timer_stack.push_timer("alignment::DP2/2-way"); assert(P.variable_alignment()); dynamic_bitset<> s1 = constraint_satisfied(P.alignment_constraint, *P.A); const Tree& T = *P.T; alignment& A = *P.A; const Matrix frequency = substitution::frequency_matrix(P.SModel()); int node1 = T.branch(b).target(); int node2 = T.branch(b).source(); dynamic_bitset<> group1 = T.partition(node2,node1); // Find sub-alignments and sequences vector<int> seq1; vector<int> seq2; vector<int> seq12; for(int column=0;column<A.length();column++) { if (not A.gap(column,node1)) seq1.push_back(column); if (not A.gap(column,node2)) seq2.push_back(column); if (not A.gap(column,node1) or A.gap(column,node2)) seq12.push_back(column); } //FIXME - this makes the debug routines crash if (not seq1.size() or not seq2.size()) { default_timer_stack.pop_timer(); return boost::shared_ptr<DPmatrixSimple>(); //NULL; } /******** Precompute distributions at node2 from the 2 subtrees **********/ distributions_t_local distributions = distributions_tree; if (not P.smodel_full_tree) distributions = distributions_star; vector< Matrix > dists1 = distributions(P,seq1,b,true); vector< Matrix > dists2 = distributions(P,seq2,b,false); vector<int> state_emit(4,0); state_emit[0] |= (1<<1)|(1<<0); state_emit[1] |= (1<<1); state_emit[2] |= (1<<0); state_emit[3] |= 0; boost::shared_ptr<DPmatrixSimple> Matrices( new DPmatrixSimple(state_emit, P.branch_HMMs[b].start_pi(), P.branch_HMMs[b], P.get_beta(), P.SModel().distribution(), dists1, dists2, frequency) ); //------------------ Compute the DP matrix ---------------------// vector<int> path_old = get_path(A,node1,node2); vector<vector<int> > pins = get_pins(P.alignment_constraint,A,group1,~group1,seq1,seq2,seq12); vector<int> path = Matrices->forward(pins); path.erase(path.begin()+path.size()-1); *P.A = construct(A,path,node1,node2,T,seq1,seq2); P.LC.invalidate_branch_alignment(T,b); P.note_alignment_changed_on_branch(b); #ifndef NDEBUG_DP assert(valid(*P.A)); vector<int> path_new = get_path(*P.A, node1, node2); path.push_back(3); assert(path_new == path); #endif default_timer_stack.pop_timer(); return Matrices; }