Esempio n. 1
0
boost::shared_ptr<DParrayConstrained> sample_node_base(data_partition& P,const vector<int>& nodes)
{
    default_timer_stack.push_timer("alignment::DP1/3-way");
    const Tree& T = *P.T;

    assert(P.variable_alignment());

    alignment old = *P.A;

    //  std::cerr<<"old = "<<old<<endl;

    /*------------- Compute sequence properties --------------*/
    int n0 = nodes[0];
    int n1 = nodes[1];
    int n2 = nodes[2];
    int n3 = nodes[3];
    vector<int> columns = getorder(old,n0,n1,n2,n3);

    //  std::cerr<<"n0 = "<<n0<<"   n1 = "<<n1<<"    n2 = "<<n2<<"    n3 = "<<n3<<std::endl;
    //  std::cerr<<"old (reordered) = "<<project(old,n0,n1,n2,n3)<<endl;

    // Find sub-alignments and sequences
    vector<int> seq1;
    vector<int> seq2;
    vector<int> seq3;
    vector<int> seq123;
    for(int i=0; i<columns.size(); i++) {
        int column = columns[i];
        if (not old.gap(column,n1))
            seq1.push_back(column);
        if (not old.gap(column,n2))
            seq2.push_back(column);
        if (not old.gap(column,n3))
            seq3.push_back(column);
        if (not old.gap(column,n1) or not old.gap(column,n2) or not old.gap(column,n3))
            seq123.push_back(column);
    }

    // Map columns with n2 or n3 to single index 'c'
    vector<int> icol(seq123.size()+1);
    vector<int> jcol(seq123.size()+1);
    vector<int> kcol(seq123.size()+1);

    icol[0] = 0;
    jcol[0] = 0;
    kcol[0] = 0;
    for(int c=1,i=0,j=0,k=0; c<seq123.size()+1; c++) {
        if (not old.gap(seq123[c-1],n1))
            i++;
        if (not old.gap(seq123[c-1],n2))
            j++;
        if (not old.gap(seq123[c-1],n3))
            k++;
        icol[c] = i;
        jcol[c] = j;
        kcol[c] = k;
    }


    /*-------------- Create alignment matrices ---------------*/

    // Cache which states emit which sequences
    vector<int> state_emit(nstates+1);
    for(int S2=0; S2<state_emit.size(); S2++) {
        state_emit[S2] = 0;

        if (di(S2) or dj(S2) or dk(S2))
            state_emit[S2] |= (1<<0);
    }


    vector<int> branches;
    for(int i=1; i<nodes.size(); i++)
        branches.push_back(T.branch(nodes[0],nodes[i]) );

    const Matrix Q = createQ(P.branch_HMMs,branches);
    vector<double> start_P = get_start_P(P.branch_HMMs,branches);


    // Actually create the Matrices & Chain
    boost::shared_ptr<DParrayConstrained>
    Matrices( new DParrayConstrained(seq123.size(),state_emit,start_P,Q, P.get_beta())
            );

    // Determine which states are allowed to match (c2)
    for(int c2=0; c2<Matrices->size(); c2++) {
        int i2 = icol[c2];
        int j2 = jcol[c2];
        int k2 = kcol[c2];
        Matrices->states(c2).reserve(Matrices->nstates());
        for(int i=0; i<Matrices->nstates(); i++) {
            int S2 = Matrices->order(i);

            //---------- Get (i1,j1,k1) ----------
            int i1 = i2;
            if (di(S2)) i1--;

            int j1 = j2;
            if (dj(S2)) j1--;

            int k1 = k2;
            if (dk(S2)) k1--;

            //------ Get c1, check if valid ------
            if (c2==0
                    or (i1 == i2 and j1 == j2 and k1 == k2)
                    or (i1 == icol[c2-1] and j1 == jcol[c2-1] and k1 == kcol[c2-1]) )
                Matrices->states(c2).push_back(S2);
            else
            { } // this state not allowed here
        }
    }


    /*------------------ Compute the DP matrix ---------------------*/
    // Matrices.prune();  prune is broken!
    Matrices->forward();

    //------------- Sample a path from the matrix -------------------//

    vector<int> path_g = Matrices->sample_path();
    vector<int> path = Matrices->ungeneralize(path_g);

    *P.A = construct(old,path,n0,n1,n2,n3,T,seq1,seq2,seq3);
    for(int i=1; i<4; i++) {
        int b = T.branch(nodes[0],nodes[i]);
        P.note_alignment_changed_on_branch(b);
    }

#ifndef NDEBUG
    vector<int> path_new = get_path_3way(project(*P.A,n0,n1,n2,n3),0,1,2,3);
    vector<int> path_new2 = get_path_3way(*P.A,n0,n1,n2,n3);
    assert(path_new == path_new2); // <- current implementation probably guarantees this
    //    but its not a NECESSARY effect of the routine.

    // get the generalized paths - no sequential silent states that can loop
    vector<int> path_new_g = Matrices->generalize(path_new);
    assert(path_new_g == path_g);
    assert(valid(*P.A));


#endif

    default_timer_stack.pop_timer();
    return Matrices;
}
Esempio n. 2
0
boost::shared_ptr<DPmatrixSimple> sample_alignment_base(data_partition& P,int b) 
{
  default_timer_stack.push_timer("alignment::DP2/2-way");
  assert(P.variable_alignment());

  dynamic_bitset<> s1 = constraint_satisfied(P.alignment_constraint, *P.A);

  const Tree& T = *P.T;
  alignment& A = *P.A;

  const Matrix frequency = substitution::frequency_matrix(P.SModel());

  int node1 = T.branch(b).target();
  int node2 = T.branch(b).source();

  dynamic_bitset<> group1 = T.partition(node2,node1);

  // Find sub-alignments and sequences
  vector<int> seq1;
  vector<int> seq2;
  vector<int> seq12;

  for(int column=0;column<A.length();column++)
  {
    if (not A.gap(column,node1))
      seq1.push_back(column);
    if (not A.gap(column,node2))
      seq2.push_back(column);

    if (not A.gap(column,node1) or A.gap(column,node2))
      seq12.push_back(column);
  }

  //FIXME - this makes the debug routines crash
  if (not seq1.size() or not seq2.size()) 
  {
    default_timer_stack.pop_timer();
    return boost::shared_ptr<DPmatrixSimple>(); //NULL;
  }

  /******** Precompute distributions at node2 from the 2 subtrees **********/
  distributions_t_local distributions = distributions_tree;
  if (not P.smodel_full_tree)
    distributions = distributions_star;

  vector< Matrix > dists1 = distributions(P,seq1,b,true);
  vector< Matrix > dists2 = distributions(P,seq2,b,false);

  vector<int> state_emit(4,0);
  state_emit[0] |= (1<<1)|(1<<0);
  state_emit[1] |= (1<<1);
  state_emit[2] |= (1<<0);
  state_emit[3] |= 0;

  boost::shared_ptr<DPmatrixSimple> 
    Matrices( new DPmatrixSimple(state_emit, P.branch_HMMs[b].start_pi(),
				 P.branch_HMMs[b], P.get_beta(),
				 P.SModel().distribution(), dists1, dists2, frequency)
	      );

  //------------------ Compute the DP matrix ---------------------//
  vector<int> path_old = get_path(A,node1,node2);
  vector<vector<int> > pins = get_pins(P.alignment_constraint,A,group1,~group1,seq1,seq2,seq12);

  vector<int> path = Matrices->forward(pins);

  path.erase(path.begin()+path.size()-1);

  *P.A = construct(A,path,node1,node2,T,seq1,seq2);
  P.LC.invalidate_branch_alignment(T,b);
  P.note_alignment_changed_on_branch(b);

#ifndef NDEBUG_DP
  assert(valid(*P.A));

  vector<int> path_new = get_path(*P.A, node1, node2);
  path.push_back(3);
  assert(path_new == path);
#endif

  default_timer_stack.pop_timer();
  return Matrices;
}