Esempio n. 1
0
log_double_t correction(const data_partition& P,const vector<int>& nodes)
{
    if (P.variable_alignment())
    {
        // get the lengths of then internal node
        int length = P.seqlength(nodes[0]);

        return pow(P.sequence_length_pr(length), 2);
    }
    else
        return 1;
}
Esempio n. 2
0
boost::shared_ptr<DParrayConstrained> sample_node_base(data_partition& P,const vector<int>& nodes)
{
    default_timer_stack.push_timer("alignment::DP1/3-way");
    const Tree& T = *P.T;

    assert(P.variable_alignment());

    alignment old = *P.A;

    //  std::cerr<<"old = "<<old<<endl;

    /*------------- Compute sequence properties --------------*/
    int n0 = nodes[0];
    int n1 = nodes[1];
    int n2 = nodes[2];
    int n3 = nodes[3];
    vector<int> columns = getorder(old,n0,n1,n2,n3);

    //  std::cerr<<"n0 = "<<n0<<"   n1 = "<<n1<<"    n2 = "<<n2<<"    n3 = "<<n3<<std::endl;
    //  std::cerr<<"old (reordered) = "<<project(old,n0,n1,n2,n3)<<endl;

    // Find sub-alignments and sequences
    vector<int> seq1;
    vector<int> seq2;
    vector<int> seq3;
    vector<int> seq123;
    for(int i=0; i<columns.size(); i++) {
        int column = columns[i];
        if (not old.gap(column,n1))
            seq1.push_back(column);
        if (not old.gap(column,n2))
            seq2.push_back(column);
        if (not old.gap(column,n3))
            seq3.push_back(column);
        if (not old.gap(column,n1) or not old.gap(column,n2) or not old.gap(column,n3))
            seq123.push_back(column);
    }

    // Map columns with n2 or n3 to single index 'c'
    vector<int> icol(seq123.size()+1);
    vector<int> jcol(seq123.size()+1);
    vector<int> kcol(seq123.size()+1);

    icol[0] = 0;
    jcol[0] = 0;
    kcol[0] = 0;
    for(int c=1,i=0,j=0,k=0; c<seq123.size()+1; c++) {
        if (not old.gap(seq123[c-1],n1))
            i++;
        if (not old.gap(seq123[c-1],n2))
            j++;
        if (not old.gap(seq123[c-1],n3))
            k++;
        icol[c] = i;
        jcol[c] = j;
        kcol[c] = k;
    }


    /*-------------- Create alignment matrices ---------------*/

    // Cache which states emit which sequences
    vector<int> state_emit(nstates+1);
    for(int S2=0; S2<state_emit.size(); S2++) {
        state_emit[S2] = 0;

        if (di(S2) or dj(S2) or dk(S2))
            state_emit[S2] |= (1<<0);
    }


    vector<int> branches;
    for(int i=1; i<nodes.size(); i++)
        branches.push_back(T.branch(nodes[0],nodes[i]) );

    const Matrix Q = createQ(P.branch_HMMs,branches);
    vector<double> start_P = get_start_P(P.branch_HMMs,branches);


    // Actually create the Matrices & Chain
    boost::shared_ptr<DParrayConstrained>
    Matrices( new DParrayConstrained(seq123.size(),state_emit,start_P,Q, P.get_beta())
            );

    // Determine which states are allowed to match (c2)
    for(int c2=0; c2<Matrices->size(); c2++) {
        int i2 = icol[c2];
        int j2 = jcol[c2];
        int k2 = kcol[c2];
        Matrices->states(c2).reserve(Matrices->nstates());
        for(int i=0; i<Matrices->nstates(); i++) {
            int S2 = Matrices->order(i);

            //---------- Get (i1,j1,k1) ----------
            int i1 = i2;
            if (di(S2)) i1--;

            int j1 = j2;
            if (dj(S2)) j1--;

            int k1 = k2;
            if (dk(S2)) k1--;

            //------ Get c1, check if valid ------
            if (c2==0
                    or (i1 == i2 and j1 == j2 and k1 == k2)
                    or (i1 == icol[c2-1] and j1 == jcol[c2-1] and k1 == kcol[c2-1]) )
                Matrices->states(c2).push_back(S2);
            else
            { } // this state not allowed here
        }
    }


    /*------------------ Compute the DP matrix ---------------------*/
    // Matrices.prune();  prune is broken!
    Matrices->forward();

    //------------- Sample a path from the matrix -------------------//

    vector<int> path_g = Matrices->sample_path();
    vector<int> path = Matrices->ungeneralize(path_g);

    *P.A = construct(old,path,n0,n1,n2,n3,T,seq1,seq2,seq3);
    for(int i=1; i<4; i++) {
        int b = T.branch(nodes[0],nodes[i]);
        P.note_alignment_changed_on_branch(b);
    }

#ifndef NDEBUG
    vector<int> path_new = get_path_3way(project(*P.A,n0,n1,n2,n3),0,1,2,3);
    vector<int> path_new2 = get_path_3way(*P.A,n0,n1,n2,n3);
    assert(path_new == path_new2); // <- current implementation probably guarantees this
    //    but its not a NECESSARY effect of the routine.

    // get the generalized paths - no sequential silent states that can loop
    vector<int> path_new_g = Matrices->generalize(path_new);
    assert(path_new_g == path_g);
    assert(valid(*P.A));


#endif

    default_timer_stack.pop_timer();
    return Matrices;
}
Esempio n. 3
0
boost::shared_ptr<DPmatrixSimple> sample_alignment_base(data_partition& P,int b) 
{
  default_timer_stack.push_timer("alignment::DP2/2-way");
  assert(P.variable_alignment());

  dynamic_bitset<> s1 = constraint_satisfied(P.alignment_constraint, *P.A);

  const Tree& T = *P.T;
  alignment& A = *P.A;

  const Matrix frequency = substitution::frequency_matrix(P.SModel());

  int node1 = T.branch(b).target();
  int node2 = T.branch(b).source();

  dynamic_bitset<> group1 = T.partition(node2,node1);

  // Find sub-alignments and sequences
  vector<int> seq1;
  vector<int> seq2;
  vector<int> seq12;

  for(int column=0;column<A.length();column++)
  {
    if (not A.gap(column,node1))
      seq1.push_back(column);
    if (not A.gap(column,node2))
      seq2.push_back(column);

    if (not A.gap(column,node1) or A.gap(column,node2))
      seq12.push_back(column);
  }

  //FIXME - this makes the debug routines crash
  if (not seq1.size() or not seq2.size()) 
  {
    default_timer_stack.pop_timer();
    return boost::shared_ptr<DPmatrixSimple>(); //NULL;
  }

  /******** Precompute distributions at node2 from the 2 subtrees **********/
  distributions_t_local distributions = distributions_tree;
  if (not P.smodel_full_tree)
    distributions = distributions_star;

  vector< Matrix > dists1 = distributions(P,seq1,b,true);
  vector< Matrix > dists2 = distributions(P,seq2,b,false);

  vector<int> state_emit(4,0);
  state_emit[0] |= (1<<1)|(1<<0);
  state_emit[1] |= (1<<1);
  state_emit[2] |= (1<<0);
  state_emit[3] |= 0;

  boost::shared_ptr<DPmatrixSimple> 
    Matrices( new DPmatrixSimple(state_emit, P.branch_HMMs[b].start_pi(),
				 P.branch_HMMs[b], P.get_beta(),
				 P.SModel().distribution(), dists1, dists2, frequency)
	      );

  //------------------ Compute the DP matrix ---------------------//
  vector<int> path_old = get_path(A,node1,node2);
  vector<vector<int> > pins = get_pins(P.alignment_constraint,A,group1,~group1,seq1,seq2,seq12);

  vector<int> path = Matrices->forward(pins);

  path.erase(path.begin()+path.size()-1);

  *P.A = construct(A,path,node1,node2,T,seq1,seq2);
  P.LC.invalidate_branch_alignment(T,b);
  P.note_alignment_changed_on_branch(b);

#ifndef NDEBUG_DP
  assert(valid(*P.A));

  vector<int> path_new = get_path(*P.A, node1, node2);
  path.push_back(3);
  assert(path_new == path);
#endif

  default_timer_stack.pop_timer();
  return Matrices;
}
Esempio n. 4
0
boost::shared_ptr<DPmatrixConstrained> tri_sample_alignment_base(data_partition& P,const vector<int>& nodes)
{
  const Tree& T = *P.T;
  alignment& A = *P.A;

  assert(P.variable_alignment());

  assert(T.is_connected(nodes[0],nodes[1]));
  assert(T.is_connected(nodes[0],nodes[2]));
  assert(T.is_connected(nodes[0],nodes[3]));

  const Matrix frequency = substitution::frequency_matrix(P.SModel());

  // std::cerr<<"A = "<<A<<endl;

  //------------- Compute sequence properties --------------//
  dynamic_bitset<> group1 = T.partition(nodes[0],nodes[1]);
  dynamic_bitset<> group2 = T.partition(nodes[0],nodes[2]);
  dynamic_bitset<> group3 = T.partition(nodes[0],nodes[3]);


  //  std::clog<<"n0 = "<<nodes[0]<<"   n1 = "<<nodes[1]<<"    n2 = "<<nodes[2]<<"    n3 = "<<nodes[3]<<std::endl;
  //  std::clog<<"A (reordered) = "<<project(A,nodes[0],nodes[1],nodes[2],nodes[3])<<endl;
  vector<int> columns = getorder(A,nodes[0],nodes[1],nodes[2],nodes[3]);

#ifndef NDEBUG

  // getorder(project(A,...)...) is not the same as getorder(A,...) because columns that are
  // in both project(A,...) and A have different columns numbers in each alignment, and
  // project(A,...) is shorter.

  // However, the NUMBER of columns should be the same. 
  vector<int> columns2 = getorder(project(A,nodes[0],nodes[1],nodes[2],nodes[3]),0,1,2,3);
  assert(columns.size() == columns2.size());
#endif

  // Find sub-alignments and sequences
  vector<int> seq1; seq1.reserve(A.length());
  vector<int> seq2; seq2.reserve(A.length());
  vector<int> seq3; seq3.reserve(A.length());
  vector<int> seq23; seq23.reserve(A.length());
  for(int i=0;i<columns.size();i++) {
    int column = columns[i];
    if (not A.gap(column,nodes[1]))
      seq1.push_back(column);
    if (not A.gap(column,nodes[2]))
      seq2.push_back(column);
    if (not A.gap(column,nodes[3]))
      seq3.push_back(column);

    if (not A.gap(column,nodes[2]) or not A.gap(column,nodes[3]))
      seq23.push_back(column);
  }

  // Map columns with n2 or n3 to single index 'c'
  vector<int> jcol(seq23.size()+1);
  vector<int> kcol(seq23.size()+1);

  jcol[0] = 0;
  kcol[0] = 0;
  for(int c=1,j=0,k=0;c<seq23.size()+1;c++) {
    if (not A.gap(seq23[c-1],nodes[2]))
      j++;    
    if (not A.gap(seq23[c-1],nodes[3]))
      k++;
    jcol[c] = j;
    kcol[c] = k;
  }

  // Precompute distributions at nodes[0]
  distributions_t distributions = distributions_tree;
  if (not P.smodel_full_tree)
    distributions = distributions_star;

  vector< Matrix > dists1 = distributions(P,seq1,nodes[0],group1);
  vector< Matrix > dists23 = distributions(P,seq23,nodes[0],group2|group3);


  //-------------- Create alignment matrices ---------------//

  vector<int> branches(3);
  for(int i=0;i<3;i++)
    branches[i] = T.branch(nodes[0],nodes[i+1]);

  const Matrix Q = createQ(P.branch_HMMs, branches);
  vector<double> start_P = get_start_P(P.branch_HMMs,branches);

  // Actually create the Matrices & Chain
  boost::shared_ptr<DPmatrixConstrained> 
    Matrices(new DPmatrixConstrained(get_state_emit(), start_P, Q, P.beta[0],
				     P.SModel().distribution(), dists1, dists23, frequency)
	     );

  // Determine which states are allowed to match (,c2)
  for(int c2=0;c2<dists23.size()-1;c2++) 
  {
    int j2 = jcol[c2];
    int k2 = kcol[c2];
    Matrices->states(c2).reserve(Matrices->nstates());
    for(int i=0;i<Matrices->nstates();i++) {
      int S2 = Matrices->order(i);

      //---------- Get (,j1,k1) ----------
      int j1 = j2;
      if (dj(S2)) 
	j1--;

      int k1 = k2;
      if (dk(S2)) 
	k1--;
      
      //------ Get c1, check if valid ------
      if (c2==0 or (j1 == j2 and k1 == k2) or (j1 == jcol[c2-1] and k1 == kcol[c2-1]) )
	Matrices->states(c2+1).push_back(S2);
      else
	{ } // this state not allowed here
    }
  }


  //------------------ Compute the DP matrix ---------------------//

  //   Matrices.prune(); prune is broken!
  
  //  vector<int> path_old = get_path_3way(project(A,nodes[0],nodes[1],nodes[2],nodes[3]),0,1,2,3);
  //  vector<int> path_old_g = Matrices.generalize(path_old);

  //  vector<int> path_g = Matrices.forward(P.features,(int)P.constants[0],path_old_g);
  vector<vector<int> > pins = get_pins(P.alignment_constraint,A,group1,group2 | group3,seq1,seq23,columns);

  // if the constraints are currently met but cannot be met
  if (pins.size() == 1 and pins[0][0] == -1)
    ; //std::cerr<<"Constraints cannot be expressed in terms of DP matrix paths!"<<std::endl;
  else {
    Matrices->forward_constrained(pins);
    if (Matrices->Pr_sum_all_paths() <= 0.0) 
      std::cerr<<"Constraints give this choice probability 0"<<std::endl;
  }

  if (Matrices->Pr_sum_all_paths() <= 0.0) 
    return Matrices;

  vector<int> path_g = Matrices->sample_path();

  vector<int> path = Matrices->ungeneralize(path_g);

  A = construct(A,path,nodes[0],nodes[1],nodes[2],nodes[3],T,seq1,seq2,seq3);
  for(int i=1;i<4;i++) {
    int b = T.branch(nodes[0],nodes[i]);
    P.note_alignment_changed_on_branch(b);
  }

#ifndef NDEBUG_DP
  //--------------- Check alignment construction ------------------//
  vector<int> path_new = get_path_3way(project(A,nodes),0,1,2,3);

  vector<int> path_new2 = get_path_3way(A,nodes);
  assert(path_new == path_new2); // <- current implementation probably guarantees this
                                 //    but its not a NECESSARY effect of the routine.
                                 //    due to ordering stuff required in the path but
                                 //    not store in the alignment A.
  vector<int> path_new_g = Matrices->generalize(path_new);
  if (path_new_g != path_g) {
    std::clog<<"A' (reordered) = "<<project(A,nodes)<<endl;
    std::clog<<"A' = "<<A<<endl;
    std::abort();
  }

  assert(valid(A));
#endif

  //  std::cerr<<"[tri]bandwidth = "<<bandwidth(Matrices,path_g)<<std::endl;

  //  std::cerr<<"[tri]bandwidth2 = "<<bandwidth2(Matrices,path_g)<<std::endl;

#ifndef NDEBUG_DP
  check_alignment(A,T,"sample_tri_base:out");
#else
  Matrices->clear();
#endif

  P.LC.set_length(A.length());
  int b = T.branch(nodes[0],nodes[1]);
  P.LC.invalidate_branch_alignment(T, b);

  return Matrices;
}