Example #1
0
/// Do a SPR move on T1, moving the subtree behind b1_ to branch b2
double do_SPR(SequenceTree& T1, int b1_,int b2)
{
    const_branchview b1 = T1.directed_branch(b1_);

    SequenceTree T2 = T1;

    //------ Generate the new topology ------//
    if (T2.directed_branch(b2).target() == b1.target() or
            T2.directed_branch(b2).source() == b1.target())
        ;
    else
        SPR(T2,b1.reverse(),b2);

    //------ Find the two new branches ------//
    vector<const_branchview> connected1;
    append(T1.directed_branch(b1.source(),b1.target()).branches_after(),connected1);

    vector<const_branchview> connected2;
    append(T2.directed_branch(b1.source(),b1.target()).branches_after(),connected2);

    assert(connected1.size() == 2);
    assert(connected2.size() == 2);

    //------- Place the split randomly -------//
    double L1 = connected1[0].length() + connected1[1].length();
    double L2 = connected2[0].length() + connected2[1].length();

    T2.directed_branch(connected2[0]).set_length( myrandomf() * L2 );
    T2.directed_branch(connected2[1]).set_length( L2 - T2.directed_branch(connected2[0]).length() );

    T1 = T2;

    return L2/L1;
}
Example #2
0
int choose_SPR_target(SequenceTree& T1, int b1_)
{
    const_branchview b1 = T1.directed_branch(b1_);

    //----- Select the branch to move to ------//
    dynamic_bitset<> subtree_nodes = T1.partition(b1.reverse());
    subtree_nodes[b1.target()] = true;

    vector<int> branches;
    vector<double> lengths;

    for(int i=0; i<T1.n_branches(); i++)
    {
        const_branchview bi = T1.branch(i);

        // skip branch if its contained in the subtree
        if (subtree_nodes[bi.target()] and
                subtree_nodes[bi.source()])
            continue;

        double L = 1.0;

        // down-weight branch if it is one of the subtree's 2 neighbors
        if (subtree_nodes[bi.target()] or
                subtree_nodes[bi.source()])
            L = 0.5;

        branches.push_back(i);
        lengths.push_back(L);
    }

    int b2 = branches[ choose(lengths) ];

    return b2;
}
Example #3
0
bool update_lengths(const SequenceTree& Q,const SequenceTree& T,
		    valarray<double>& branch_lengths, 
		    valarray<double>& branch_lengths_squared, 
		    valarray<double>& node_lengths)
{
  // map branches from Q -> T
  vector<int> branches_map = extends_map(T,Q);
  if (not branches_map.size())
    return false;

  // incorporate lengths of branches that map to Q
  for(int b=0;b<Q.n_branches();b++)
  {
    int b2 = branches_map[b];
    double L = T.directed_branch(b2).length();
    branch_lengths[b] += L;
    branch_lengths_squared[b] += L*L;
  }

  // map nodes from T -> Q
  vector<int> nodes_map = get_nodes_map(Q,T,branches_map);

  // incorprate lengths of branches that map to nodes in Q
  for(int i=T.n_leafbranches();i<T.n_branches();i++) 
  {
    const_branchview b = T.branch(i);
    int n1 = nodes_map[b.source()];
    int n2 = nodes_map[b.target()];

    if (n1 == n2)
      node_lengths[n1] += T.branch(i).length();
  }

  return true;
}
Example #4
0
B n_mutations(const alphabet& a, const vector<int>& letters, const SequenceTree& T,const ublas::matrix<B>& cost,
	      ublas::matrix<B>& n_muts, const vector<const_branchview>& branches)
{
  int root = T.directed_branch(0).target();

  peel_n_mutations(a,letters,T,cost,n_muts,branches);

  return row_min(n_muts,root);
}
Example #5
0
B n_mutations(const alphabet& a, const vector<int>& letters, const SequenceTree& T,const ublas::matrix<B>& cost)
{
  int root = T.directed_branch(0).target();

  vector<const_branchview> branches = branches_toward_node(T,root);

  ublas::matrix<B> n_muts(T.n_nodes(), a.size());

  return n_mutations(a,letters,T,cost,n_muts,branches);
}
Example #6
0
vector<vector<int> > get_all_parsimony_letters(const alphabet& a, const vector<int>& letters, const SequenceTree& T,
					       const ublas::matrix<int>& cost)
{
  int root = T.directed_branch(0).target();

  ublas::matrix<int> n_muts(T.n_nodes(), a.size());
  peel_n_mutations(a,letters,T,cost,n_muts, branches_toward_node(T,root) );

  // get an order list of branches point away from the root;
  vector<const_branchview> branches = branches_from_node(T,root);
  std::reverse(branches.begin(),branches.end());
  
  // Allocate space to store the letters for each node
  vector<vector<int> > node_letters(T.n_nodes());

  const unsigned A = a.size();

  // choose the cheapest letters at the root
  {
    double m = row_min(n_muts,root);
    for(int l=0;l<A;l++)
      if (n_muts(root,l) <= m)
	node_letters[root].push_back(l);
  }

  vector<double> temp(A);

  for(int i=0;i<branches.size();i++) 
  {
    int s = branches[i].source();
    int t = branches[i].target();

    vector<double> best(node_letters[s].size());

    for(int j=0;j<node_letters[s].size();j++) 
    {
      for(int l=0;l<A;l++)
	temp[l] = n_muts(t,l)+cost(l,node_letters[s][j]);
      best[j] = min(temp);
    }
    
    for(int l=0;l<A;l++) 
    {
      bool is_best = false;
      for(int j=0;j<node_letters[s].size() and not is_best;j++) 
	if (n_muts(t,l)+cost(l,node_letters[s][j]) <= best[j])
	  is_best=true;
      if (is_best)
	node_letters[t].push_back(l);
    }

  }

  return node_letters;
}
Example #7
0
B n_mutations(const alignment& A, const SequenceTree& T,const ublas::matrix<B>& cost)
{
  const alphabet& a = A.get_alphabet();

  vector<int> letters(T.n_leaves());

  int root = T.directed_branch(0).target();

  vector<const_branchview> branches = branches_toward_node(T,root);

  ublas::matrix<B> n_muts(T.n_nodes(), a.size());

  double tree_length = 0;
  for(int c=0;c<A.length();c++) {
    for(int i=0;i<T.n_leaves();i++)
      letters[i] = A(c,i);
    double length = n_mutations<B>(a,letters,T,cost,n_muts,branches);
    tree_length += length;
  }

  return tree_length;
}
Example #8
0
vector<int> get_parsimony_letters(const alphabet& a, const vector<int>& letters, const SequenceTree& T,
				  const ublas::matrix<int>& cost)
{
  int root = T.directed_branch(0).target();
  ublas::matrix<int> n_muts(T.n_nodes(),a.size());

  peel_n_mutations(a,letters,T,cost,n_muts, branches_toward_node(T,root) );

  // get an order list of branches point away from the root;
  vector<const_branchview> branches = branches_from_node(T,root);
  std::reverse(branches.begin(),branches.end());
  
  // Allocate space to store the letter for each node
  vector<int> node_letters(T.n_nodes(),-1);

  // choose the cheapest letter at the root
  node_letters[root] = row_min(n_muts,root);

  const unsigned A = a.size();
  vector<double> temp(A);

  for(int i=0;i<branches.size();i++) 
  {
    int s = branches[i].source();
    int t = branches[i].target();

    int k = node_letters[s];
    assert(k != -1);

    for(int l=0;l<A;l++)
      temp[l] = n_muts(t,l)+cost(l,k);

    node_letters[t] = argmin(temp);
  }

  return node_letters;
}
Example #9
0
// mark nodes in T according to what node of Q they map to
vector<int> get_nodes_map(const SequenceTree& Q,const SequenceTree& T,
			  const vector<int>& branches_map)
{
  assert(branches_map.size() == Q.n_branches() * 2);

  vector<int> nodes_map(T.n_nodes(),-1);

  // map nodes from T -> Q that are in both trees
  for(int b=0;b<Q.n_branches();b++)
  {
    int Q_source = Q.branch(b).source();
    int Q_target = Q.branch(b).target();

    int b2 = branches_map[b];

    int T_source = T.directed_branch(b2).source();
    int T_target = T.directed_branch(b2).target();

    if (nodes_map[T_source] == -1)
      nodes_map[T_source] = Q_source;
    else
      assert(nodes_map[T_source] == Q_source);

    if (nodes_map[T_target] == -1)
      nodes_map[T_target] = Q_target;
    else
      assert(nodes_map[T_target] == Q_target);
  }

  // map the rest of the nodes from T -> Q
  for(int i=Q.n_leaves();i<Q.n_nodes();i++) 
  {
    unsigned D = Q[i].degree();
    if (D <= 3) continue;

    // get a branch of Q pointing into the node
    const_branchview outside = *(Q[i].branches_in());
    // get a branch of T pointing into the node
    outside = T.directed_branch(branches_map[outside.name()]);

    list<const_branchview> branches;
    typedef list<const_branchview>::iterator list_iterator;
    append(outside.branches_after(),branches);
    for(list_iterator b = branches.begin() ; b != branches.end();)
    {
      int node = (*b).target();
      if (nodes_map[node] == -1)
	nodes_map[node] = i;

      if (nodes_map[node] == i) {
	append((*b).branches_after(),branches);
	b++;
      }
      else {
	list_iterator prev = b;
	b++;
	branches.erase(prev);
      }
    }
    assert(branches.size() == D-3);
  }

  for(int i=0;i<nodes_map.size();i++)
    assert(nodes_map[i] != -1);

  return nodes_map;
}
Example #10
0
int main(int argc,char* argv[])
{ 
  try {
    //---------- Parse command line  -------//
    variables_map args = parse_cmd_line(argc,argv);

    //----------- Load alignment and tree ---------//
    alignment A;
    SequenceTree T;
    if (args.count("tree"))
      load_A_and_T(args,A,T,false);
    else
      A = load_A(args,false);

    const alphabet& a = A.get_alphabet();
    
    //------- Load groups and find branches -------//
    vector<sequence_group> groups;
    if (args.count("groups")) 
      groups = load_groups(A,args["groups"].as<string>());

    for(int i=0;i<groups.size();i++) {
      cerr<<groups[i].name<<": ";
      for(int j=0;j<groups[i].taxa.size();j++)
	cerr<<A.seq(groups[i].taxa[j]).name<<" ";
      cerr<<endl;
    }

    vector<int> group_branches;
    if (args.count("tree"))
    {
      for(int i=0;i<groups.size();i++)
      {
	dynamic_bitset<> p(T.n_leaves());
	for(int j=0;j<groups[i].taxa.size();j++)
	  p[groups[i].taxa[j]] = true;

	int found = -1;
	for(int b=0;b<2*T.n_branches() and found == -1;b++)
	  if (p == branch_partition(T,b))
	    found = b;
	if (found == -1)
	  throw myexception()<<"I can't find group "<<i+1<<" on the tree!";
	
	group_branches.push_back(found);
      }
    }

    vector<string> group_names;
    for(int i=0;i<groups.size();i++)
      group_names.push_back(groups[i].name);

    vector<Partition> splits;
    if (args.count("split")) 
    {
      vector<string> split = args["split"].as<vector<string> >();
      for(int i=0;i<split.size();i++) 
	splits.push_back(Partition(group_names,split[i]));
    }

    //-------------------------------------------//

    Matrix C(A.length(),A.n_sequences()+1);
    for(int i=0;i<C.size1();i++)
      for(int j=0;j<C.size2();j++)
	C(i,j) = 0;

    // yes but, how much more conservation THAN EXPECTED do we see?

    for(int c=0;c<C.size1();c++) 
    {
      vector<bool> interesting(groups.size(), true);

      //-------------------------------------------------------//
      vector<int> leaf_letters( T.n_leaves() );
      for(int j=0;j<leaf_letters.size();j++)
	leaf_letters[j] = A(c,j);
      vector<vector<int> > node_letters = get_all_parsimony_letters(a,leaf_letters,T,unit_cost_matrix(a));

      vector<vector<int> > initial_value(groups.size());
      for(int g=0;g<groups.size();g++) 
      {
	int n = T.directed_branch(group_branches[g]).target();
	initial_value[g] = node_letters[n];
      }

      //------------ find 'group conserved at' values ----------//
      vector<int> value(groups.size(),alphabet::gap);

      for(int g=0;g<groups.size();g++) 
      {
	vector<int> temp;
	for(int i=0;i<groups[g].taxa.size();i++)
	  temp.push_back(A(c,groups[g].taxa[i]));

	int best = most_common(temp);
	int count = number_of(temp,best);
	
	if (count >= groups[g].taxa.size()-1 and count >=3 and count> groups[g].taxa.size()/2)
	  value[g] = best;
      }

      //-------- Determine whether column is interesting --------//
      if (args.count("require-all-different"))
      {
	if (args.count("split"))
	{
	  vector<bool> in_changed_split(groups.size(),false);
	  for(int i=0;i<splits.size();i++) 
	  {
	    bool some_different = false;
	    for(int g1=0;g1<groups.size();g1++)
	      for(int g2=0;g2<groups.size();g2++)
		if (splits[i].group1[g1] and splits[i].group2[g2]) {
		  if (all_different(A,c,groups,g1,g2))
		    some_different = true;
		}
	  
	    bool no_same = true;
	    for(int g1=0;g1<groups.size();g1++)
	      for(int g2=0;g2<groups.size();g2++)
		if (splits[i].group1[g1] and splits[i].group2[g2])
		  if (not all_different(A,c,groups,g1,g2))
		    no_same = false;

	    if (some_different and no_same) 
	      for(int g=0;g<groups.size();g++)
		if (splits[i].group1[g] or splits[i].group2[g])
		  in_changed_split[g] = true;
	  }

	  for(int g=0;g<groups.size();g++)
	    interesting[g] = interesting[g] and in_changed_split[g];
	}

	else {
	  bool different = false;
	  for(int g1=0;g1<groups.size();g1++)
	    for(int g2=0;g2<groups.size();g2++)
	      if (all_different(A,c,groups,g1,g2))
		different = true;

	  if (not different)
	    for(int g=0;g<groups.size();g++) 
	      interesting[g] = false;
	}
      }
    
      if (args.count("require-change"))
      {
	if (args.count("split"))
	{
	  vector<bool> in_changed_split(groups.size(),false);
	  for(int i=0;i<splits.size();i++) 
	  {
	    bool some_different = false;
	    for(int g1=0;g1<groups.size();g1++)
	      for(int g2=0;g2<groups.size();g2++)
		if (splits[i].group1[g1] and splits[i].group2[g2]) {
		  if (value[g1] != value[g2])
		    if (not args.count("ignore-rate-change") or 
			(value[g1] != alphabet::gap and value[g2] != alphabet::gap))
		      some_different = true;
		}
	  
	    bool no_same = true;
	    for(int g1=0;g1<groups.size();g1++)
	      for(int g2=0;g2<groups.size();g2++)
		if (splits[i].group1[g1] and splits[i].group2[g2])
		  if (value[g1] == value[g2])
		    no_same = false;

	    // This is Option #1 
	    //  - some conserved differences but no conserved similarities
	    // Also consider Option #2
	    //  - a change in both LETTER and CONSERVATION on at least one of the
	    //    two branches leading from the duplication.
	    if (some_different and no_same) 
	      for(int g=0;g<groups.size();g++)
		if (splits[i].group1[g] or splits[i].group2[g])
		  in_changed_split[g] = true;
	  }

	  for(int g=0;g<groups.size();g++)
	    interesting[g] = interesting[g] and in_changed_split[g];


	}

	else {
	  if (args.count("ignore-rate-change")) 
	  {
	    for(int g=0;g<groups.size();g++) 
	      interesting[g] = interesting[g] and not all_same_or(value,alphabet::gap);
	  }
	  else 
	  {
	    for(int g=0;g<groups.size();g++)
		interesting[g] = interesting[g] and not all_same(value);
	  }
	}
      }
    
      // A group is only interesting if its conserved
      if (args.count("require-conservation"))
	for(int g=0;g<groups.size();g++)
	  interesting[g] = interesting[g] and (value[g] != alphabet::gap);

      // A group is only interesting if it is in one of the splits
      if (args.count("split"))
	for(int g=0;g<groups.size();g++) {
	  bool found = false;
	  for(int i=0;i<splits.size() and not found;i++) 
	    if (splits[i].group1[g] or splits[i].group2[g])
	      found = true;
	  interesting[g] = interesting[g] and found;
	}

      //------------ print 'group conserved at' values ----------//
      cerr<<c+1<<"   ";
      for(int i=0;i<value.size();i++) 
	cerr<<a.lookup(value[i])<<" ";
      cerr<<"     ";

      for(int g=0;g<groups.size();g++) 
	if (interesting[g])
	  cerr<<"1 ";
	else
	  cerr<<"0 ";
      cerr<<endl;

      //------------- print parsimony initial values --------------//
      cerr<<"     ";
      for(int i=0;i<initial_value.size();i++) {
	for(int j=0;j<initial_value[i].size();j++)
	  cerr<<a.lookup(initial_value[i][j]);
	cerr<<" "; 
      }     
      cerr<<"    "<<n_mutations(a,leaf_letters,T,unit_cost_matrix(a))<<endl;
      cerr<<endl;



      //--------------------- Set highlighting ---------------------//
      // Interesting groups -> 1.0
      for(int g=0;g<groups.size();g++)
	if (interesting[g])
	  for(int i=0;i<groups[g].taxa.size();i++)
	    C(c,groups[g].taxa[i]) = 1.0;

      // Set conserved groups -> 0.5
      for(int g=0;g<groups.size();g++)
	if (value[g] != alphabet::gap)
	  for(int i=0;i<groups[g].taxa.size();i++)
	    C(c,groups[g].taxa[i]) = std::max(0.5, C(c,groups[g].taxa[i]));
    }


    cout<<join(sequence_names(A),' ')<<endl;
    for(int i=0;i<C.size1();i++) {
      vector<double> temp;
      for(int j=0;j<C.size2();j++)
	temp.push_back(C(i,j));
      cout<<join(temp,' ')<<endl;
    }
    
  }
  catch (std::exception& e) {
    std::cerr<<"alignment-find-conserved: Error! "<<e.what()<<endl;
    exit(1);
  }

  return 0;
}