bool update_lengths(const SequenceTree& Q,const SequenceTree& T,
		    valarray<double>& branch_lengths, 
		    valarray<double>& branch_lengths_squared, 
		    valarray<double>& node_lengths)
{
  // map branches from Q -> T
  vector<int> branches_map = extends_map(T,Q);
  if (not branches_map.size())
    return false;

  // incorporate lengths of branches that map to Q
  for(int b=0;b<Q.n_branches();b++)
  {
    int b2 = branches_map[b];
    double L = T.directed_branch(b2).length();
    branch_lengths[b] += L;
    branch_lengths_squared[b] += L*L;
  }

  // map nodes from T -> Q
  vector<int> nodes_map = get_nodes_map(Q,T,branches_map);

  // incorprate lengths of branches that map to nodes in Q
  for(int i=T.n_leafbranches();i<T.n_branches();i++) 
  {
    const_branchview b = T.branch(i);
    int n1 = nodes_map[b.source()];
    int n2 = nodes_map[b.target()];

    if (n1 == n2)
      node_lengths[n1] += T.branch(i).length();
  }

  return true;
}
Exemple #2
0
void estimate_tree(const alignment& A,
		   SequenceTree& T,
		   substitution::MultiModel& smodel,
		   const vector<int>& parameters)
{
  //------- Estimate branch lengths -------------//
  log_branch_likelihood score2(A,smodel,T,parameters);
  
  // Initialize starting point
  optimize::Vector start(0.1, T.n_branches() + parameters.size());
  for(int b=0;b<T.n_branches();b++)
    start[b] = log(T.branch(b).length());
  for(int i=0;i<parameters.size();i++)
    start[i+T.n_branches()] = smodel.get_parameter_value_as<Double>( parameters[i] );

  //    optimize::Vector end = search_gradient(start,score);
  //    optimize::Vector end = search_basis(start,score);
  optimize::Vector end = search_gradient(start,score2,1e-3);

  for(int b=0;b<T.n_branches();b++)
    T.branch(b).set_length(exp(end[b]));

  for(int i=0;i<parameters.size();i++)
    smodel.set_parameter_value(parameters[i], end[i+T.n_branches()]);

  smodel.set_rate(1);
}
int choose_SPR_target(SequenceTree& T1, int b1_)
{
    const_branchview b1 = T1.directed_branch(b1_);

    //----- Select the branch to move to ------//
    dynamic_bitset<> subtree_nodes = T1.partition(b1.reverse());
    subtree_nodes[b1.target()] = true;

    vector<int> branches;
    vector<double> lengths;

    for(int i=0; i<T1.n_branches(); i++)
    {
        const_branchview bi = T1.branch(i);

        // skip branch if its contained in the subtree
        if (subtree_nodes[bi.target()] and
                subtree_nodes[bi.source()])
            continue;

        double L = 1.0;

        // down-weight branch if it is one of the subtree's 2 neighbors
        if (subtree_nodes[bi.target()] or
                subtree_nodes[bi.source()])
            L = 0.5;

        branches.push_back(i);
        lengths.push_back(L);
    }

    int b2 = branches[ choose(lengths) ];

    return b2;
}
void accum_branch_lengths_ignore_topology::operator()(const SequenceTree& T)
{
  n_samples++;
  for(int b1=0;b1<Q.n_branches();b1++)
  {
    // this is a complete waste of CPU time.
    dynamic_bitset<> bp1 = branch_partition(Q,b1);
    if (not bp1[0]) bp1.flip();

    // search for Q.branch(b1) in tree T
    int b2 = -1;
    for(int i=0;i<T.n_branches();i++) 
    {
      dynamic_bitset<> bp2 = branch_partition(T,i);
      if (not bp2[0]) bp2.flip();

      if (bp1 == bp2) {
	b2 = i;
	break;
      }
    }

    if (b2 != -1) {
      double L = T.branch(b2).length();
      m1[b1] += L;
      m2[b1] += L*L;
      n_matches[b1]++;
    }
  }
}
Exemple #5
0
SequenceTree get_mf_tree(const std::vector<std::string>& names,
			 const std::vector<dynamic_bitset<> >& partitions) 
{
  SequenceTree T = star_tree(names);

  for(int i=0;i<partitions.size();i++)
    T.induce_partition(partitions[i]);

  for(int i=0;i<T.n_branches();i++)
    T.branch(i).set_length(1.0);

  return T;
}
Exemple #6
0
/// Tree prior: topology & branch lengths (exponential)
efloat_t prior_exponential(const SequenceTree& T,double branch_mean) 
{
  efloat_t p = 1;

  // --------- uniform prior on topologies --------//
  if (T.n_leaves()>3)
    p /= num_topologies(T.n_leaves());

  // ---- Exponential prior on branch lengths ---- //
  for(int i=0;i<T.n_branches();i++) 
    p *= exponential_pdf(T.branch(i).length(), branch_mean);

  return p;
}
Exemple #7
0
/// Tree prior: topology & branch lengths (gamma)
efloat_t prior_gamma(const SequenceTree& T,double branch_mean) 
{
  efloat_t p = 1;

  // --------- uniform prior on topologies --------//
  if (T.n_leaves()>3)
    p /= num_topologies(T.n_leaves());

  // ---- Exponential prior on branch lengths ---- //
  double a = 0.5;
  double b = branch_mean*2;

  for(int i=0;i<T.n_branches();i++) 
    p *= gamma_pdf(T.branch(i).length(), a, b);

  return p;
}
Exemple #8
0
SequenceTree get_mf_tree(const std::vector<std::string>& names,
			 const std::vector<Partition>& partitions) 
{
  SequenceTree T = star_tree(names);

  int i=0;
  try {
    for(;i<partitions.size();i++)
      T.induce_partition(partitions[i].group1);
  }
  catch(...) {
    throw myexception()<<"Partition ("<<partitions[i]<<") conflicts with tree "<<T;
  }

  for(int i=0;i<T.n_branches();i++)
    T.branch(i).set_length(1.0);

  return T;
}
// mark nodes in T according to what node of Q they map to
vector<int> get_nodes_map(const SequenceTree& Q,const SequenceTree& T,
			  const vector<int>& branches_map)
{
  assert(branches_map.size() == Q.n_branches() * 2);

  vector<int> nodes_map(T.n_nodes(),-1);

  // map nodes from T -> Q that are in both trees
  for(int b=0;b<Q.n_branches();b++)
  {
    int Q_source = Q.branch(b).source();
    int Q_target = Q.branch(b).target();

    int b2 = branches_map[b];

    int T_source = T.directed_branch(b2).source();
    int T_target = T.directed_branch(b2).target();

    if (nodes_map[T_source] == -1)
      nodes_map[T_source] = Q_source;
    else
      assert(nodes_map[T_source] == Q_source);

    if (nodes_map[T_target] == -1)
      nodes_map[T_target] = Q_target;
    else
      assert(nodes_map[T_target] == Q_target);
  }

  // map the rest of the nodes from T -> Q
  for(int i=Q.n_leaves();i<Q.n_nodes();i++) 
  {
    unsigned D = Q[i].degree();
    if (D <= 3) continue;

    // get a branch of Q pointing into the node
    const_branchview outside = *(Q[i].branches_in());
    // get a branch of T pointing into the node
    outside = T.directed_branch(branches_map[outside.name()]);

    list<const_branchview> branches;
    typedef list<const_branchview>::iterator list_iterator;
    append(outside.branches_after(),branches);
    for(list_iterator b = branches.begin() ; b != branches.end();)
    {
      int node = (*b).target();
      if (nodes_map[node] == -1)
	nodes_map[node] = i;

      if (nodes_map[node] == i) {
	append((*b).branches_after(),branches);
	b++;
      }
      else {
	list_iterator prev = b;
	b++;
	branches.erase(prev);
      }
    }
    assert(branches.size() == D-3);
  }

  for(int i=0;i<nodes_map.size();i++)
    assert(nodes_map[i] != -1);

  return nodes_map;
}
Exemple #10
0
int main(int argc,char* argv[]) 
{ 
  try {
    //----------- Parse command line  ----------//
    variables_map args = parse_cmd_line(argc,argv);

    int skip = args["skip"].as<int>();

    int max = -1;
    if (args.count("max"))
      max = args["max"].as<int>();

    int subsample = args["sub-sample"].as<int>();

    vector<string> prune;
    if (args.count("prune")) {
      string p = args["prune"].as<string>();
      prune = split(p,',');
    }
      

    //----------- Read the topology -----------//
    SequenceTree Q = load_T(args);
    standardize(Q);
    const int B = Q.n_branches();
    const int N = Q.n_nodes();
    vector<double> bf(B);
    for(int b=0;b<bf.size();b++)
      bf[b] = Q.branch(b).length();

    //-------- Read in the tree samples --------//
    if ( args.count("simple") ) {
      accum_branch_lengths_ignore_topology A(Q);
      scan_trees(std::cin,skip,subsample,max,prune,A);
      for(int b=0;b<B;b++)
	Q.branch(b).set_length(A.m1[b]);
      cout<<Q.write_with_bootstrap_fraction(bf,true)<<endl;
      exit(0);
    }

    accum_branch_lengths_same_topology A(Q);

    try {
      scan_trees(std::cin,skip,subsample,max,prune,A);
    }
    catch (std::exception& e) 
    {
      if (args.count("safe"))
	cout<<Q.write(false)<<endl;
      std::cerr<<"tree-mean-lengths: Error! "<<e.what()<<endl;
      exit(0);
    }

    if (log_verbose) std::cerr<<A.n_matches<<" out of "<<A.n_samples<<" trees matched the topology";
    if (log_verbose) std::cerr<<" ("<<double(A.n_matches)/A.n_samples*100<<"%)"<<std::endl;

    //------- Merge lengths and topology -------//
    if (args.count("var")) {
      for(int b=0;b<B;b++)
	Q.branch(b).set_length(A.m2[b]);
      cout<<Q;
      exit(0);
    }
    else {
      for(int b=0;b<B;b++)
	Q.branch(b).set_length(A.m1[b]);

      if (not args.count("no-node-lengths") and 
	  not args.count("show-node-lengths")) {
	for(int n=0;n<N;n++) {
	  int degree = Q[n].neighbors().size();
	  for(out_edges_iterator b = Q[n].branches_out();b;b++)
	    (*b).set_length((*b).length() + A.n1[n]/degree);
	}
      }

      //------- Print Tree and branch lengths -------//
      cout<<Q.write_with_bootstrap_fraction(bf,true)<<endl;

      //------------ Print node lengths -------------//
      if (args.count("show-node-lengths"))
	for(int n=0;n<Q.n_nodes();n++) {
	  if (A.n1[n] > 0) {
	    cout<<"node "<<A.n1[n]<<endl;
	    int b = (*Q[n].branches_in()).name();
	    cout<<partition_from_branch(Q,b)<<endl;
	  }
	}

    }
  }
  catch (std::exception& e) {
    std::cerr<<"tree-mean-lengths: Error! "<<e.what()<<endl;
    exit(1);
  }
  return 0;
}
Exemple #11
0
//FIXME T.seq(i) -> T.leafname(i)
//FIXME T.get_sequences -> T.leafnames()
void delete_node(SequenceTree& T,const std::string& name) 
{
  int index = find_index(T.get_sequences(),name);
  nodeview n = T.prune_subtree(T.branch(index).reverse());
  T.remove_node_from_branch(n);
}
Exemple #12
0
void print_stats(std::ostream& o,std::ostream& trees,
		 const Parameters& P,
		 bool print_alignment) 
{
  efloat_t Pr_prior = P.prior();
  efloat_t Pr_likelihood = P.likelihood();
  efloat_t Pr = Pr_prior * Pr_likelihood;

  o<<"    prior = "<<Pr_prior;
  for(int i=0;i<P.n_data_partitions();i++) 
    o<<"   prior_A"<<i+1<<" = "<<P[i].prior_alignment();

  o<<"    likelihood = "<<Pr_likelihood<<"    logp = "<<Pr
   <<"    beta = " <<P.beta[0]  <<"\n";

  if (print_alignment)
    for(int i=0;i<P.n_data_partitions();i++)
      o<<standardize(*P[i].A, *P.T)<<"\n";
  
  {
    SequenceTree T = *P.T;
    
    valarray<double> weights(P.n_data_partitions());
    for(int i=0;i<weights.size();i++)
      weights[i] = max(sequence_lengths(*P[i].A, P.T->n_leaves()));
    weights /= weights.sum();

    double mu_scale=0;
    for(int i=0;i<P.n_data_partitions();i++)
      mu_scale += P[i].branch_mean()*weights[i];

    for(int b=0;b<T.n_branches();b++)
      T.branch(b).set_length(mu_scale*T.branch(b).length());
    trees<<T<<std::endl;
    trees.flush();
  }
  
  o<<"\n";
  show_parameters(o,P);
  o.flush();

  for(int m=0;m<P.n_smodels();m++) {
    o<<"smodel"<<m+1<<endl;
    for(int i=0;i<P.SModel(m).n_base_models();i++)
      o<<"    rate"<<i<<" = "<<P.SModel(m).base_model(i).rate();
    o<<"\n\n";

    for(int i=0;i<P.SModel(m).n_base_models();i++)
      o<<"    fraction"<<i<<" = "<<P.SModel(m).distribution()[i];
    o<<"\n\n";

    o<<"frequencies = "<<"\n";
    show_frequencies(o,P.SModel(m));
    o<<"\n\n";

    o.flush();
  }

  // The leaf sequences should NOT change during alignment
#ifndef NDEBUG
  for(int i=0;i<P.n_data_partitions();i++)
    check_alignment(*P[i].A, *P.T,"print_stats:end");
#endif
}
Exemple #13
0
bool update_lengths(const MC_tree& Q,const SequenceTree& T,
		    const vector<dynamic_bitset<> >& node_masks,
		    const vector<Partition>& partitions2,
		    valarray<double>& branch_lengths, 
		    valarray<double>& node_lengths)
{
  // check that this tree is consistent with the MC Tree Q
  for(int i=0;i<Q.branch_order.size();i++) 
  {
    int b = Q.branch_order[i];
    if (not implies(T,Q.partitions[b]))
      return false;
  }

  // map branches of the input tree
  for(int b=0;b<T.n_branches();b++)
  {
    Partition P = partition_from_branch(T,b);

    // Find mc tree branches implied by branch b
    vector<int> branches;
    for(int i=0;i<Q.branch_order.size();i++) 
    {
      if (implies(P,partitions2[i]))
	branches.push_back(i);
    }

    // Find out which nodes this branch is inside of 
    vector<int> nodes;
    for(int n=0;n<Q.n_nodes();n++) 
    {
      if (Q.degree(n) == 0) continue;

      Partition P2 = P;
      P2.group1 = P2.group1 & node_masks[n];
      P2.group2 = P2.group2 & node_masks[n];
      if (P2.group1.none() or P2.group2.none())
	continue;

      bool ok = true;
      for(int b=0;b<2*Q.n_branches() and ok;b++)
      {
	if (Q.mapping[b] != n)  continue;
	Partition P3 = Q.partitions[b];
	P3.group1 = P3.group1 & node_masks[n];
	P3.group2 = P3.group2 & node_masks[n];
	if (partition_less_than(P3,P2) or partition_less_than(P3,P2.reverse()))
	  ;
	else
	  ok=false;
      }

      if (ok) {
	nodes.push_back(n);
	assert(Q.degree(n) > 3);
      }
    }

    /*
    cerr<<"Branch: "<<P<<endl;
    cerr<<"  - maps to "<<branches.size()<<" branches."<<endl;
    if (nodes.size()) {
      cerr<<"  - inside node(s):"<<endl;
      cerr<<P<<endl;
      for(int i=0;i<nodes.size();i++)
	cerr<<"    "<<Q.partitions[Q.branch_to_node(nodes[i])]<<endl;
    }
    */

    // This branch should be inside only one node, if any.
    assert(nodes.size() < 2);

    // This branch should not be inside a node, if it implies an mc tree branch.
    if (branches.size()) assert(not nodes.size());

    // But this branch should be inside a node if it doesn't imply a branch.
    assert(branches.size() + nodes.size() > 0);

    const double L = T.branch(b).length();

    // Divide the branch length evenly between the branches it implies.
    for(int i=0;i<branches.size();i++)
      branch_lengths[branches[i]] += L/branches.size();

    // Divide the branch length evenly between the nodes (node?) it implies.
    for(int i=0;i<nodes.size();i++)
      node_lengths[nodes[i]] += L/nodes.size();
  }

  return true;
}
Exemple #14
0
int main(int argc,char* argv[]) 
{ 
  try {

    std::cout.precision(3);
    std::cout.setf(ios::fixed);

    //---------- Parse command line  -------//
    variables_map args = parse_cmd_line(argc,argv);

    //--------------------- Initialize ---------------------//
    if (args.count("seed")) {
      unsigned long seed = args["seed"].as<unsigned long>();
      myrand_init(seed);
    }
    else
      myrand_init();

    int skip = args["skip"].as<int>();

    int subsample=args["sub-sample"].as<int>();

    int max = -1;
    if (args.count("max"))
      max = args["max"].as<int>();

    double min_support = args["min-support"].as<double>();

    // leaf taxa to ignore
    vector<string> ignore;
    if (args.count("ignore") and args["ignore"].as<string>().size() > 0)
      ignore = split(args["ignore"].as<string>(),',');

    // consensus levels 
    string c_levels = args.count("consensus") ? args["consensus"].as<string>() : "";
    vector<double> consensus_levels = get_consensus_levels(c_levels);

    double report_ratio = args["odds-ratio"].as<double>();

    bool show_sub = args.count("sub-partitions");

    //-------------- Read in tree distributions --------------//
    string filename = args["file"].as<string>();
    ifstream file(filename.c_str());
    if (not file)
      throw myexception()<<"Couldn't open file "<<filename;
      
    tree_sample tree_dist(file,skip,subsample,max,ignore);
    const unsigned N = tree_dist.size();

    dynamic_bitset<> ignore_mask = group_from_names(tree_dist.names(),vector<string>());

    //------ Compute Ml partitions or sub-partitions --------//
    vector< pair<Partition,unsigned> > all_partitions;

    if (show_sub)
    {
      int depth = args["depth"].as<int>();

      double min_rooting = args["rooting"].as<double>();

      all_partitions = get_Ml_sub_partitions_and_counts(tree_dist,min_support, ~ignore_mask,min_rooting,depth);
      //      std::cerr<<"n_sub_partitions = "<<all_partitions.size()<<"\n";
    }
    else
      all_partitions = get_Ml_partitions_and_counts(tree_dist,min_support, ~ignore_mask);

    vector<int> which_topology;
    vector<int> topology_counts;
    std::map<tree_record,int> topologies_index;

    for(int i=0;i<tree_dist.size();i++)
    {
      std::map<tree_record,int>::iterator record = topologies_index.find(tree_dist[i]);
      if (record == topologies_index.end())
      {
	which_topology.push_back(i);
	topology_counts.push_back(0);

	topologies_index[tree_dist[i]] = which_topology.size()-1;
	record = topologies_index.find(tree_dist[i]);
      }
      topology_counts[record->second]++;
    }
    vector<int> order = iota<int>(topology_counts.size());

    std::sort(order.begin(),order.end(),sequence_order<int>(topology_counts));
    std::reverse(order.begin(), order.end());

    //------  Topologies to analyze -----//
    vector<string> topologies;

    cout<<"# n_trees = "<<tree_dist.size()<<"   n_topologies = "<<topology_counts.size()<<endl;
    cout<<"\nTopology support: \n\n";
    for(int i=0;i < args["map-trees"].as<int>() ;i++) 
    {
      if (i >= order.size()) continue;

      string t = tree_dist.T(which_topology[order[i]]).write(false);

      unsigned n = topology_counts[order[i]];
      double PP = double(n)/N;
      double o = odds(n,N,1);

      cout<<"MAP-"<<i<<" = "<<t<<endl;
      cout<<"   PP = "<<PP<<"       LOD = "<<log10(o)<<endl;
      cout<<"\n";
    }

    
    for(int i=0,n=0;i<topology_counts.size();i++) 
    {
      n += topology_counts[i];
      double PP = double(n)/N;

      if (PP >= 0.95) {
	cout<<"95% credible set contains "<<i+1<<" topologies."<<endl;
	break;
      }
    }
    cout<<"\n\n";


    //------- Print out support for each partition --------//
    cout<<"Partition support: \n\n";

    vector<pair<Partition,unsigned> > good_partitions = thin(all_partitions, N, report_ratio);

    sort(good_partitions.begin(),good_partitions.end(), count_more());

    for(int i=0;i<good_partitions.size();i++) 
    {
      if (not informative(good_partitions[i].first))
	continue;

      unsigned n = good_partitions[i].second;

      double PP = double(n)/N;
      double o = odds(n,N,1);

      cout<<"   PP = "<<PP<<"       LOD = "<<log10(o);

      if (not good_partitions[i].first.full()) {
	double ratio = odds_ratio(good_partitions,i,N,1);
	cout<<"       ratio = "<<log10(ratio);
      }
      cout<<"       pi = "<<good_partitions[i].first<<endl;

      cout<<endl<<endl;
    }


    //----------- display M[l] consensus levels ----------//
    std::cout.precision(4);
    cout<<"\n\nConsensus levels:\n\n";

    vector<Partition> c50_sub_partitions = get_Ml_partitions(all_partitions, 0.5, N);
    vector<Partition> c50_full_partitions = select(c50_sub_partitions,&Partition::full);
    c50_sub_partitions = get_moveable_tree(c50_sub_partitions);


    vector<unsigned> levels = get_Ml_levels(all_partitions,N,min_support);

    levels.push_back(N+1);

    for(int j=0,k=0;j<levels.size() and k < consensus_levels.size();j++) 
    {
      unsigned clevel = (unsigned)(consensus_levels[k]*N);

      while (k<consensus_levels.size() and clevel < levels[j]) 
      {
	clevel = (unsigned)(consensus_levels[k]*N);

	vector<Partition> all  = get_Ml_partitions(all_partitions,consensus_levels[k],N);
	vector<Partition> sub;
	vector<Partition> full;
	for(int i=0;i<all.size();i++)
	  if (all[i].full())
	    full.push_back(all[i]);
	  else
	    sub.push_back(all[i]);

	SequenceTree consensus = get_mf_tree(tree_dist.names(),full);
	SequenceTree consensus2 = consensus;
      
	double L = consensus_levels[k]*100;
	
	cout.unsetf(ios::fixed | ios::showpoint);
	
	vector<double> bf(consensus.n_branches(),1.0);
	for(int i=0;i<bf.size();i++) 
	{
	  if (consensus.branch(i).is_leaf_branch())
	    bf[i] = -1.0;
	  else {
	    dynamic_bitset<> mask = branch_partition(consensus,i);
	    Partition p(tree_dist.names(),mask);
	    unsigned count = get_partition_count(all_partitions,p);
	    bf[i] = double(count)/N;
	  }
	  consensus2.branch(i).set_length(bf[i]);
	}

	
	cout<<" "<<L<<"-consensus-PP = "<<consensus2.write(true)<<std::endl;
	//cout<<" "<<L<<"-consensus-PP2 = "<<consensus.write_with_bootstrap_fraction(bf,false)<<std::endl;
	cout<<" "<<L<<"-consensus = "<<consensus.write(false)<<std::endl;
	
	if (show_sub) {
	  for(int i=0;i<sub.size();i++)
	    cout<<sub[i]<<endl;
	}
	cout<<endl<<endl;

	k++;
      }
	

      if (levels[j] <=N) {
	show_level(tree_dist,levels[j],c50_sub_partitions,all_partitions,show_sub,false);
	cout<<endl;
      }

    }
  }
  catch (std::exception& e) {
    std::cerr<<"trees-consensus: Error! "<<e.what()<<endl;
    exit(1);
  }
  return 0;
}