void show_level(const tree_sample& tree_dist, unsigned level, const vector<Partition>& skeleton, const vector<pair<Partition,unsigned> >& all_partitions, bool show_sub, bool show_PP) { vector<Partition> full_skeleton = select(skeleton,&Partition::full); const unsigned N = tree_dist.size(); cout.unsetf(ios::fixed | ios::showpoint); const vector<Partition> sub = get_Ml_partitions(all_partitions,level); const vector<Partition> full = select(sub,&Partition::full); // const vector<Partition> moveable = get_moveable_tree(sub); // vector<Partition> full_hull = Ml_min_Hull(full_skeleton,sub); // vector<Partition> sub_hull = Ml_min_Hull(skeleton,sub); double fraction = double(level)/N; double LOD = log10(odds(level,N,1)); cout<<" level = "<<fraction*100 <<" LOD = "<<LOD <<" full = "<<count(full, informative); if (show_sub) { cout<<" sub = " <<count(sub,informative); // cout<<" consistent = "<<count(moveable,informative); // cout<<" sub(50) = "<<count(moveable,informative); // cout<<" sub#1 = " <<count(full_hull,informative); // cout<<" sub#2 = " <<count(sub_hull,informative); } if (show_PP) cout<<" PP = "<<100*tree_dist.PP(full); cout<<endl; }
int main(int argc,char* argv[]) { try { std::cout.precision(3); std::cout.setf(ios::fixed); //---------- Parse command line -------// variables_map args = parse_cmd_line(argc,argv); //--------------------- Initialize ---------------------// if (args.count("seed")) { unsigned long seed = args["seed"].as<unsigned long>(); myrand_init(seed); } else myrand_init(); int skip = args["skip"].as<int>(); int subsample=args["sub-sample"].as<int>(); int max = -1; if (args.count("max")) max = args["max"].as<int>(); double min_support = args["min-support"].as<double>(); // leaf taxa to ignore vector<string> ignore; if (args.count("ignore") and args["ignore"].as<string>().size() > 0) ignore = split(args["ignore"].as<string>(),','); // consensus levels string c_levels = args.count("consensus") ? args["consensus"].as<string>() : ""; vector<double> consensus_levels = get_consensus_levels(c_levels); double report_ratio = args["odds-ratio"].as<double>(); bool show_sub = args.count("sub-partitions"); //-------------- Read in tree distributions --------------// string filename = args["file"].as<string>(); ifstream file(filename.c_str()); if (not file) throw myexception()<<"Couldn't open file "<<filename; tree_sample tree_dist(file,skip,subsample,max,ignore); const unsigned N = tree_dist.size(); dynamic_bitset<> ignore_mask = group_from_names(tree_dist.names(),vector<string>()); //------ Compute Ml partitions or sub-partitions --------// vector< pair<Partition,unsigned> > all_partitions; if (show_sub) { int depth = args["depth"].as<int>(); double min_rooting = args["rooting"].as<double>(); all_partitions = get_Ml_sub_partitions_and_counts(tree_dist,min_support, ~ignore_mask,min_rooting,depth); // std::cerr<<"n_sub_partitions = "<<all_partitions.size()<<"\n"; } else all_partitions = get_Ml_partitions_and_counts(tree_dist,min_support, ~ignore_mask); vector<int> which_topology; vector<int> topology_counts; std::map<tree_record,int> topologies_index; for(int i=0;i<tree_dist.size();i++) { std::map<tree_record,int>::iterator record = topologies_index.find(tree_dist[i]); if (record == topologies_index.end()) { which_topology.push_back(i); topology_counts.push_back(0); topologies_index[tree_dist[i]] = which_topology.size()-1; record = topologies_index.find(tree_dist[i]); } topology_counts[record->second]++; } vector<int> order = iota<int>(topology_counts.size()); std::sort(order.begin(),order.end(),sequence_order<int>(topology_counts)); std::reverse(order.begin(), order.end()); //------ Topologies to analyze -----// vector<string> topologies; cout<<"# n_trees = "<<tree_dist.size()<<" n_topologies = "<<topology_counts.size()<<endl; cout<<"\nTopology support: \n\n"; for(int i=0;i < args["map-trees"].as<int>() ;i++) { if (i >= order.size()) continue; string t = tree_dist.T(which_topology[order[i]]).write(false); unsigned n = topology_counts[order[i]]; double PP = double(n)/N; double o = odds(n,N,1); cout<<"MAP-"<<i<<" = "<<t<<endl; cout<<" PP = "<<PP<<" LOD = "<<log10(o)<<endl; cout<<"\n"; } for(int i=0,n=0;i<topology_counts.size();i++) { n += topology_counts[i]; double PP = double(n)/N; if (PP >= 0.95) { cout<<"95% credible set contains "<<i+1<<" topologies."<<endl; break; } } cout<<"\n\n"; //------- Print out support for each partition --------// cout<<"Partition support: \n\n"; vector<pair<Partition,unsigned> > good_partitions = thin(all_partitions, N, report_ratio); sort(good_partitions.begin(),good_partitions.end(), count_more()); for(int i=0;i<good_partitions.size();i++) { if (not informative(good_partitions[i].first)) continue; unsigned n = good_partitions[i].second; double PP = double(n)/N; double o = odds(n,N,1); cout<<" PP = "<<PP<<" LOD = "<<log10(o); if (not good_partitions[i].first.full()) { double ratio = odds_ratio(good_partitions,i,N,1); cout<<" ratio = "<<log10(ratio); } cout<<" pi = "<<good_partitions[i].first<<endl; cout<<endl<<endl; } //----------- display M[l] consensus levels ----------// std::cout.precision(4); cout<<"\n\nConsensus levels:\n\n"; vector<Partition> c50_sub_partitions = get_Ml_partitions(all_partitions, 0.5, N); vector<Partition> c50_full_partitions = select(c50_sub_partitions,&Partition::full); c50_sub_partitions = get_moveable_tree(c50_sub_partitions); vector<unsigned> levels = get_Ml_levels(all_partitions,N,min_support); levels.push_back(N+1); for(int j=0,k=0;j<levels.size() and k < consensus_levels.size();j++) { unsigned clevel = (unsigned)(consensus_levels[k]*N); while (k<consensus_levels.size() and clevel < levels[j]) { clevel = (unsigned)(consensus_levels[k]*N); vector<Partition> all = get_Ml_partitions(all_partitions,consensus_levels[k],N); vector<Partition> sub; vector<Partition> full; for(int i=0;i<all.size();i++) if (all[i].full()) full.push_back(all[i]); else sub.push_back(all[i]); SequenceTree consensus = get_mf_tree(tree_dist.names(),full); SequenceTree consensus2 = consensus; double L = consensus_levels[k]*100; cout.unsetf(ios::fixed | ios::showpoint); vector<double> bf(consensus.n_branches(),1.0); for(int i=0;i<bf.size();i++) { if (consensus.branch(i).is_leaf_branch()) bf[i] = -1.0; else { dynamic_bitset<> mask = branch_partition(consensus,i); Partition p(tree_dist.names(),mask); unsigned count = get_partition_count(all_partitions,p); bf[i] = double(count)/N; } consensus2.branch(i).set_length(bf[i]); } cout<<" "<<L<<"-consensus-PP = "<<consensus2.write(true)<<std::endl; //cout<<" "<<L<<"-consensus-PP2 = "<<consensus.write_with_bootstrap_fraction(bf,false)<<std::endl; cout<<" "<<L<<"-consensus = "<<consensus.write(false)<<std::endl; if (show_sub) { for(int i=0;i<sub.size();i++) cout<<sub[i]<<endl; } cout<<endl<<endl; k++; } if (levels[j] <=N) { show_level(tree_dist,levels[j],c50_sub_partitions,all_partitions,show_sub,false); cout<<endl; } } } catch (std::exception& e) { std::cerr<<"trees-consensus: Error! "<<e.what()<<endl; exit(1); } return 0; }