/// \brief Add the pairwise leaf distances of tree \a T to the running totals.
///
/// On the first call the number of leaves and the leaf labels are taken from
/// \a T and both accumulators are sized to hold one entry per unordered leaf
/// pair.  Every call then adds each pair's distance to \a m1 and its square
/// to \a m2, and increments \a n_samples.
///
/// \param T The tree whose pairwise leaf distances are accumulated.
///
void count_pair_distances::operator()(const SequenceTree& T)
{
    // Lazy set-up: the first tree seen fixes N, the labels, and the storage size.
    if (not initialized)
    {
        N = T.n_leaves();
        names = T.get_leaf_labels();

        m1.resize(N*(N-1)/2);
        m2.resize(N*(N-1)/2);

        m1 = 0;
        m2 = 0;

        initialized = true;
    }

    n_samples++;

    // Theoretically, we could do this much faster, I think.
    //  vector<vector<int> > leaf_sets = partition_sets(T);

    // Pairs (row,col) with col < row are visited row by row; `pair` is the
    // position of the current pair in that enumeration.
    int pair = 0;
    for(int row=0; row<N; row++)
    {
        for(int col=0; col<row; col++)
        {
            // RF selects the edges_distance() metric; otherwise distance() is used.
            const double dist = RF ? T.edges_distance(row,col) : T.distance(row,col);

            m1[pair] += dist;
            m2[pair] += dist*dist;

            pair++;
        }
    }
}
/// \brief Construct the Partition of the leaf labels of \a T that corresponds to branch \a b.
///
/// The bitset returned by T.partition(b) is truncated to its first
/// T.n_leaves() bits, and those bits are paired with the leaf labels of \a T.
///
/// \param T The leaf-labelled tree.
/// \param b The branch whose partition is wanted.
/// \return A Partition built from the leaf labels of \a T and the truncated bitset.
///
Partition partition_from_branch(const SequenceTree& T,int b)
{
    dynamic_bitset<> leaf_bits(T.n_leaves());

    // NOTE(review): presumably this bitset also has entries for internal
    // nodes after the leaf entries -- confirm against SequenceTree::partition.
    const dynamic_bitset<>& all_bits = T.partition(b);

    // Keep only the leading bits, one per leaf.
    for(dynamic_bitset<>::size_type pos = 0; pos < leaf_bits.size(); ++pos)
        leaf_bits[pos] = all_bits[pos];

    return Partition(T.get_leaf_labels(), leaf_bits);
}
/// \brief Re-index the leaves of tree \a T so that the labels have the same ordering as in \a names. /// /// \param T The leaf-labelled tree. /// \param names The ordered leaf labels. /// void remap_T_leaf_indices(SequenceTree& T,const vector<string>& names) { assert(names.size() == T.n_leaves()); //----- Remap leaf indices for T onto A's leaf sequence indices -----// try { vector<int> mapping = compute_mapping(T.get_leaf_labels(), names); T.standardize(mapping); } catch(const bad_mapping<string>& b) { bad_mapping<string> b2 = b; b2.clear(); if (b2.from == 0) b2<<"Couldn't find leaf sequence \""<<b2.missing<<"\" in names."; else b2<<"Sequence '"<<b2.missing<<"' not found in the tree."; throw b2; } }
int main(int argc,char* argv[]) { try { cerr.precision(10); cout.precision(10); //---------- Parse command line -------// variables_map args = parse_cmd_line(argc,argv); //---------- Initialize random seed -----------// unsigned long seed = 0; if (args.count("seed")) { seed = args["seed"].as<unsigned long>(); myrand_init(seed); } else seed = myrand_init(); cout<<"random seed = "<<seed<<endl<<endl; alignment A; SequenceTree T; load_A_and_T(args,A,T); cout<<"Using alphabet: "<<A.get_alphabet().name<<endl<<endl; //------------- Show Similarity/Distances between sequences ---------// cout.precision(3); cout<<"conserved = \n"; print_entire(cout,T.get_leaf_labels(),getConserved(A))<<"\n"; Matrix S = getSimilarity(A); cout<<"%similarity = \n"; print_lower(cout,T.get_leaf_labels(),S)<<"\n"; owned_ptr<substitution::MultiModel> smodel_in = get_smodel(args,A); set_parameters(*smodel_in,args); cout<<"Using substitution model: "<<smodel_in->name()<<endl; smodel_in->set_rate(1); show_parameters(cout,*smodel_in); cout<<endl; cout<<"input T = "<<T<<endl; cout<<endl; //----- Prior & Posterior Rate Distributions (rate-bin probabilities) -------- // analyze_rates(A,T,*smodel_in); //------- Estimate branch lengths -------------// owned_ptr<substitution::MultiModel> smodel_est = smodel_in; SequenceTree T2 = T; if (args.count("search")) { vector<int> parameters; if (args["search"].as<string>() == "smodel") for(int i=0;i<smodel_est->n_parameters();i++) if (not smodel_est->is_fixed(i)) parameters.push_back(i); estimate_tree(A,T2,*smodel_est,parameters); cout<<"E T = "<<T2<<endl; show_parameters(cout,*smodel_est); cout<<endl<<endl; analyze_rates(A,T2,*smodel_est); } //------- Set up function to maximize --------// Matrix S1 = getSimilarity(T,*smodel_in); Matrix S2 = getSimilarity(T2,*smodel_est); Matrix D = C(S); cout<<"%difference (actual) = \n"; print_lower(cout,T.get_leaf_labels(),D)<<"\n"; cout<<"%difference (input) = \n"; print_lower(cout,T.get_leaf_labels(),C(S1))<<"\n"; if 
(args.count("search")){ cout<<"%difference (estimated) = \n"; print_lower(cout,T.get_leaf_labels(),C(S2))<<"\n\n"; } cout<<"tree distances (input) = \n"; print_lower(cout,T.get_leaf_labels(),DistanceMatrix(T))<<"\n"; if (args.count("search")){ cout<<"tree distances (estimated) = \n"; print_lower(cout,T.get_leaf_labels(),DistanceMatrix(T2))<<"\n\n"; } } catch (std::exception& e) { cerr<<"analyze_distances: Error! "<<e.what()<<endl; exit(1); } return 0; }