void print_newick_tree (struct aln_tree_node* p, struct alignment* aln, FILE *fout) { int j; if (p->links[0]) { fprintf (fout, "("); print_newick_tree (p->links[0], aln, fout); } if (p->num < numseq) { //If you want to print the actual names of the sequences for (j = 0; j < aln->lsn[p->num]; j++) { if (isspace ( (int) aln->sn[p->num][j]) ) { fprintf (fout, "_"); } else { fprintf (fout, "%c", aln->sn[p->num][j]); } } //If you want to print the just the number of the sequence //fprintf(stdout,"%d",p->num); } else { fprintf (fout, ","); } if (p->links[1]) { print_newick_tree (p->links[1], aln, fout); fprintf (fout, ")"); } }
/*
 * Writes the tree rooted at p to the file named by outfile: the output of
 * print_newick_tree() followed by a terminating ';'.
 *
 * The file is opened in "w" mode, so an existing file is truncated.
 * If it cannot be opened, a message is printed to stderr and the process
 * exits with a failure status.
 *
 * p       - root of the tree to print.
 * aln     - alignment handed through to print_newick_tree().
 * outfile - path of the file to create.
 */
void print_tree (struct aln_tree_node* p, struct alignment* aln, char* outfile)
{
    FILE *fout = fopen (outfile, "w");

    if (fout == NULL) {
        fprintf (stderr, "can't open output\n");
        /* Report the failure to the caller of the program; an exit status
         * of 0 would claim that the tree had been written. */
        exit (EXIT_FAILURE);
    }

    /* Disabled alternative kept from the original source: when
     * byg_start("xml", outfile) != -1 the tree was wrapped in a
     * <phyloxml><phylogeny> ... </phylogeny></phyloxml> document and
     * printed with print_phyloxml_tree(p, aln, fout) instead. */
    print_newick_tree (p, aln, fout);
    fprintf (fout, ";");

    fclose (fout);
}
void run(std::string tree_filename, std::string fasta_filename, std::string model_name) { Model Mod; // The model Counts data; // the counts Parameters Par; // the parameters std::vector<double> br; // branch lengths double eps = 1e-8; // The threshold for the EM algorithm. Parameters Parsim; // used for simulating data. std::vector<double> brsim; // branch lengths of simulated data. std::vector<std::vector<double> > Cov; // Covariance matrix std::vector<double> variances; // The variances bool simulate; bool nonident; std::string parameters_filename; std::string covariances_filename; // initialize random number generator with time(0). random_initialize(); parameters_filename = strip_extension(fasta_filename) + ".dat"; covariances_filename = strip_extension(fasta_filename) + ".cov"; // Creates the pointers to the model-specific functions. Mod = create_model(model_name); std::cout << "Model: " << Mod.name << std::endl; // Reads the tree. Tree T = read_tree(tree_filename); // Prints the Tree std::cout << "Tree:" << std::endl; print_tree(T); // Check for possible nonidentifiability issues. nonident = nonident_warning(T); // Initialize the parameters for simulation of K81 data for testing Parsim = create_parameters(T); if (fasta_filename == ":test") { // if fasta file is :test generate random data. simulate = true; // Warn std::cout << "WARNING: Using simulated data " << std::endl << std::endl; // Generate random parameters random_parameters_length(T, Mod, Parsim); // Simulate the data data = random_fake_counts(T, 1000, Parsim); // Prints branch-lengths for future check. branch_lengths(Parsim, brsim); std::cout << "Simulated branch lengths:" << std::endl; print_vector(brsim); } else { // otherwise read the data simulate = false; // Read the counts. std::cout << "Reading fasta file:" << std::endl; read_counts(T, data, fasta_filename); add_pseudocounts(0.01, data); std::cout << std::endl; } // Check whether the data and the tree match. 
if (T.nalpha != data.nalpha || T.nleaves != data.nspecies) { throw std::invalid_argument("The order of the sequences or their number and the phylogenetic tree do not match."); } //Par = create_parameters(T); //print_parameters(Par); //print_vector(Par.r); //clock_t long start_time, end_time; // Runs the EM algorithm. Par is used as initial parameters. // After execution, Par contains the MLE computed by the algorithm. // for local max over multiple iterations Parameters Parmax = Par; Model Modmax = Mod; float likelL = 0.0; float likelMax = -1000000.0; float timerec; float timemax; int outfiles; //whether to save output std::cout << "Starting the EM algorithm: " << std::endl; int s; int S = 0; //count of cases with neg branches int iter; int iterMax; for (int it_runs = 0; it_runs < 10; it_runs++) { Par = create_parameters(T); Mod = create_model(model_name); std::cout << it_runs << ", " ; start_time = clock(); std::tie(likelL, iter) = EMalgorithm(T, Mod, Par, data, eps); end_time = clock(); //print_parameters(Par); // Choses the best permutation. guess_permutation(T, Mod, Par); branch_lengths(Par, br); //print_vector(br); s = find_negative(br); S +=s; timerec = ((float)end_time - start_time) / CLOCKS_PER_SEC; //assign the 1st iter time value, inc ase it's the best if (it_runs == 0){ timemax = timerec; iterMax = iter; } if (likelL > likelMax){ Parmax = Par; Modmax = Mod; timemax = timerec; likelMax = likelL; iterMax = iter; } } // If parameters are not identifiable, the computation of the covariance matrix will // fail as the Fisher info matrix will not be invertible. if (!nonident) { // Compute the covariance matrix using observed Fisher. full_MLE_observed_covariance_matrix(T, Modmax, Parmax, data, Cov); variances.resize(Cov.size()); for(unsigned int i=0; i < Cov.size(); i++) { variances[i] = Cov[i][i]; } // OUTPUT Save the sigmas into a file //save_sigmas_to(covariances_filename, Cov); } std::cout << std::endl; std::cout << "Finished." 
<< std::endl; std::cout << "Likelihood: " << log_likelihood(T, Parmax, data) << std::endl ; std::cout << "Time: " << timemax << std::endl << std::endl; std::cout << "negative branches: " << S << std::endl; std::cout << "Iter: " << iterMax << std::endl; //std::cout << "Branch lengths: " << std::endl; //print_vector(br); outfiles = 0; if (!nonident && outfiles) { std::cout << "Parameter variances: " << std::endl; print_vector(variances); } std::cout << "Newick Tree:" << std::endl; print_newick_tree(T, br); // if is a simulation, print the L2 distance ! if (simulate) { std::cout << "L2 distance: " << parameters_distance(Par, Parsim) << std::endl; std::cout << "KL divergence: " << KL_divergence(T, Par, Parsim) << std::endl; std::cout << std::endl; } // if it is not a simulation, store the parameters in a file ! if (!simulate && outfiles) { std::fstream st; st.precision(15); st.setf(std::ios::fixed,std::ios::floatfield); st.open(parameters_filename.c_str(), std::ios::out); print_parameters(Par, st); } }