int main(int argc, char *argv[]) { time_t rawtime; struct tm * timeinfo; time ( &rawtime ); timeinfo = localtime ( &rawtime ); // std::cout << "Start " << asctime (timeinfo); ///////////////////////////////////////// // Setup if( argc < 10 || argc > 12){ std::cout << "Wrong number of input arguments (" << argc << "), should have format:\n"; std::cout << "\ttree_to_matrix <infile> <tmpfile> <prunedfile> <refalignment> <outfile> <starting_row> <ending_row> <format M=matrix E=esprit> <Do_Pruning 0=no 1=yes 2=only prune> [outfile_freq] [maxdistance(E format only)]\n"; } char* infilename = argv[1]; // (input) Tree file with reference sequences char* tempfilename = argv[2]; // (output) Half-pruned file (after pruning, before cleaning up single-child nodes and internal nodes which have become leaves char* prunedfilename = argv[3]; // (in/out) Pruned file name, input if not pruning, output if pruning char* refalignname = argv[4]; // (input) reference fasta file, only uses the sequence identifiers for pruning char* outfilename = argv[5]; // (output) Output distance matrix/list file name int startrow = atoi(argv[6]); // (input) First row to print for the distance matrix (0 for all) int endrow = atoi(argv[7]); // (input) Last row to print for the distance matrix (0 for all) char format = argv[8][0]; // (input) Format of distance, M=matrix, E=ESPRIT list int do_pruning = atoi(argv[9]); // (input) 0=no 1=yes 2=only prune // M = matrix format, used by mothur // E = ESPRIT list format char* frqfilename; float maxdist=0.1; if( argc == 12 ){ frqfilename = argv[10]; // (Optional output) Frequency file name, used when running ESPRIT maxdist = atof(argv[11]); // (Optional input) Maximum distance to print in the distance list (ESPRIT format only) std::cout << frqfilename << " " << maxdist << std::endl; } else { if( format == 'E' ){ std::cerr << "maximum distance required for ESPRIT printout; quitting\n"; return EXIT_FAILURE; } } int srow = startrow; char* inname; if( do_pruning>0 ){ // Read in raw file, then prune it inname = infilename; } else { // Read in pruned file directly inname = prunedfilename; } if( format == 'E' ){ std::cout << "Printing output in ESPRIT list format\n"; } else if( format == 'M' ){ std::cout << "Printing output in Mothur matrix format\n"; } else { std::cerr << "Unknown format " << format << ". Quitting\n"; return EXIT_FAILURE; } std::list<TreeNode>::iterator startit; std::list<TreeNode>::iterator endit; ///////////////////////////////////////// // READ IN TREE FROM FILE std::cout << "Reading in " << inname << std::endl; PhyloTree<TreeNode>* tr = new PhyloTree<TreeNode>(); std::ifstream infile; infile.open(inname); if( !infile.is_open() ){ std::cout << "Unable to open file " << inname << std::endl; } tr->readTree(infile); std::cout << "LEAVES: " << tr->getNleaves() << std::endl; tr->check_root(); ///////////////////////////////////////// // Prune tree (if necessary) if( do_pruning>0 ){ std::cout << "Pruning tree\n"; // Read in reference alignment file and grab reference file names std::ifstream reffile; reffile.open(refalignname); if( !reffile.is_open() ){ std::cout << "Unable to open file " << refalignname << std::endl; } char line[100]; reffile >> line; while( !reffile.eof() ){ if( line[0] == '>' ){ // Clean-up the file name std::string name(line); int slash = (int)name.find("/"); name = name.substr(1, slash-1); int bar = (int)name.find("|"); if ( bar != name.npos ){ name = name.replace(bar, 1, "_"); } // Remove this leaf from the tree tr->deleteLeaf(name.c_str()); } reffile >> line; } reffile.close(); // Print to tmp file, just in case std::ofstream treeout; treeout.open( tempfilename ); if( !treeout.is_open() ){ std::cout << "Unable to open file " << tempfilename << std::endl; } treeout.precision(5); treeout.setf(std::ios::fixed,std::ios::floatfield); tr->writeTree( treeout ); treeout.close(); std::cout << "Printed to file " << tempfilename << std::endl; // Remove internal nodes that are now leaves while( tr->deleteLeaf("") > 0 ); // Smooth to remove single child nodes while( tr->smooth() > 0 ); // Check that the root doesn't have only one node tr->check_root(); // Print pruned file, for use by parallel jobs treeout.open( prunedfilename ); if( !treeout.is_open() ){ std::cout << "Unable to open file " << prunedfilename << std::endl; } treeout.precision(6); treeout.setf(std::ios::fixed,std::ios::floatfield); tr->writeTree( treeout ); treeout.close(); std::cout << "Printed to file " << prunedfilename << std::endl; // If I only needed to prune then I'm done if( do_pruning>1 ){ std::cout << "Done pruning tips, ready to launch parallel tree_to_matrix jobs\n"; return EXIT_SUCCESS; } }