예제 #1
0
int main(int argc, char *argv[])
{

  time_t rawtime;
  struct tm * timeinfo;

  time ( &rawtime );
  timeinfo = localtime ( &rawtime );
  //  std::cout << "Start " << asctime (timeinfo);

  /////////////////////////////////////////
  // Setup
  if( argc < 10 || argc > 12){
    std::cout << "Wrong number of input arguments (" << argc << "), should have format:\n";
    std::cout << "\ttree_to_matrix <infile> <tmpfile> <prunedfile> <refalignment> <outfile> <starting_row> <ending_row> <format M=matrix E=esprit> <Do_Pruning 0=no 1=yes 2=only prune> [outfile_freq] [maxdistance(E format only)]\n";
  }
  char* infilename     = argv[1];    // (input)  Tree file with reference sequences
  char* tempfilename   = argv[2];    // (output) Half-pruned file (after pruning, before cleaning up single-child nodes and internal nodes which have become leaves
  char* prunedfilename = argv[3];    // (in/out) Pruned file name, input if not pruning, output if pruning
  char* refalignname   = argv[4];    // (input)  reference fasta file, only uses the sequence identifiers for pruning
  char* outfilename    = argv[5];    // (output) Output distance matrix/list file name
  int startrow    = atoi(argv[6]);   // (input)  First row to print for the distance matrix (0 for all)
  int endrow      = atoi(argv[7]);   // (input)  Last  row to print for the distance matrix (0 for all)
  char format          = argv[8][0]; // (input)  Format of distance, M=matrix, E=ESPRIT list
  int do_pruning  = atoi(argv[9]);   // (input)  0=no 1=yes 2=only prune
  //    M = matrix format, used by mothur
  //    E = ESPRIT list format
  char* frqfilename;
  float maxdist=0.1;
  if( argc == 12 ){
    frqfilename      = argv[10];     // (Optional output) Frequency file name, used when running ESPRIT
    maxdist     = atof(argv[11]);    // (Optional input)  Maximum distance to print in the distance list (ESPRIT format only)
    std::cout << frqfilename << " " << maxdist << std::endl;
  } else {
    if( format == 'E' ){
      std::cerr << "maximum distance required for ESPRIT printout; quitting\n";
      return EXIT_FAILURE;
    }
  }
  int srow = startrow;
  char* inname;
  if( do_pruning>0 ){
    // Read in raw file, then prune it
    inname = infilename;
  } else {
    // Read in pruned file directly
    inname = prunedfilename;
  }
  if( format == 'E' ){
    std::cout << "Printing output in ESPRIT list format\n";
  } else if( format == 'M' ){
    std::cout << "Printing output in Mothur matrix format\n";
  } else {
    std::cerr << "Unknown format " << format << ". Quitting\n";
    return EXIT_FAILURE;
  }
  std::list<TreeNode>::iterator startit;
  std::list<TreeNode>::iterator endit;

  /////////////////////////////////////////
  // READ IN TREE FROM FILE
  std::cout << "Reading in " << inname << std::endl;
  PhyloTree<TreeNode>* tr = new PhyloTree<TreeNode>();
  std::ifstream infile;
  infile.open(inname);
  if( !infile.is_open() ){ std::cout << "Unable to open file " << inname << std::endl; }
  tr->readTree(infile);
  std::cout << "LEAVES: " << tr->getNleaves() << std::endl;
  tr->check_root();
  /////////////////////////////////////////
  // Prune tree (if necessary)
  if( do_pruning>0 ){
    std::cout << "Pruning tree\n";
    // Read in reference alignment file and grab reference file names
    std::ifstream reffile;
    reffile.open(refalignname);
    if( !reffile.is_open() ){ std::cout << "Unable to open file " << refalignname << std::endl; }
    char line[100];
    reffile >> line;
    while( !reffile.eof() ){
      if( line[0] == '>' ){
	// Clean-up the file name
	std::string name(line);
	int slash = (int)name.find("/");
	name = name.substr(1, slash-1);
	int bar = (int)name.find("|");
	if ( bar != name.npos ){
	  name = name.replace(bar, 1, "_");
	}
	// Remove this leaf from the tree
	tr->deleteLeaf(name.c_str());
      }
      reffile >> line;
    }
    reffile.close();

    // Print to tmp file, just in case 
    std::ofstream treeout;
    treeout.open( tempfilename );
    if( !treeout.is_open() ){ std::cout << "Unable to open file " << tempfilename << std::endl; }
    treeout.precision(5);
    treeout.setf(std::ios::fixed,std::ios::floatfield);
    tr->writeTree( treeout );
    treeout.close();
    std::cout << "Printed to file " << tempfilename << std::endl;
    
    // Remove internal nodes that are now leaves
    while( tr->deleteLeaf("") > 0 );

    // Smooth to remove single child nodes
    while( tr->smooth() > 0 );

    // Check that the root doesn't have only one node
    tr->check_root();

    // Print pruned file, for use by parallel jobs
    treeout.open( prunedfilename );
    if( !treeout.is_open() ){ std::cout << "Unable to open file " << prunedfilename << std::endl; }
    treeout.precision(6);
    treeout.setf(std::ios::fixed,std::ios::floatfield);
    tr->writeTree( treeout );
    treeout.close();
    std::cout << "Printed to file " << prunedfilename << std::endl;

    // If I only needed to prune then I'm done
    if( do_pruning>1 ){
      std::cout << "Done pruning tips, ready to launch parallel tree_to_matrix jobs\n";
      return EXIT_SUCCESS;
    }
  }