Esempio n. 1
0
/*
 * Runs the template pass and rebuilds CODE from the GLOBAL stream.
 *
 * Steps, in order:
 *   1. GLOBAL is set to a fresh stream from new_stream ().
 *   2. tpls is pointed at a zero-filled table of c_nsym Template pointers.
 *   3. pass () runs (presumably it fills in tpls entries -- confirm).
 *   4. Every non-null table entry has its BODY and itself released.
 *   5. The old CODE is freed and replaced by combine_output (GLOBAL).
 *
 * The table lives in alloca storage, so it is only valid while this
 * function is executing; tpls is reset to null before returning so that
 * it never points at a dead stack frame.
 */
void do_templates ()
{
	int i;

	GLOBAL = new_stream ();
	tpls = (Template**) alloca (c_nsym * sizeof (Template*));

	/* start with an empty table: null marks "no template for this slot" */
	for (i = 0; i < c_nsym; i++)
		tpls [i] = 0;

	pass ();

	/* release whatever entries are populated after the pass */
	for (i = 0; i < c_nsym; i++)
		if (tpls [i]) {
			free (tpls [i]->BODY);
			free (tpls [i]);
		}

	free (CODE);
	CODE = combine_output (GLOBAL);

	/* the alloca block dies with this frame; do not leave tpls dangling */
	tpls = 0;
}
// Returns true when `name` ends with ".gz".
// Names shorter than the suffix are simply reported as not matching, so
// very short file names never index past the start of the string.
static bool has_gz_suffix(const std::string &name) {
  return name.size() >= 3 && name.compare(name.size() - 3, 3, ".gz") == 0;
}

////////////////////////////////////////////////////////////
// main
//
// Builds the trusted k-mer bithash from one of the configured
// sources, derives nucleotide priors from the AT/GC counts, then
// for every fastq file: optionally unzips it, splits it into
// chunks, learns the error model (unless TESTING), corrects the
// reads, combines the output and re-zips the file if needed.
////////////////////////////////////////////////////////////
int main(int argc, char **argv) {
  parse_command_line(argc, argv);

  // AT and GC counts; passed to (and filled in by) the loaders below
  unsigned long long atgc[2] = {0};

  // make trusted kmer data structure
  bithash *trusted = new bithash(k);

  // get good kmers from Hammer
  if (hammerf != NULL) {
    const string hammerf_str(hammerf);
    if (has_gz_suffix(hammerf_str)) {
      igzstream hammerf_in(hammerf);
      trusted->hammer_file_load(hammerf_in, atgc);
    } else {
      ifstream hammerf_in(hammerf);
      trusted->hammer_file_load(hammerf_in, atgc);
    }
  }

  // get kmer counts
  if (merf != NULL) {
    const string merf_str(merf);
    const bool mer_zipped = has_gz_suffix(merf_str);

    if (ATcutf != NULL) {
      // cutoffs come from load_AT_cutoffs()
      if (mer_zipped) {
        igzstream mer_in(merf);
        trusted->tab_file_load(mer_in, load_AT_cutoffs(), atgc);
      } else {
        ifstream mer_in(merf);
        trusted->tab_file_load(mer_in, load_AT_cutoffs(), atgc);
      }
    } else {
      // single global cutoff
      if (mer_zipped) {
        igzstream mer_in(merf);
        trusted->tab_file_load(mer_in, cutoff, atgc);
      } else {
        ifstream mer_in(merf);
        trusted->tab_file_load(mer_in, cutoff, atgc);
      }
    }

  // saved bithash
  } else if (bithashf != NULL) {
    if (strcmp(bithashf, "-") == 0) {
      cerr << "Saved bithash cannot be piped in.  Please specify file." << endl;
      exit(EXIT_FAILURE);
    } else {
      trusted->binary_file_input(bithashf, atgc);
    }
  }
  cout << trusted->num_kmers() << " trusted kmers" << endl;

  // Nucleotide priors: entries 0 and 3 share the AT mass, 1 and 2 the rest.
  // NOTE(review): if none of the loaders above ran, atgc is still {0, 0}
  // and this is 0.0/0.0 (NaN) -- confirm parse_command_line rules that out.
  double prior_prob[4];
  prior_prob[0] = (double)atgc[0] / (double)(atgc[0] + atgc[1]) / 2.0;
  prior_prob[1] = .5 - prior_prob[0];
  prior_prob[2] = prior_prob[1];
  prior_prob[3] = prior_prob[0];

  //cout << "AT: " << atgc[0] << " GC: " << atgc[1] << endl;
  cout << "AT% = " << (2 * prior_prob[0]) << endl;

  // make list of files
  vector<string> fastqfs;
  vector<int> pairedend_codes;
  parse_fastq(fastqfs, pairedend_codes);

  // process each file
  for (size_t f = 0; f < fastqfs.size(); f++) {
    string fqf = fastqfs[f];
    cout << fqf << endl;

    // unzip
    // NOTE(review): presumably unzip_fastq rewrites fqf to the
    // uncompressed file name -- confirm against its definition.
    const bool zip = has_gz_suffix(fqf);
    if (zip)
      unzip_fastq(fqf);

    // determine quality value scale (only while it is still unset, i.e. -1)
    if (Read::quality_scale == -1)
      guess_quality_scale(fqf);

    // split file
    vector<streampos> starts;
    vector<unsigned long long> counts;
    chunkify_fastq(fqf, starts, counts);

    // learn nt->nt transitions; the initial table gives every
    // off-diagonal entry 1/3 and leaves the diagonal at 0
    double ntnt_prob[Read::max_qual][4][4] = {0};
    for (int q = 0; q < Read::max_qual; q++)
      for (int i = 0; i < 4; i++)
        for (int j = 0; j < 4; j++)
          if (i != j)
            ntnt_prob[q][i][j] = 1.0 / 3.0;

    if (!TESTING)
      learn_errors(fqf, trusted, starts, counts, ntnt_prob, prior_prob);

    // correct
    correct_reads(fqf, pairedend_codes[f], trusted, starts, counts, ntnt_prob, prior_prob);

    // combine
    if (pairedend_codes[f] == 0) {
      combine_output(fqf, string("cor"), uncorrected_out);
    }

    // combine paired end: code 2 is merged with the preceding file
    if (pairedend_codes[f] == 2) {
      if (!zip) {
        combine_output_paired(fastqfs[f-1], fqf, string("cor"), uncorrected_out);
      } else {
        // drop the last three characters of the mate's name
        // (presumably its ".gz" extension -- assumes the mate was zipped too)
        combine_output_paired(fastqfs[f-1].substr(0, fastqfs[f-1].size() - 3), fqf, string("cor"), uncorrected_out);
      }
    }

    if (zip)
      zip_fastq(fqf);
  }

  return 0;
}