Пример #1
0
int main(int argc, const char *argv[]) {
    OptArgs opts;
    opts.ParseCmdLine(argc, argv);
    bool help;
    string topFile, bottomFile, outFile;
    opts.GetOption(topFile, "", '-', "top");
    opts.GetOption(bottomFile, "", '-', "bottom");
    opts.GetOption(outFile, "", '-', "merged");
    opts.GetOption(help, "false", 'h', "help");
    if (help || argc == 1) {
        usage();
    }
    ION_ASSERT(!topFile.empty() && !bottomFile.empty() && !outFile.empty(),
               "Need top, bottom and merged files. use --help for details.");
    MergeAcq merger;
    Image top;
    Image bottom;
    Image combo;
    cout << "Loading images." << endl;
    ION_ASSERT(top.LoadRaw(topFile.c_str()), "Couldn't load file.");
    ION_ASSERT(bottom.LoadRaw(bottomFile.c_str()), "Couldn't load file.");
    merger.SetFirstImage(&bottom);
    merger.SetSecondImage(&top, bottom.GetRows(), 0); // starting vertically raised but columns the same.
    cout << "Merging." << endl;
    merger.Merge(combo);
    Acq acq;
    cout << "Saving. " << endl;
    acq.SetData(&combo);
    acq.WriteVFC(outFile.c_str(), 0, 0, combo.GetCols(), combo.GetRows());
    cout << "Done." << endl;
    return 0;
}
Пример #2
0
/**
 * RawWellsEquivalent - compare a "query" wells file against a trusted "gold"
 * wells file and report whether they are equivalent.
 *
 * Options:
 *   -q,--query-wells   wells file to check (required).
 *   -g,--gold-wells    trusted wells file (required).
 *   -e,--epsilon       value passed to CompareWells as the allowed difference (default 0.0).
 *   -m,--max-mismatch  number of non-equivalent entries tolerated (default 0).
 *   -c,--min-cor       minimum correlation required (default 1).
 *      --max-val       maximum absolute value passed to CompareWells (default 1e3).
 *   -h,--help          print usage and exit with status 1.
 *   -v,--version       print the version string and exit with status 0.
 *
 * @return 0 when the files are considered equivalent, 1 otherwise.
 */
int main(int argc, const char *argv[]) {

  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  string queryFile, goldFile;
  double epsilon = 0.0;
  bool help = false;
  bool version = false;
  int allowedWrong = 0;
  double maxAbsVal = 0;
  double minCorrelation = 1;
  opts.GetOption(queryFile, "", 'q', "query-wells");
  opts.GetOption(goldFile, "", 'g', "gold-wells");
  opts.GetOption(epsilon, "0.0", 'e', "epsilon");
  opts.GetOption(allowedWrong, "0", 'm', "max-mismatch");
  opts.GetOption(minCorrelation, "1", 'c', "min-cor");
  opts.GetOption(maxAbsVal, "1e3", '-', "max-val");
  opts.GetOption(help, "false", 'h', "help");
  opts.GetOption(version, "false", 'v', "version");
  opts.CheckNoLeftovers();

  if (version) {
    fprintf (stdout, "%s", IonVersion::GetFullVersion("RawWellsEquivalent").c_str());
    exit(0);
  }

  // Both wells files are mandatory; a missing one is treated like --help.
  if (queryFile.empty() || goldFile.empty() || help) {
    cout << "RawWellsEquivalent - Check to see how similar two wells files are to each other" << endl
         << "options: " << endl
         << "   -g,--gold-wells    trusted wells to compare against." << endl
         << "   -q,--query-wells   new wells to check." << endl
         << "   -e,--epsilon       maximum allowed difference to be considered equivalent." << endl
         << "   -m,--max-mismatch  maximum number of non-equivalent entries to allow." << endl
         << "   -c,--min-cor       minimum correlation allowed to be considered equivalent." << endl
         << "      --max-val       maximum absolute value considered (avoid extreme values)." << endl
         << "   -h,--help          this message." << endl
         << "" << endl
         << "usage: " << endl
         << "   RawWellsEquivalent -e 10 --query-wells query.wells --gold-wells gold.wells " << endl;
    exit(1);
  }

  NumericalComparison<double> compare = CompareWells(queryFile, goldFile, epsilon, maxAbsVal);
  cout << compare.GetCount() << " total values. " << endl
       << compare.GetNumSame() << " (" << (100.0 * compare.GetNumSame())/compare.GetCount() <<  "%) are equivalent. " << endl
       << compare.GetNumDiff() << " (" << (100.0 * compare.GetNumDiff())/compare.GetCount() <<  "%) are not equivalent. " << endl
       << "Correlation of: " << compare.GetCorrelation() << endl;

  // The files fail only when MORE than allowedWrong entries differ, i.e. when
  // fewer than (count - allowedWrong) entries are the same. A non-strict
  // comparison here would reject identical files when allowedWrong is 0.
  if((compare.GetCount() - allowedWrong) > compare.GetNumSame() ||
     compare.GetCorrelation() < minCorrelation) {
    cout << "Wells files not equivalent for allowed mismatch: " << allowedWrong
         << " minimum correlation: " << minCorrelation << endl;
    return 1;
  }
  cout << "Wells files equivalent for allowed mismatch: " << allowedWrong
       << " minimum correlation: " << minCorrelation << endl;
  return 0;
}
Пример #3
0
int main(int argc, const char *argv[]) {
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  string queryFile, goldFile;
  double epsilon;
  bool help = false;
  bool version = false;
  int allowedWrong = 0;
  double maxAbsVal = 0;
  double minCorrelation = 1;
  bool dumpMisMatch = false;
  opts.GetOption(queryFile, "", 'q', "query-wells");
  opts.GetOption(goldFile, "", 'g', "gold-wells");
  opts.GetOption(epsilon, "0.0", 'e', "epsilon");
  opts.GetOption(allowedWrong, "0", 'm', "max-mismatch");
  opts.GetOption(minCorrelation, "1", 'c', "min-cor");
  opts.GetOption(maxAbsVal, "1e3", '-', "max-val");
  opts.GetOption(help, "false", 'h', "help");
  opts.GetOption(version, "false", 'v', "version");
  opts.GetOption(dumpMisMatch, "false", 'o', "dump-mismatch");
  opts.CheckNoLeftovers();
  
  if (version) {
  	fprintf (stdout, "%s", IonVersion::GetFullVersion("RawWellsEquivalent").c_str());
  	exit(0);
  }
  
  if (queryFile.empty() || goldFile.empty() || help) {
    printUsage();
    exit(1);
  }

  DumpMismatches dump(dumpMisMatch);
  NumericalComparison<double> compare = CompareWells(queryFile, goldFile, epsilon, maxAbsVal, dump);
  cout << compare.GetCount() << " total values. " << endl
       << compare.GetNumSame() << " (" << (100.0 * compare.GetNumSame())/compare.GetCount() <<  "%) are equivalent. " << endl
       << compare.GetNumDiff() << " (" << (100.0 * compare.GetNumDiff())/compare.GetCount() <<  "%) are not equivalent. " << endl 
       << "Correlation of: " << compare.GetCorrelation() << endl;

  if((compare.GetCount() - allowedWrong) > compare.GetNumSame() || 
     (compare.GetCorrelation() < minCorrelation && compare.GetCount() != compare.GetNumSame())) {
     cout << "Wells files not equivalent for allowed mismatch: " << allowedWrong 
     << " minimum correlation: " << minCorrelation << endl;
     return 1;
  }
  cout << "Wells files equivalent for allowed mismatch: " << allowedWrong 
       << " minimum correlation: " << minCorrelation << endl;
  return 0;
}
Пример #4
0
/**
 * Read the file-related command line options into this object's members.
 *
 * Required options (the process exits when one is missing):
 *   -r,--reference   -> fasta          (exit status 1)
 *   -b,--input-bam   -> bams           (exit status -1)
 *   -o,--output-vcf  -> outputFile     (exit status 1)
 * Optional options (an INFO line is printed to stderr when absent):
 *   -c,--input-vcf   -> variantPriorsFile
 *   -e,--error-motifs-> sseMotifsFileName, sseMotifsProvided
 * Other options: -O,--output-dir (default "."), --postprocessed-bam,
 * -g,--sample-name, --force-sample-name.
 *
 * Every path that was supplied for fasta, variantPriorsFile,
 * sseMotifsFileName, bams and outputDir is passed through
 * ValidateAndCanonicalizePath.
 *
 * @param opts parsed command line options to read from.
 */
void ExtendParameters::SetupFileIO(OptArgs &opts) {
  // freeBayes slot: reference sequence, mandatory.
  fasta = opts.GetFirstString('r', "reference", "");
  if (fasta.empty()) {
    cerr << "Fatal ERROR: Reference file not specified via -r" << endl;
    exit(1);
  }
  ValidateAndCanonicalizePath(fasta);

  // freeBayes slot: input VCF (hotspot) file, optional.
  variantPriorsFile = opts.GetFirstString('c', "input-vcf", "");
  if (!variantPriorsFile.empty()) {
    ValidateAndCanonicalizePath(variantPriorsFile);
  }
  else {
    cerr << "INFO: No input VCF (Hotspot) file specified via -c,--input-vcf" << endl;
  }

  // Systematic error motif file, optional; remember whether one was given.
  sseMotifsFileName = opts.GetFirstString('e', "error-motifs", "");
  sseMotifsProvided = !sseMotifsFileName.empty();
  if (sseMotifsProvided) {
    ValidateAndCanonicalizePath(sseMotifsFileName);
  }
  else {
    cerr << "INFO: Systematic error motif file not specified via -e" << endl;
  }

  // At least one input BAM is mandatory.
  opts.GetOption(bams, "", 'b', "input-bam");
  if (bams.empty()) {
    cerr << "FATAL ERROR: BAM file not specified via -b" << endl;
    exit(-1);
  }
  for (size_t bamIdx = 0; bamIdx != bams.size(); ++bamIdx) {
    ValidateAndCanonicalizePath(bams[bamIdx]);
  }

  outputDir = opts.GetFirstString('O', "output-dir", ".");
  ValidateAndCanonicalizePath(outputDir);

  // Output VCF name is mandatory; it is not canonicalized here.
  outputFile = opts.GetFirstString('o', "output-vcf", "");
  if (outputFile.empty()) {
    cerr << "Fatal ERROR: Output VCF filename not specified via -o" << endl;
    exit(1);
  }

  // Are those file names?
  postprocessed_bam = opts.GetFirstString('-', "postprocessed-bam", "");
  sampleName        = opts.GetFirstString('g', "sample-name", "");
  force_sample_name = opts.GetFirstString('-', "force-sample-name", "");
}
Пример #5
0
// Entry point of the bamrealignment tool.
//
// Reads every alignment from the input BAM, and for each mapped read tries to
// recompute the alignment with a Realigner (new CIGAR data, new MD tag and,
// where needed, a shifted start position). Every read -- changed or not -- is
// written to the output BAM, and a summary of counters is printed at the end.
//
// Options (short, long, default):
//   -i --input      ""            input BAM (required)
//   -o --output     ""            output BAM (required)
//   -s --scores     "4,-6,-5,-2"  four alignment scores
//   -c --clipping   2             passed to Realigner::SetClipping
//   -a --anchors    true          passed to CreateRefFromQueryBases
//   -b --bandwidth  10            passed to SetAlignmentBandwidth
//   -v --verbose    false         interactive per-read output
//   -d --debug      false
//   -f --format     1             1 selects uncompressed output, else compressed
//   -t --threads    8             writer threads
//   -l --log        ""            optional per-read log file
//
// Returns the result of PrintHelp() when input or output is missing, 1 when a
// file cannot be opened or the user quits in verbose mode, and 0 otherwise.
int main (int argc, const char *argv[])
{
  printf ("------------- bamrealignment --------------\n");

  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  vector<int> score_vals(4);

  string input_bam  = opts.GetFirstString  ('i', "input", "");
  string output_bam = opts.GetFirstString  ('o', "output", "");
  opts.GetOption(score_vals, "4,-6,-5,-2", 's', "scores");
  int    clipping   = opts.GetFirstInt     ('c', "clipping", 2);
  bool   anchors    = opts.GetFirstBoolean ('a', "anchors", true);
  int    bandwidth  = opts.GetFirstInt     ('b', "bandwidth", 10);
  bool   verbose    = opts.GetFirstBoolean ('v', "verbose", false);
  bool   debug      = opts.GetFirstBoolean ('d', "debug", false);
  int    format     = opts.GetFirstInt     ('f', "format", 1);
  int  num_threads  = opts.GetFirstInt     ('t', "threads", 8);
  string log_fname  = opts.GetFirstString  ('l', "log", "");
  

  // Both file names are mandatory; otherwise show the help text.
  if (input_bam.empty() or output_bam.empty())
    return PrintHelp();

  opts.CheckNoLeftovers();

  // The log file is optional and only opened when a name was given.
  std::ofstream logf;
  if (log_fname.size ())
  {
    logf.open (log_fname.c_str ());
    if (!logf.is_open ())
    {
      fprintf (stderr, "bamrealignment: Failed to open log file %s\n", log_fname.c_str());
      return 1;
    }
  }

  BamReader reader;
  if (!reader.Open(input_bam)) {
    fprintf(stderr, "bamrealignment: Failed to open input file %s\n", input_bam.c_str());
    return 1;
  }

  // The output BAM reuses the header and reference data of the input.
  SamHeader header = reader.GetHeader();
  RefVector refs   = reader.GetReferenceData();

  BamWriter writer;
  writer.SetNumThreads(num_threads);
  if (format == 1)
    writer.SetCompressionMode(BamWriter::Uncompressed);
  else
    writer.SetCompressionMode(BamWriter::Compressed);

  if (!writer.Open(output_bam, header, refs)) {
    fprintf(stderr, "bamrealignment: Failed to open output file %s\n", output_bam.c_str());
    return 1;
  }


  // The meat starts here ------------------------------------

  if (verbose)
    cout << "Verbose option is activated, each alignment will print to screen." << endl
         << "  After a read hit RETURN to continue to the next one," << endl
         << "  or press q RETURN to quit the program," << endl
         << "  or press s Return to silence verbose," << endl
         << "  or press c RETURN to continue printing without further prompt." << endl << endl;

  // Counters reported in the summary at the end of the run.
  unsigned int readcounter = 0;
  unsigned int mapped_readcounter = 0;
  unsigned int realigned_readcounter = 0;
  unsigned int modified_alignment_readcounter = 0;
  unsigned int pos_update_readcounter = 0;
  unsigned int failed_clip_realigned_readcount = 0;
  
  unsigned int already_perfect_readcount = 0;
  
  unsigned int bad_md_tag_readcount = 0;
  unsigned int error_recreate_ref_readcount = 0;
  unsigned int error_clip_anchor_readcount = 0;
  unsigned int error_sw_readcount = 0;
  unsigned int error_unclip_readcount = 0;
  
  unsigned int start_position_shift;
  int orig_position;
  int new_position;

  // `input` holds the last line typed at the verbose prompt; it starts as "x"
  // so the first prompt actually waits for the user.
  string  md_tag, new_md_tag, input = "x";
  vector<CigarOp>    new_cigar_data;
  vector<MDelement>  new_md_data;
  bool position_shift = false;
  time_t start_time = time(NULL);

  Realigner aligner;
  aligner.verbose_ = verbose;
  aligner.debug_   = debug;
  // A rejected score vector only produces this message; processing continues.
  if (!aligner.SetScores(score_vals))
    cout << "bamrealignment: Four scores need to be provided: match, mismatch, gap open, gap extend score!" << endl;

  aligner.SetAlignmentBandwidth(bandwidth);

  BamAlignment alignment;
  while(reader.GetNextAlignment(alignment)){
    readcounter ++;
    position_shift = false;
    
    // Progress message every 100000 reads.
    if ( (readcounter % 100000) == 0 )
       cout << "Processed " << readcounter << " reads. Elapsed time: " << (time(NULL) - start_time) << endl;

    // Unmapped reads skip straight to the SaveAlignment call at the loop end.
    if (alignment.IsMapped()) {
      
      
      
      orig_position = alignment.Position;
      mapped_readcounter++;
      aligner.SetClipping(clipping, !alignment.IsReverseStrand());
      if (aligner.verbose_) {
    	cout << endl;
        if (alignment.IsReverseStrand())
          cout << "The read is from the reverse strand." << endl;
        else
          cout << "The read is from the forward strand." << endl;
      }

      // Case 1: no MD tag -- the read is counted, logged as MISSMD and written unchanged.
      if (!alignment.GetTag("MD", md_tag)) {
    	if (aligner.verbose_)
          cout << "Warning: Skipping read " << alignment.Name << ". It is mapped but missing MD tag." << endl;
	if (logf.is_open ())
	  logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "MISSMD" << '\n';
	bad_md_tag_readcount++;
      // Case 2: CreateRefFromQueryBases succeeded -- run the realignment.
      } else if (aligner.CreateRefFromQueryBases(alignment.QueryBases, alignment.CigarData, md_tag, anchors)) {
	// CreateRefFromQueryBases can return true while still reporting a clip-anchor error;
	// such reads are realigned anyway and tagged NOCLIP in the log.
	bool clipfail = false;
	if (Realigner::CR_ERR_CLIP_ANCHOR == aligner.GetCreateRefError ())
	{
	  clipfail = true;
	  failed_clip_realigned_readcount ++;
	}

        if (!aligner.computeSWalignment(new_cigar_data, new_md_data, start_position_shift)) {
          if (aligner.verbose_)
            cout << "Error in the alignment! Not updating read information." << endl;
	  if (logf.is_open ())
	    logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "SWERR" << '\n';
	  error_sw_readcount++;
          writer.SaveAlignment(alignment);  // Write alignment unchanged
          continue;
        }

        if (!aligner.addClippedBasesToTags(new_cigar_data, new_md_data, alignment.QueryBases.size())) {
          if (aligner.verbose_)
            cout << "Error when adding clipped anchors back to tags! Not updating read information." << endl;
	  if (logf.is_open ())
	    logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "UNCLIPERR" << '\n';
          writer.SaveAlignment(alignment);  // Write alignment unchanged
	  error_unclip_readcount ++;
          continue;
        }
        new_md_tag = aligner.GetMDstring(new_md_data);
        realigned_readcounter++;

        // adjust start position of read
        if (!aligner.LeftAnchorClipped() and start_position_shift != 0) {
          new_position = aligner.updateReadPosition(alignment.CigarData, (int)start_position_shift, alignment.Position);
          if (new_position != alignment.Position) {
            pos_update_readcounter++;
            position_shift = true;
            alignment.Position = new_position;
          }
        }
        
        // A read counts as modified when its position shifted, the number of
        // CIGAR operations changed, or the MD string changed.
        if (position_shift || alignment.CigarData.size () != new_cigar_data.size () || md_tag != new_md_tag)
	{
	  if (logf.is_open ())
	  {
	    logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "MOD";
	    if (position_shift)
	      logf << "-SHIFT";
	    if (clipfail)
	      logf << " NOCLIP";
	    logf << '\n';
	  }
	  modified_alignment_readcounter++;
	}
	else
	{
            if (logf.is_open ())
	    {
	      logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "UNMOD";
              if (clipfail)
	        logf << " NOCLIP";
	      logf << '\n';
	    }
	}

        if (aligner.verbose_){
          cout << alignment.Name << endl;
          cout << "------------------------------------------" << endl;
          // Wait for input to continue or quit program
          // An empty previous answer is reset to "x" without prompting; a
          // previous answer starting with c/C suppresses all further prompts.
          if (input.size() == 0)
            input = 'x';
          else if (input[0] != 'c' and input[0] != 'C')
            getline(cin, input);
          if (input.size()>0){
            if (input[0] == 'q' or input[0] == 'Q')
              return 1;
            else if (input[0] == 's' or input[0] == 'S')
              aligner.verbose_ = false;
          }
        }

        // Finally update alignment information
        alignment.CigarData = new_cigar_data;
        alignment.EditTag("MD", "Z" , new_md_tag);

      } // end of CreateRef else if
      // Case 3: CreateRefFromQueryBases returned false -- classify why and leave the read unchanged.
      else {
	switch (aligner.GetCreateRefError ())
	{
	  case Realigner::CR_ERR_RECREATE_REF:
            if (logf.is_open ())
	      logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "RECRERR" << '\n';
	    error_recreate_ref_readcount++;
	    break;
	  case Realigner::CR_ERR_CLIP_ANCHOR:
            if (logf.is_open ())
	      logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "CLIPERR" << '\n';
	    error_clip_anchor_readcount++;
	    break;
	  default:
		  //  On a good run this writes way too many reads to the log file - don't want to create a too large txt file
          //  if (logf.is_open ())
	      //logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill ('0') << setw (8) << orig_position << '\t' << "PERFECT" << '\n';
	    already_perfect_readcount++;
	    break;
	}
	
	// Same interactive prompt handling as in the realigned branch above.
	if (aligner.verbose_) {
	  cout << alignment.Name << endl;
	  cout << "------------------------------------------" << endl;
	  // Wait for input to continue or quit program
	  if (input.size() == 0)
	    input = 'x';
	  else if (input[0] != 'c' and input[0] != 'C')
	    getline(cin, input);
	  if (input.size()>0){
	    if (input[0] == 'q' or input[0] == 'Q')
	      return 1;
	    else if (input[0] == 's' or input[0] == 'S')
	      aligner.verbose_ = false;
	  }
	}
      }

      // --- Debug output for Rajesh ---
      // In debug mode, reads flagged with an invalid cigar/md pair are re-run
      // with verbose output forced on; the flag is then cleared.
      if (debug && aligner.invalid_cigar_in_input) {
        aligner.verbose_ = true;
        cout << "Invalid cigar string / md tag pair in read " << alignment.Name << endl;
        // Rerun reference generation to display error
        aligner.CreateRefFromQueryBases(alignment.QueryBases, alignment.CigarData, md_tag, anchors);

        aligner.verbose_ = verbose;
        aligner.invalid_cigar_in_input = false;
      }
      // --- --- ---


    } // end of if isMapped

    writer.SaveAlignment(alignment);

  } // end while loop over reads

  // Because the debug block above clears the flag per read, this warning can
  // only fire for flags still set at the end of the loop.
  if (aligner.invalid_cigar_in_input)
    cerr << "WARNING bamrealignment: There were invalid cigar string / md tag pairs in the input bam file." << endl;

  // ----------------------------------------------------------------
  // program end -- output summary information
  cout   << "                            File: " << input_bam    << endl
         << "                     Total reads: " << readcounter  << endl
         << "                    Mapped reads: " << mapped_readcounter << endl;
  if (bad_md_tag_readcount)
    cout << "            Skipped: bad MD tags: " << bad_md_tag_readcount << endl;
  if (error_recreate_ref_readcount)
    cout << " Skipped: unable to recreate ref: " << error_recreate_ref_readcount << endl;
  if (error_clip_anchor_readcount)
    cout << "  Skipped: error clipping anchor: " << error_clip_anchor_readcount << endl;
  // "Total reads realigned" is derived by subtracting the skipped categories
  // from the mapped count, not taken from a dedicated counter.
  cout  <<  "       Skipped:  already perfect: " << already_perfect_readcount << endl
        <<  "           Total reads realigned: " << mapped_readcounter - already_perfect_readcount - bad_md_tag_readcount - error_recreate_ref_readcount - error_clip_anchor_readcount << endl;
  if (failed_clip_realigned_readcount)
    cout << "                      (including  " << failed_clip_realigned_readcount << " that failed to clip)" << endl;
  if (error_sw_readcount)
    cout << " Failed to complete SW alignment: " << error_sw_readcount << endl;
  if (error_unclip_readcount)
    cout << "         Failed to unclip anchor: " << error_unclip_readcount << endl;
  cout   << "           Succesfully realigned: " << realigned_readcounter << endl
         << "             Modified alignments: " << modified_alignment_readcounter << endl
         << "                Shifted position: " << pos_update_readcounter << endl;
  
  cout << "Processing time: " << (time(NULL)-start_time) << " seconds." << endl;
  cout << "INFO: The output BAM file may be unsorted." << endl;
  cout << "------------------------------------------" << endl;
  return 0;
}
Пример #6
0
// Entry point of a tool that converts acquisition files
// (<source-dir>/acq_NNNN.dat) into compressed <output-dir>/acq_NNNN.sdat files.
//
// Outline:
//   1. Parse options into a TraceConfig; --source-dir and --output-dir are
//      required (usage() is called otherwise).
//   2. Pick a trace compressor from --compression (svd, delta, delta-plain,
//      delta-plain-fst, delta-plain-fst-smx, none); anything else aborts.
//   3. Load beadfind_pre_0003.dat and beadfind_pre_0001.dat, compute a t0
//      estimate from each and combine the two, per well and per region.
//   4. Process flow 0 single threaded: estimate sigma / t_mid_nuc, optionally
//      override them from --region-param or the *-hard options, write the
//      sdat, read it back and print difference statistics.
//   5. Process flows 1..numFlows-1 through a PJobQueue of CreateSDat jobs.
int main(int argc, const char *argv[]) {
  OptArgs opts;  
  TraceConfig config;
  string inputDir;
  string outputDir;
  bool help;

  opts.ParseCmdLine(argc, argv);
  opts.GetOption(inputDir, "", '-', "source-dir");
  opts.GetOption(outputDir, "", '-', "output-dir");
  opts.GetOption(config.precision, "5", '-', "precision");
  opts.GetOption(config.numEvec, "7", '-', "num-evec");
  opts.GetOption(config.doDebug, "false", '-', "debug-files");
  opts.GetOption(config.compressionType, "delta", '-', "compression");
  opts.GetOption(config.numFlows, "-1", '-', "num-flows");
  opts.GetOption(config.numCores, "6", '-', "num-cores");
  opts.GetOption(config.errCon,"0",'-',"err-con");
  opts.GetOption(config.rankGood,"0",'-',"rank-good");
  opts.GetOption(config.pivot,"0",'-',"pivot");
  opts.GetOption(help, "false", 'h', "help");
  opts.GetOption(config.isThumbnail, "false", '-', "thumbnail");
  opts.GetOption(config.use_hard_est, "false",'-', "use-hard-est");
  opts.GetOption(config.t0_hard, "0", '-', "t0-hard");
  opts.GetOption(config.tmid_hard, "0", '-', "tmid-hard");
  opts.GetOption(config.sigma_hard, "0", '-', "sigma-hard");
  opts.GetOption(config.row_step, "100", '-', "row-step");
  opts.GetOption(config.col_step, "100", '-', "col-step");
  opts.GetOption(config.bg_param, "", '-', "region-param");
  opts.GetOption(config.grind_acq_0, "0", '-', "grind-acq0");
  // NOTE(review): execution continues past this block unless usage() itself
  // terminates the process -- presumably it does; confirm.
  if(help || inputDir.empty() || outputDir.empty()) {
    usage();
  }
  char *explog_path = NULL;
  explog_path = MakeExpLogPathFromDatDir(inputDir.c_str());
  // A negative --num-flows (the default is -1) means: take the count from the explog.
  int numFlows = config.numFlows;
  if (numFlows < 0) { 
    numFlows = GetTotalFlows(explog_path); 
  }

  // Check and setup our compression type
  TraceChunkSerializer serializer;
  serializer.SetRecklessAbandon(true);
  // NOTE(review): the compressors below are allocated with new and handed to
  // SetCompressor; whether the serializer takes ownership is not visible here.
  if (config.compressionType == "svd") {
    SvdDatCompress *dc = new SvdDatCompress(config.precision, config.numEvec);
    serializer.SetCompressor(dc);
    cout << "Doing lossy svd compression. (" << serializer.GetCompressionType() << ")" << endl;
  }
  // else if (config.compressionType == "svd+") {
  //   SvdDatCompressPlus *dc = new SvdDatCompressPlus();
  //   serializer.SetCompressor(dc);
  //   cout << "Doing lossy svd compression. (" << serializer.GetCompressionType() << ")" << endl;
  // }
  // else if (config.compressionType == "svd++") {
  //   SvdDatCompressPlusPlus *dc = new SvdDatCompressPlusPlus();
  //   if (config.errCon >0 )
  //     dc->SetErrCon(config.errCon);
  //   if (config.rankGood > 0 )
  //     dc->SetRankGood(config.rankGood);
  //   if (config.pivot > 0)
  //     dc->SetPivot(config.pivot);
  //   serializer.SetCompressor(dc);
  //   cout << "Doing lossy svd compression for good traces and delta for bad ones. (" << serializer.GetCompressionType() << ")" << endl;
  // }
  else if (config.compressionType == "delta") {
    VencoLossless *venco = new VencoLossless();
    serializer.SetCompressor(venco);
    cout << "Doing lossless delta compression. (" << serializer.GetCompressionType() << ")" << endl;
  }
  else if (config.compressionType == "delta-plain") {
    DeltaComp *delta = new DeltaComp();
    serializer.SetCompressor(delta);
    cout << "Doing lossless delta plain compression. (" << serializer.GetCompressionType() << ")" << endl;
  }
  else if (config.compressionType == "delta-plain-fst") {
    DeltaCompFst *delta = new DeltaCompFst();
    serializer.SetCompressor(delta);
    cout << "Doing lossless delta plain fast compression. (" << serializer.GetCompressionType() << ")" << endl;
  }
  else if (config.compressionType == "delta-plain-fst-smx") {
   DeltaCompFstSmX *delta = new DeltaCompFstSmX();
    serializer.SetCompressor(delta);
    cout << "Doing lossless delta plain fast compression. (" << serializer.GetCompressionType() << ")" << endl;
  }
  else if (config.compressionType == "none") {
    TraceCompressor *vanilla = new TraceNoCompress();
    serializer.SetCompressor(vanilla);
    cout << "Doing no compression. (" << serializer.GetCompressionType() << ")" << endl;
  }
  else {
    ION_ABORT("Don't recognize compression type: " + config.compressionType);
  }

  // The explog path is only needed for the flow count and the chip id; it is freed right after.
  const char *id = GetChipId(explog_path);
  if (explog_path) free (explog_path);
  ChipIdDecoder::SetGlobalChipId(id);
  ImageTransformer::CalibrateChannelXTCorrection(inputDir.c_str(), "lsrowimage.dat");

  // First beadfind image; it also defines the dimensions of the shared mask.
  // NOTE(review): the return values of the LoadRaw calls in this function are not checked.
  Image bfImg1;
  string bfFile = inputDir + "/beadfind_pre_0003.dat";
  bfImg1.LoadRaw(bfFile.c_str());
  const RawImage *bf1raw = bfImg1.GetImage(); 
  Mask mask(bf1raw->cols, bf1raw->rows);
  ImageTransformer::XTChannelCorrect(bfImg1.raw,bfImg1.results_folder);

  bfImg1.FilterForPinned (&mask, MaskEmpty, false);

  // Second beadfind image, filtered into the same mask.
  // NOTE(review): the correction below passes bfImg1.results_folder for
  // bfImg2 -- confirm this is intended.
  Image bfImg2;
  string bfFile2 = inputDir + "/beadfind_pre_0001.dat";
  bfImg2.LoadRaw(bfFile2.c_str());
  ImageTransformer::XTChannelCorrect(bfImg2.raw,bfImg1.results_folder);

  bfImg2.FilterForPinned (&mask, MaskEmpty, false);
  const RawImage *bf2raw = bfImg2.GetImage(); 


  GridMesh<T0Prior> t0Prior;
  T0Calc bfT0;
  /* Calc t0 and get prior. */
  cout << "Doing beadfind t0" << endl;
  GenerateBfT0Prior(config, bf1raw->image, bf1raw->baseFrameRate, bf1raw->rows, bf1raw->cols,
                    bf1raw->frames, bf1raw->timestamps,
                    config.row_step, config.col_step, &mask, bfT0, t0Prior);

  // Same calculation for the second beadfind image.
  GridMesh<T0Prior> t0Prior2;
  T0Calc bfT02;
  GenerateBfT0Prior(config, bf2raw->image, bf2raw->baseFrameRate, bf2raw->rows, bf2raw->cols,
                    bf2raw->frames, bf2raw->timestamps,
                    config.row_step, config.col_step, &mask, bfT02, t0Prior2);

  SigmaTMidNucEstimation sigmaEst;
  sigmaEst.Init(config.rate_sigma_intercept, config.rate_sigma_slope, 
                config.t0_tmid_intercept, config.t0_tmid_slope, bf1raw->baseFrameRate);
  GridMesh<SigmaEst> sigmaTMid;
  bfImg1.Close();
  bfImg2.Close();

  // Calculate individual well t0 by looking at neighboring regions
  vector<float> wellT0;
  bfT0.CalcIndividualT0(wellT0, 0);
  vector<float> wellT02;
  bfT02.CalcIndividualT0(wellT02, 0);
  // Combine the two per-well estimates: average when both are positive,
  // otherwise keep the larger of the two.
  // NOTE(review): wellT02 is indexed with wellT0's size; assumes both vectors
  // have the same length -- confirm.
  for (size_t i =0; i< wellT0.size();i++) {
    if (wellT0[i] > 0 && wellT02[i] > 0) {
      wellT0[i] = (wellT0[i] + wellT02[i])/2.0f;
    }
    else {
      wellT0[i] = max(wellT0[i], wellT02[i]);
    }
  }

  // Average the region level t0, should we do this first and then just do sinle well level?
  // Same combination rule as for the wells, stored back into bfT0.
  for (size_t bIx = 0; bIx < bfT0.GetNumRegions(); bIx++) {
    double t1 = bfT0.GetT0(bIx);
    double t2 = bfT02.GetT0(bIx);
    if (t1 > 0 && t2 > 0) {
      t1 = (t1 + t2)/2.0;
    }
    else {
      t1 = max(t1,t2);
    }
    bfT0.SetT0(bIx, t1);
  }

  // Single thread first dat
  // The loop bound is 1, so the body runs exactly once with datIx == 0 and
  // every "datIx == 0" test inside it is true.
  for (size_t datIx = 0; datIx < 1; ++datIx) {
    cout << "Doing: " << datIx << endl;
    char buffer[2048];
    snprintf(buffer, sizeof(buffer), "%s/acq_%.4d.dat", inputDir.c_str(), (int) datIx);
    string datFile = buffer;
    /* Use prior to calculate t0 and slope. */
    Image datImg;
    T0Calc t0;
    datImg.LoadRaw(datFile.c_str());
    //    ImageTransformer::XTChannelCorrect(datImg.raw,datImg.results_folder);
    const RawImage *datRaw = datImg.GetImage(); 

    /* Estimate sigma and t_mid_nuc */
    if (datIx == 0) {
      cout << "Doing acquisition t0" << endl;

      GenerateAcqT0Prior(config, datRaw->image, datRaw->baseFrameRate, datRaw->rows, datRaw->cols,
                         datRaw->frames, datRaw->timestamps,
                         config.row_step, config.col_step, &mask, t0, t0Prior);
      
      ClockTimer timer;
      cout << "Estimating sigma." << endl;
      sigmaTMid.Init(datRaw->rows, datRaw->cols, config.row_step, config.col_step);
      // Overwrite the acquisition region t0 values with the combined beadfind ones.
      for (size_t bIx = 0; bIx < t0.GetNumRegions(); bIx++) {
        t0.SetT0(bIx, bfT0.GetT0(bIx));
      }
      int neighbors = 2;
      if (config.isThumbnail) {
        cout << "Doing thumbnail version of slope." << endl;
        neighbors = 1;
      }
      EstimateSigmaValue(t0, sigmaEst, sigmaTMid, neighbors);
      timer.PrintMilliSeconds(cout,"Sigma Est took:");
      string sigmaFile = outputDir + "/sigma_tmid_est.txt";
      OutputSigmaTmidEstimates(sigmaTMid, sigmaFile.c_str());
    }

    /* For each region do shifting */
    // The ShiftTraces call is commented out, so this timer brackets no work.
    ClockTimer timer;
    cout << "Shifting traces" << endl;
    timer.StartTimer();
    //    ShiftTraces(bfT0, wellT0, datRaw->frames, datRaw->baseFrameRate, datRaw->timestamps, datRaw->image);
    timer.PrintMilliSeconds(cout,"Shift took:");
    // Optional override 1: take tmid/sigma per region from the file given
    // with --region-param.
    if (!config.bg_param.empty()) {
      DataCube<int> rowsCols;
      DataCube<float> tmidSigma;
      DataCube<float> fitTmidSigma;
      string path = config.bg_param + ":/region/region_location";
      if (!H5File::ReadDataCube(path, rowsCols)) {
        ION_ABORT("Couldn't read file: " + path);
      }
      path = config.bg_param + ":/region/region_init_param";
      if (!H5File::ReadDataCube(path, fitTmidSigma)) {
        ION_ABORT("Couldn't read file: " + path);
      }
      // Row is read from index 1 and column from index 0 of region_location;
      // tmid from index 0 and sigma from index 1 of region_init_param.
      for (size_t i = 0; i < rowsCols.GetNumX(); i++) {
        int row = rowsCols.At(i,1,0);
        int col = rowsCols.At(i,0,0);
        SigmaEst &est = sigmaTMid.GetItemByRowCol(row, col);
        float tmid_est =  fitTmidSigma.At(i,0,0);
        float sigma_est = fitTmidSigma.At(i,1,0);
        est.mTMidNuc = tmid_est;
        est.mSigma = sigma_est;
      }
      string fitSigmaFile = outputDir + "/bg_fit_sigma_tmid_est.txt";
      OutputSigmaTmidEstimates(sigmaTMid, fitSigmaFile.c_str());

      // path = config.bg_param + ":/region/region_init_param";
      // if (!H5File::ReadMatrix(path, tmidSigma)) {
      //   ION_ABORT("Couldn't read file: " + path);
      // }
      // for (size_t i = 0; i < rowsCols.n_rows; i++) {
      //   int row = rowsCols.at(i,0);
      //   int col = rowsCols.at(i,1);
      //   SigmaEst &est = sigmaTMid.GetItemByRowCol(row, col);
      //   float tmid_est =  tmidSigma.at(i,0);
      //   float sigma_est = tmidSigma.at(i,1);
      //   est.mTMidNuc = tmid_est;
      //   est.mSigma = sigma_est;
      // }
      // string sigmaFile = outputDir + "/supplied_sigma_tmid_est.txt";
      // OutputSigmaTmidEstimates(sigmaTMid, sigmaFile.c_str());
    }
    // Optional override 2: use the fixed values from the *-hard options for
    // every region and every sigma/tmid bin.
    else if (config.use_hard_est) {
      for (size_t i = 0; i < bfT0.GetNumRegions(); i++) {
        bfT0.SetT0(i,config.t0_hard * datRaw->baseFrameRate + config.time_start_slop);
      }
      for (size_t i = 0; i < sigmaTMid.GetNumBin(); i++) {
        SigmaEst &est = sigmaTMid.GetItem(i);
        est.mTMidNuc = config.tmid_hard;
        est.mSigma = config.sigma_hard;
        est.mT0 = config.t0_hard;
      }
    }
    /* Use t0 and sigma to get the time compression bkgModel wants. */
    cout << "Generating chunks" << endl;
    //    GridMesh<TraceChunk> traceChunks;
    SynchDat sdat;
    // Benchmark mode (--grind-acq0 N): generate and write the first flow N
    // times, print timing figures, then exit without the read-back check.
    if (datIx == 0  && config.grind_acq_0 > 0) {
      int nTimes = config.grind_acq_0;
      timer.StartTimer();
      size_t processMicroSec = 0;
      size_t hdf5MicroSec = 0;
      size_t compressMicroSec = 0;
      size_t convertMicroSec = 0;
      for (int i = 0; i <nTimes; i++) {
        //GridMesh<TraceChunk> traceChunken;
        SynchDat sdatIn;
        // NOTE(review): metadata is added to sdat here, while sdatIn is the
        // object that gets filled and written -- confirm which is intended.
        AddMetaData(sdat, datRaw, datIx);
	ClockTimer convTimer;
        GenerateDataChunks(config, bfT0, datRaw, config.row_step, config.col_step, sigmaTMid, sdatIn.mChunks,datImg);
	convertMicroSec += convTimer.GetMicroSec();
        snprintf(buffer, sizeof(buffer), "%s/acq_%.4d.sdat", outputDir.c_str(), (int)datIx);
        serializer.Write(buffer, sdatIn);
	processMicroSec += serializer.computeMicroSec;
	hdf5MicroSec += serializer.ioMicroSec;
	compressMicroSec += serializer.compressMicroSec;
      }
      size_t usec = timer.GetMicroSec();
      cout << "Took: " << usec / 1.0e6 << " seconds, " << usec / (nTimes * 1.0f) << " usec per write." << endl;
      timer.PrintMilliSeconds(cout,"Chunks took:");
      // The messages say "Read took" although the figures come from the write calls above.
      cout << "Read took: " << processMicroSec / (1e3 * nTimes) << " milli seconds per sdat compute." << endl;
      cout << "Read took: " << hdf5MicroSec / (1e3 * nTimes) << " milli seconds per sdat hdf5." << endl;
      cout << "Read took: " << compressMicroSec / (1e3 * nTimes) << " milli seconds per sdat compressing." << endl;
      cout << "Read took: " << convertMicroSec / (1e3 * nTimes) << " milli seconds per sdat converting." << endl;
      exit(0);
    }
    else {
      timer.StartTimer();
      AddMetaData(sdat, datRaw, datIx);
      GenerateDataChunks(config, bfT0, datRaw, config.row_step, config.col_step, sigmaTMid, sdat.mChunks,datImg);
      timer.PrintMilliSeconds(cout,"Chunks took:");
        if (datIx == 0 && config.doDebug) {
          OutputTraceChunks(sdat.mChunks,"flow_0_data_chunks.txt");
        }
    }
    datImg.Close();    

    /* Serialize onto disk. */
    snprintf(buffer, sizeof(buffer), "%s/acq_%.4d.sdat", outputDir.c_str(), (int)datIx);
    serializer.Write(buffer, sdat);
    /* Read back in first flow for checking */
    if (datIx == 0) {
      TraceChunkSerializer readSerializer;
      readSerializer.SetRecklessAbandon(true);
      //      GridMesh<TraceChunk> traceChunksIn;  
      SynchDat sdatIn;
      readSerializer.Read(buffer, sdatIn);
      if (datIx == 0 && config.doDebug) {
        OutputTraceChunks(sdatIn.mChunks, "flow_0_data_chunks_read.txt");
      }
      // Accumulators for the element-wise difference between what was written
      // (sdat) and what was read back (sdatIn).
      SampleQuantiles<float> s(50000);
      SampleQuantiles<float> s2(50000);
      SampleQuantiles<float> sAbs(50000);
      SampleStats<double> ss;
      int diffCount = 0;
      for (size_t bIx = 0; bIx < sdatIn.mChunks.mBins.size(); bIx++) {
        // A t0 mismatch between written and read-back chunk is fatal.
        if (sdatIn.mChunks.mBins[bIx].mT0 != sdat.mChunks.mBins[bIx].mT0) {
          cout << "Got: " << sdatIn.mChunks.mBins[bIx].mT0 << " vs: " << sdat.mChunks.mBins[bIx].mT0 << endl;
          exit(1);
        }
        for (size_t i = 0; i < sdatIn.mChunks.mBins[bIx].mData.size(); i++) {
          double diff = (double)sdatIn.mChunks.mBins[bIx].mData[i] - (double)sdat.mChunks.mBins[bIx].mData[i];
          if (!std::isfinite(diff)) {
            cout << "NaNs!!" << endl;
          }
          // Only the first 10 differences above the threshold are printed.
          if (diffCount < 10 && fabs(diff) > .00001) { // != 0) {
            diffCount++;
            cout << "Bin: " << bIx << " well: " << i << " diff is: " << diff << endl;
          }
          s.AddValue(diff);
          sAbs.AddValue(fabs(diff));
          ss.AddValue(sqrt(diff * diff));
          s2.AddValue(sqrt(diff * diff));
        }
      }
      cout << "Median rms: " << s2.GetMedian()  << " Avg: " << ss.GetMean() << " diff: " << s.GetMedian() << endl;
      cout << "Abs(diff) Quantiles:" << endl;
      for (size_t i = 0; i <= 100; i+=10) {
        cout << i << "\t" << sAbs.GetQuantile(i/100.0) << endl;
      }
    }      
  }
  // do the next N flows multithreaded
  if (numFlows > 1) {
    PJobQueue jQueue (config.numCores, numFlows-1);  
    vector<CreateSDat> jobs(numFlows -1);
    // for (int i = 0; i < 4; i++) {
    //   char buffer[2048];
    //   snprintf(buffer, sizeof(buffer), "%s/beadfind_pre_%.4d.dat", inputDir.c_str(), (int) i);
    //   string input = buffer;
    //   snprintf(buffer, sizeof(buffer), "%s/beadfind_pre_%.4d.sdat", outputDir.c_str(), (int)i);
    //   string output = buffer;
    //   jobs[i].Init(&config, input, output, &wellT0, &bfT0, &sigmaTMid);
    //   jQueue.AddJob(jobs[i]);
    // }

    // jQueue.WaitUntilDone();
    // Flow i is handled by jobs[i-1]; each job gets pointers to the shared
    // config, well t0 vector, region t0 and sigma/tmid estimates.
    for (int i = 1; i < numFlows; i++) {
      char buffer[2048];
      snprintf(buffer, sizeof(buffer), "%s/acq_%.4d.dat", inputDir.c_str(), (int) i);
      string input = buffer;
      snprintf(buffer, sizeof(buffer), "%s/acq_%.4d.sdat", outputDir.c_str(), (int)i);
      string output = buffer;
      jobs[i-1].Init(&config, input, output, &wellT0, &bfT0, &sigmaTMid, i);
      jQueue.AddJob(jobs[i-1]);
    }
    jQueue.WaitUntilDone();
  }
  /* Serialize into backbround models */
  cout << "Done." << endl;
}
Пример #7
0
/**
 * AnalyzeHPErrs entry point.
 *
 * Parses the command line, builds either an SffDiffStats (when --sff1 is given)
 * or a GenomeDiffStats object, configures its optional outputs, optionally
 * restricts the analysis to a sampled set of wells and/or a row/column window,
 * and finally runs FilterAndCompare() over the parsed SAM file, writing the
 * summary to --summary-out.
 *
 * Exits with status 1 on invalid bounds, a bfmask that cannot be read, an
 * out-of-range well index, or an unwritable summary file.
 * Returns 0 on success.
 */
int main(int argc, const char *argv[]) {
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  int hpLength;
  string statsOut;
  string alignmentOut;
  string pairedOut;
  string flowsOut;
  string summaryOut;
  string samFile;
  string qScoreCol;
  string wellsFile;
  string bfmaskFile;
  string snrFile;
  string binnedHpSigFile;
  string flowErrFile;
  string gcErrFile;
  int gcWin;
  string flowOrder;
  string keySeq;
  int numFlows;
  bool help;
  int qLength;
  double colCenter;
  double rowCenter;
  int colSize;
  int rowSize;
  int sampleSize;
  string wellsToUse;
  string run1, run2;
  opts.GetOption(run1, "", '-', "sff1");
  opts.GetOption(run2, "", '-', "sff2");
  opts.GetOption(wellsToUse, "", '-', "use-wells");
  opts.GetOption(samFile, "", '-', "sam-parsed");
  opts.GetOption(statsOut, "", '-', "stats-out");
  opts.GetOption(flowsOut, "", '-', "flows-out");
  opts.GetOption(alignmentOut, "", '-', "align-out");
  opts.GetOption(summaryOut, "", '-', "summary-out");
  opts.GetOption(pairedOut, "", '-', "paired-out");
  opts.GetOption(numFlows, "40", '-', "num-flows");
  opts.GetOption(hpLength, "6", '-', "max-hp");
  opts.GetOption(qScoreCol, "q7Len", '-', "qscore-col");
  opts.GetOption(qLength, "25", '-', "min-qlength");
  opts.GetOption(help,   "false", 'h', "help");
  opts.GetOption(wellsFile,   "", '-', "wells-file");
  opts.GetOption(bfmaskFile,   "", '-', "bfmask-file");
  opts.GetOption(snrFile,   "", '-', "snr-file");
  opts.GetOption(binnedHpSigFile,   "", '-', "binned-hp-sig-file");
  opts.GetOption(flowErrFile, "", '-', "flow-err-file");
  // gc-err-file is queried exactly once (the original asked for it twice).
  opts.GetOption(gcErrFile, "", '-', "gc-err-file");
  opts.GetOption(flowOrder, "", '-', "flow-order");
  opts.GetOption(keySeq, "", '-', "key-seq");
  opts.GetOption(colCenter, "0.5", '-', "col-center");
  opts.GetOption(rowCenter, "0.5", '-', "row-center");
  opts.GetOption(colSize, "0", '-', "col-size");
  opts.GetOption(rowSize, "0", '-', "row-size");
  opts.GetOption(gcWin, "40", '-', "gc-win");
  opts.GetOption(sampleSize, "100000", '-', "sample-size");
  // The SAM input, stats output and summary output are mandatory.
  // NOTE(review): usage() is presumably terminating the program - confirm.
  if (help || samFile.empty() || statsOut.empty() || summaryOut.empty()) {
    usage();
  }
  opts.CheckNoLeftovers();

  // Some checks to make sure sensible bounds have been set
  if(colCenter < 0 || colCenter > 1) {
    cerr << "AnalyzeHPErrs - col-center must be in the range [0,1]" << endl;
    exit(1);
  }
  if(rowCenter < 0 || rowCenter > 1) {
    cerr << "AnalyzeHPErrs - row-center must be in the range [0,1]" << endl;
    exit(1);
  }
  if(colSize < 0) {
    cerr << "AnalyzeHPErrs - col-size cannot be negative." << endl;
    exit(1);
  }
  if(rowSize < 0) {
    cerr << "AnalyzeHPErrs - row-size cannot be negative." << endl;
    exit(1);
  }

  // Determine rows & cols if a bfmask file was supplied; both stay 0 otherwise.
  int nRow=0;
  int nCol=0;
  if(!bfmaskFile.empty()) {
    if(GetRowColFromBfmask(bfmaskFile, &nRow, &nCol)) {
      cerr << "AnalyzeHPErrs - problem determining rows & columns from bfmask file " << bfmaskFile << endl;
      exit(1);
    }
  }

  // Set up fds object: SFF-vs-SFF comparison when --sff1 is given, genome
  // comparison otherwise. Derived-to-base conversion is implicit.
  FlowDiffStats* fds;
  if (!run1.empty()) {
    SffDiffStats* sds = new SffDiffStats(hpLength, nCol, nRow, qScoreCol, run1, run2);
    if (!pairedOut.empty())
      sds->SetPairedOut(pairedOut);
    fds = sds;
  }
  else {
    GenomeDiffStats* gds = new GenomeDiffStats(hpLength, nCol, nRow, qScoreCol);
    if(alignmentOut != "") {
      gds->SetAlignmentsOut(alignmentOut);
    }
    if (!flowsOut.empty()) {
      gds->SetFlowsOut(flowsOut);
    }
    fds = gds;
  }

  if (gcErrFile != "") {
    fds->SetFlowGCOut(gcErrFile);
    fds->SetGCWindowSize(gcWin);
  }

  if(keySeq != "") {
    fds->SetKeySeq(keySeq);
  }
  if(flowOrder != "") {
    fds->SetFlowOrder(flowOrder);
  }
  fds->SetStatsOut(statsOut);

  // Optionally restrict the analysis to a sample of the wells listed in a file.
  if (!wellsToUse.empty()) {
    std::vector<int> wells;
    std::vector<bool> use;
    ReadSetFromFile(wellsToUse, 0, wells);
    use.resize(nRow * nCol, false);
    int count = 0;
    ReservoirSample<int> wellSample(sampleSize);
    for (size_t i = 0; i < wells.size(); i++) {
      wellSample.Add(wells[i]);
    }
    wells = wellSample.GetData();
    for (size_t i = 0; i < wells.size(); i++) {
      // Guard the write below: without a bfmask the table has size 0, and a bad
      // index in the file would otherwise write outside the vector.
      if (wells[i] < 0 || static_cast<size_t>(wells[i]) >= use.size()) {
        cerr << "AnalyzeHPErrs - well index " << wells[i] << " from " << wellsToUse
             << " is outside the " << nRow << " x " << nCol
             << " chip (was a bfmask file specified?)" << endl;
        exit(1);
      }
      use[wells[i]] = true;
      count++;
    }
    cout << "Read: " << count << " reads." << endl;
    fds->SetWellToAnalyze(use);
  }


  // Set integer-value row & column bounds; -1 means "no restriction".
  int minRow=-1;
  int maxRow=-1;
  int minCol=-1;
  int maxCol=-1;
  if(colSize > 0 || rowSize > 0) {
    if(bfmaskFile.empty()) {
      cerr << "AnalyzeHPErrs - must specify bfmask file when restricting row or column ranges" << endl;
      exit(1);
    }
    if(rowSize > 0) {
      // Window of rowSize rows centred at rowCenter (a fraction of the chip
      // height), clamped to [0, nRow].
      minRow = floor(nRow * rowCenter - rowSize / 2.0);
      maxRow = minRow + rowSize;
      minRow = std::max(0,minRow);
      maxRow = std::min(nRow,maxRow);
    }
    if(colSize > 0) {
      minCol = floor(nCol * colCenter - colSize / 2.0);
      maxCol = minCol + colSize;
      minCol = std::max(0,minCol);
      maxCol = std::min(nCol,maxCol);
    }
  }

  if (wellsFile != "") {
    // Validate the bfmask requirement before the FillInSubset pass over the
    // SAM file, so a missing option fails immediately.
    if(bfmaskFile.empty()) {
      cerr << "AnalyzeHPErrs - must specify bfmask file when specifying wells file" << endl;
      exit(1);
    }
    std::vector<int32_t> xSubset, ySubset;
    fds->FillInSubset(samFile, qLength, minRow, maxRow, minCol, maxCol, xSubset, ySubset);
    fds->SetWellsFile(wellsFile, nRow, nCol, numFlows, xSubset, ySubset);
  }
  if (snrFile != "") {
    cout << "Opening snr file: " << snrFile << endl;
    fds->SetSNROut(snrFile);
  }
  if (binnedHpSigFile != "") {
    cout << "Opening binned HP signal file: " << binnedHpSigFile << endl;
    fds->SetBinnedHpSigOut(binnedHpSigFile);
  }
  if (flowErrFile != "") {
    cout << "Opening flow err file: " << flowErrFile << endl;
    fds->SetFlowErrOut(flowErrFile);
  }
  ofstream summary;
  summary.open(summaryOut.c_str());
  // Fail loudly instead of running the whole analysis into a dead stream.
  if (!summary.is_open()) {
    cerr << "AnalyzeHPErrs - could not open summary file " << summaryOut << " for writing" << endl;
    exit(1);
  }
  cout << "Reading and analyzing alignments from: " << samFile << endl;
  if(minCol > -1 || maxCol > -1)
    cout << "  Restricting to " << (maxCol-minCol) << " cols in the range [" << minCol << "," << maxCol << ")" << endl;
  if(minRow > -1 || maxRow > -1)
    cout << "  Restricting to " << (maxRow-minRow) << " rows in the range [" << minRow << "," << maxRow << ")" << endl;

  fds->SetAlignmentInFile(samFile);
  fds->FilterAndCompare(numFlows, summary, qLength, minRow, maxRow, minCol, maxCol);

  summary.close();
  delete fds;
  cout << "Done." << endl;
  return 0;
}
Пример #8
0
int main(int argc, const char *argv[])
{
    OptArgs opts;
    opts.ParseCmdLine(argc, argv);
    bool help, combineSffs;
    string sffFile;
    string bamFile;
    vector<string> infiles;
    opts.GetOption(help,"false", 'h', "help");
    opts.GetOption(combineSffs,"false", 'c', "combine-sffs");
    opts.GetOption(bamFile,"",'o',"out-filename");
    opts.GetLeftoverArguments(infiles);

    if(help || infiles.empty())
    {
        usage();
    }

	if((!combineSffs) && infiles.size() > 1)
	{
        cerr << "sff2bam ERROR: if you want to combine all sff files into a single bam file, please use option -c true." << endl;
        usage();
	}

    sffFile= infiles.front();

    if(bamFile.length() < 1)
    {
        bamFile = sffFile.substr(0, sffFile.length() - 3);
        bamFile += "bam";
    }

    sff_file_t* sff_file = sff_fopen(sffFile.c_str(), "r", NULL, NULL);
    if(!sff_file)
    {
        cerr << "sff2bam ERROR: fail to open " << sffFile << endl;
        exit(1);
    }

	// All sff files must have the same flow and key
	if(combineSffs && infiles.size() > 1)
	{
        for(size_t n = 1; n < infiles.size(); ++n)
		{
			sff_file_t* sff_file2 = sff_fopen(infiles[n].c_str(), "r", NULL, NULL);
			if(!sff_file2)
			{
				sff_fclose(sff_file);
				cerr << "sff2bam ERROR: fail to open " << infiles[n] << endl;
				exit(1);
			}

			if(strcmp(sff_file2->header->flow->s, sff_file->header->flow->s) != 0 ||
				strcmp(sff_file2->header->key->s, sff_file->header->key->s) != 0)
			{
				sff_fclose(sff_file);
				sff_fclose(sff_file2);
				cerr << "sff2bam ERROR: " << sffFile << " and " << infiles[n] << " have different flows or keys." << endl;
				exit(1);
			}

			sff_fclose(sff_file2);
		}
	}

    sff_t* sff = NULL;
    // Open 1st read for read group name
    sff = sff_read(sff_file);
    if(!sff)
    {
        sff_fclose(sff_file);
        cerr << "sff2bam ERROR: fail to read " << sffFile << endl;
        exit(1);
    }

    // Set up BAM header
    SamHeader sam_header;
    sam_header.Version = "1.4";
    sam_header.SortOrder = "unsorted";

    SamProgram sam_program("sff2bam");
    sam_program.Name = "sff2bam";
    sam_program.Version = SFF2BAM_VERSION;
    sam_program.CommandLine = "sff2bam";
    sam_header.Programs.Add(sam_program);

    string rgname = sff->rheader->name->s;
    int index = rgname.find(":");
    rgname = rgname.substr(0, index);

    SamReadGroup read_group(rgname);
    read_group.FlowOrder = sff->gheader->flow->s;
    read_group.KeySequence = sff->gheader->key->s;

    sam_header.ReadGroups.Add(read_group);

    RefVector refvec;
    BamWriter bamWriter;
    bamWriter.SetCompressionMode(BamWriter::Compressed);

    if(!bamWriter.Open(bamFile, sam_header, refvec))
    {
        sff_fclose(sff_file);
        cerr << "sff2bam ERROR: failed to open " << bamFile << endl;
        exit(1);
    }

    // Save 1st read
    BamAlignment bam_alignment0;
    bam_alignment0.SetIsMapped(false);
    bam_alignment0.Name = sff->rheader->name->s;
    size_t nBases = sff->rheader->n_bases + 1 - sff->rheader->clip_qual_left;
    if(sff->rheader->clip_qual_right > 0)
    {
        nBases = sff->rheader->clip_qual_right - sff->rheader->clip_qual_left;
    }
    if(nBases > 0)
    {
        bam_alignment0.QueryBases.reserve(nBases);
        bam_alignment0.Qualities.reserve(nBases);
        for (int base = sff->rheader->clip_qual_left - 1; base < sff->rheader->clip_qual_right - 1; ++base)
        {
            bam_alignment0.QueryBases.push_back(sff->read->bases->s[base]);
            bam_alignment0.Qualities.push_back(sff->read->quality->s[base] + 33);
        }
    }

    int clip_flow = 0;
    for (unsigned int base = 0; base < sff->rheader->clip_qual_left && base < sff->rheader->n_bases; ++base)
    {
        clip_flow += sff->read->flow_index[base];
    }
    if (clip_flow > 0)
    {
        clip_flow--;
    }

    bam_alignment0.AddTag("RG","Z", rgname);
    bam_alignment0.AddTag("PG","Z", string("sff2bam"));
    bam_alignment0.AddTag("ZF","i", clip_flow); // TODO: trim flow
    vector<uint16_t> flowgram0(sff->gheader->flow_length);
    copy(sff->read->flowgram, sff->read->flowgram + sff->gheader->flow_length, flowgram0.begin());
    bam_alignment0.AddTag("FZ", flowgram0);
    sff_destroy(sff);
    sff = NULL;

    bamWriter.SaveAlignment(bam_alignment0);

    // Save rest reads
    while(NULL != (sff = sff_read(sff_file)))
    {
        BamAlignment bam_alignment;
        bam_alignment.SetIsMapped(false);
        bam_alignment.Name = sff->rheader->name->s;   
        nBases = sff->rheader->n_bases + 1 - sff->rheader->clip_qual_left;
        if(sff->rheader->clip_qual_right > 0)
        {
            nBases = sff->rheader->clip_qual_right - sff->rheader->clip_qual_left;
        }
        if(nBases > 0)
        {
            bam_alignment.QueryBases.reserve(nBases);
            bam_alignment.Qualities.reserve(nBases);
            for (int base = sff->rheader->clip_qual_left - 1; base < sff->rheader->clip_qual_right - 1; ++base)
            {
                bam_alignment.QueryBases.push_back(sff->read->bases->s[base]);
                bam_alignment.Qualities.push_back(sff->read->quality->s[base] + 33);
            }
        }

        clip_flow = 0;
        for (unsigned int base = 0; base <= sff->rheader->clip_qual_left && base < sff->rheader->n_bases; ++base)
        {
            clip_flow += sff->read->flow_index[base];
        }
        if (clip_flow > 0)
        {
            clip_flow--;
        }

        bam_alignment.AddTag("RG","Z", rgname);
        bam_alignment.AddTag("PG","Z", string("sff2bam"));
        bam_alignment.AddTag("ZF","i", clip_flow); // TODO: trim flow
        vector<uint16_t> flowgram(sff->gheader->flow_length);
        copy(sff->read->flowgram, sff->read->flowgram + sff->gheader->flow_length, flowgram.begin());
        bam_alignment.AddTag("FZ", flowgram);
        sff_destroy(sff);
        sff = NULL;

        bamWriter.SaveAlignment(bam_alignment);
    }

	sff_fclose(sff_file);

	if(combineSffs && infiles.size() > 1)
	{
        for(size_t n = 1; n < infiles.size(); ++n)
		{
			sff_file_t* sff_file2 = sff_fopen(infiles[n].c_str(), "r", NULL, NULL);

			while(NULL != (sff = sff_read(sff_file2)))
			{
				BamAlignment bam_alignment;
				bam_alignment.SetIsMapped(false);
				bam_alignment.Name = sff->rheader->name->s;   
				nBases = sff->rheader->n_bases + 1 - sff->rheader->clip_qual_left;
				if(sff->rheader->clip_qual_right > 0)
				{
					nBases = sff->rheader->clip_qual_right - sff->rheader->clip_qual_left;
				}
				if(nBases > 0)
				{
					bam_alignment.QueryBases.reserve(nBases);
					bam_alignment.Qualities.reserve(nBases);
					for (int base = sff->rheader->clip_qual_left - 1; base < sff->rheader->clip_qual_right - 1; ++base)
					{
						bam_alignment.QueryBases.push_back(sff->read->bases->s[base]);
						bam_alignment.Qualities.push_back(sff->read->quality->s[base] + 33);
					}
				}

				clip_flow = 0;
				for (unsigned int base = 0; base <= sff->rheader->clip_qual_left && base < sff->rheader->n_bases; ++base)
				{
					clip_flow += sff->read->flow_index[base];
				}
				if (clip_flow > 0)
				{
					clip_flow--;
				}

				bam_alignment.AddTag("RG","Z", rgname);
				bam_alignment.AddTag("PG","Z", string("sff2bam"));
				bam_alignment.AddTag("ZF","i", clip_flow); // TODO: trim flow
				vector<uint16_t> flowgram(sff->gheader->flow_length);
				copy(sff->read->flowgram, sff->read->flowgram + sff->gheader->flow_length, flowgram.begin());
				bam_alignment.AddTag("FZ", flowgram);
				sff_destroy(sff);
				sff = NULL;

				bamWriter.SaveAlignment(bam_alignment);
			}

			sff_fclose(sff_file2);
		}
	}

    bamWriter.Close();    

    return 0;
}
Пример #9
0
/**
 * BaseCallerLite entry point.
 *
 * Loads bfmask.bin and 1.wells from the input directory, configures a
 * BaseCallerLite instance (50x50 regions, library key "TCAG", CF/IE from the
 * command line), opens rawlib.sff in the output directory, runs the worker
 * threads to completion and prints a short timing summary.
 *
 * Returns 1 when called without arguments or when a worker thread cannot be
 * started, 0 on success; exits with EXIT_FAILURE when the mask cannot be read.
 */
int main (int argc, const char *argv[])
{

  if (argc == 1) {
    printf ("BaseCallerLite - Bare bone basecaller\n");
    printf ("\n");
    printf ("Usage:\n");
    printf ("BaseCallerLite [options]\n");
    printf ("\tOptions:\n");
    printf ("\t\tComing soon\n");
    printf ("\n");
    return 1;
  }

  string libKey = "TCAG";
  string inputDirectory = ".";
  string outputDirectory = ".";
  bool singleCoreCafie = false;

  BaseCallerLite basecaller;
  basecaller.regionXSize = 50;
  basecaller.regionYSize = 50;
  basecaller.runId = "BCLTE";
  basecaller.CF = 0.0;
  basecaller.IE = 0.0;
  basecaller.numWellsCalled = 0;
  basecaller.nextRegionX = 0;
  basecaller.nextRegionY = 0;


  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  opts.GetOption(basecaller.CF, "0.0", '-',  "cf");
  opts.GetOption(basecaller.IE, "0.0", '-',  "ie");
  opts.GetOption(inputDirectory, ".", '-',  "input-dir");
  opts.GetOption(outputDirectory, ".", '-',  "output-dir");
  opts.GetOption(singleCoreCafie, "false", '-',  "singlecorecafie");

  // Two workers per core unless a single worker was requested.
  int numWorkers = 2*numCores();
  if (singleCoreCafie)
    numWorkers = 1;


  Mask mask (1, 1);
  if (mask.SetMask ((inputDirectory + "/bfmask.bin").c_str()))
    exit (EXIT_FAILURE);
  RawWells wells (inputDirectory.c_str(),"1.wells");
  //SetWellsToLiveBeadsOnly(wells,&mask);
  wells.OpenForIncrementalRead();

  basecaller.maskPtr = &mask;
  basecaller.wellsPtr = &wells;
  basecaller.rows = mask.H();
  basecaller.cols = mask.W();
  basecaller.flowOrder.SetFlowOrder(wells.FlowOrder(), wells.NumFlows());
  basecaller.numFlows = wells.NumFlows();


  // Ceiling division: a partial region at the chip edge still counts.
  basecaller.numRegionsX = (basecaller.cols +  basecaller.regionXSize - 1) / basecaller.regionXSize;
  basecaller.numRegionsY = (basecaller.rows +  basecaller.regionYSize - 1) / basecaller.regionYSize;
  basecaller.numRegions = basecaller.numRegionsX * basecaller.numRegionsY;

  basecaller.libKeyFlows.assign(basecaller.numFlows,0);
  basecaller.libNumKeyFlows = basecaller.flowOrder.BasesToFlows(libKey, &basecaller.libKeyFlows[0], basecaller.numFlows);

  basecaller.libSFF.Open(outputDirectory+"/rawlib.sff", basecaller.numRegions,
      basecaller.flowOrder, libKey);


  time_t startBasecall;
  time(&startBasecall);

  pthread_mutex_init(&basecaller.wellsAccessMutex, NULL);

  pthread_t worker_id[numWorkers];
  int numStarted = 0;
  for (; numStarted < numWorkers; numStarted++)
    if (pthread_create(&worker_id[numStarted], NULL, BasecallerWorkerWrapper, &basecaller))
      break;

  if (numStarted < numWorkers) {
    printf("*Error* - problem starting thread\n");
    // The workers that did start hold pointers to basecaller, mask and wells,
    // all of which live on this stack frame. Returning right away would
    // destroy those objects under running threads, so wait for them first.
    for (int iWorker = 0; iWorker < numStarted; iWorker++)
      pthread_join(worker_id[iWorker], NULL);
    pthread_mutex_destroy(&basecaller.wellsAccessMutex);
    return 1;
  }

  for (int iWorker = 0; iWorker < numWorkers; iWorker++)
    pthread_join(worker_id[iWorker], NULL);

  pthread_mutex_destroy(&basecaller.wellsAccessMutex);

  time_t endBasecall;
  time(&endBasecall);

  basecaller.libSFF.Close();

  printf("\nBASECALLING: called %d of %d wells in %1.1f seconds with %d threads\n",
      basecaller.numWellsCalled, basecaller.rows*basecaller.cols, difftime(endBasecall,startBasecall), numWorkers);
  printf("Generated library SFF with %d reads\n", basecaller.libSFF.num_reads());

  return 0;
}
Пример #10
0
int main(int argc, const char *argv[]) {
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  string regionFile;
  vector<string> matchStrings;
  vector<string> datFiles;
  int maskCenter = MaskEmpty;
  int maskMatch = MaskLive | MaskBead | MaskDud;
  string maskFile;
  string outPrefix;
  bool setHex;
  int frameStart,frameEnd;
  bool help;
  bool useDuds;
  int optCenter, optMatch;
  opts.GetOption(help, "false", 'h', "help");
  opts.GetOption(regionFile, "", '-', "region-file");
  opts.GetOption(datFiles, "", '-', "dat-files");
  opts.GetOption(matchStrings, "", '-', "matches");
  opts.GetOption(outPrefix, "", '-', "out-prefix");
  opts.GetOption(useDuds, "", '-', "use-duds");
  opts.GetOption(maskFile, "", '-', "mask-file");
  opts.GetOption(frameStart, "14", '-', "frame-start");
  opts.GetOption(frameEnd, "20", '-', "frame-end");
  opts.GetOption(optCenter, "0", '-', "center");
  opts.GetOption(optMatch, "0", '-', "match");
  opts.GetOption(setHex, "false", '-', "set-hex");
  
  if (useDuds) {
    maskMatch = MaskDud;
  }
  else if (optMatch != 0) {
    maskMatch = optMatch;
  }
  if (optCenter != 0) {
    maskCenter = optCenter;
  }

  vector<Traces> flows;
  vector<struct Region> regions;
  cout << "Loading mask." << endl;
  Mask mask(maskFile.c_str());
  mask.SetHex(setHex);
  for (size_t i = 0; i < matchStrings.size(); i++) {
    ION_ASSERT(matchStrings[i].length() == matchStrings[0].length(), "Match strings must match in length.");
  }
  cout << "Loading regions." << endl;
  LoadRegions(regionFile, regions);
  cout << "Loading traces." << endl;
  LoadTraces(mask, datFiles, flows);

  for (size_t i = 0; i < matchStrings.size(); i++) {
    cout << "Using frame num: " << frameStart << " to " << frameEnd << " for match string: " << matchStrings[i] << endl;
    ParseMetrics(matchStrings[i], i, mask, maskCenter, maskMatch, regions, flows, outPrefix, frameStart, frameEnd);
  }
  cout << "Saw: " << centerSeen << " wells and: " << haystackNeg << " negatives." << endl;
}
Пример #11
0
/**
 * Resolve a comma-separated list-of-doubles parameter from three sources, in
 * increasing priority: the built-in default string, the parameters JSON, and
 * the command line. The chosen value and its source are echoed to stdout.
 *
 * @param opts               parsed command-line options
 * @param json               parameters JSON; looked up under the name returned
 *                           by GetRidOfDomainAndHyphens(long_name_hyphens)
 * @param short_name         short option character
 * @param long_name_hyphens  long option name as typed on the command line
 * @param default_value      comma-separated default, may be empty
 * @param ret_vector         receives the resolved values
 * @return 0 on success; a non-zero code when the default cannot be parsed
 *         (the errno from strtod, or EINVAL for trailing garbage) and 1 when
 *         the command-line option is present but yields no values.
 */
int RetrieveParameterVectorDouble(OptArgs &opts, Json::Value& json, char short_name, const string& long_name_hyphens, const string& default_value, vector<double>& ret_vector)
{
  string long_name_underscores = GetRidOfDomainAndHyphens(long_name_hyphens);
  string value = default_value;
  char buf[1000];

  // 1) Built-in default.
  if(value.length() > 0)
  {
    vector<string> words;
    split(value,',',words);
    ret_vector.clear();
    for (size_t i = 0; i < words.size(); i++) {
      char *end;
      int err = errno;
      errno = 0;
      ret_vector.push_back(strtod(words[i].c_str(), &end));
      if (errno != 0 || *end != '\0') {
        // Capture the code before any stream I/O can touch errno. Trailing
        // garbage leaves errno at 0, which the original returned as "success";
        // report EINVAL for that case instead.
        const int parse_errno = (errno != 0) ? errno : EINVAL;
        cout << "Error converting: " + words[i] + " to an double for option: " + long_name_hyphens << endl;
        return parse_errno;
      }
      errno = err;
    }
  }
  string source = "builtin default";

  // 2) Parameters JSON overrides the default when the key is present. An
  //    empty array clears ret_vector but leaves the printed value and source
  //    at their defaults.
  if (json.isMember(long_name_underscores)) {
    ret_vector.clear();
    const int sz = (int)json[long_name_underscores].size();
    if(sz > 0)
    {
      value = "";
      for(int i = 0; i < sz; i++)
      {
        if(i > 0)
        {
          value += ",";
        }
        const Json::Value& item = json[long_name_underscores][i];
        if(item.isString())
        {
          // String entries are echoed verbatim and converted with atof.
          ret_vector.push_back(atof(item.asCString()));
          value += item.asCString();
        }
        else
        {
          ret_vector.push_back(item.asDouble());
          snprintf(buf, sizeof(buf), "%f", ret_vector.back());
          value += buf;
        }
      }
      source = "parameters json file";
    }
  }

  // 3) Command line has the final say.
  if (opts.HasOption(short_name, long_name_hyphens)) {
    ret_vector.clear();
    opts.GetOption(ret_vector, default_value, short_name, long_name_hyphens);

    if(ret_vector.empty())
    {
      cout << "Error setting: there is no value set for option: " + long_name_hyphens << endl;
      return 1;
    }

    value = "";
    for(size_t i = 0; i < ret_vector.size(); i++) {
      if(i > 0)
      {
        value += ",";
      }
      snprintf(buf, sizeof(buf), "%f", ret_vector[i]);
      value += buf;
    }
    source = "command line option";
  }

  cout << setw(35) << long_name_hyphens << " = " << setw(10) << value << " (double,  " << source << ")" << endl;
  return 0;
}
Пример #12
0
/**
 * ExtractWells entry point.
 *
 * Reads "row col" pairs (from --positions, or stdin when no file is given),
 * converts them to sorted chip indices, and copies the matching wells from the
 * input HDF5 dataset into a position x flow dataset in the output file. The
 * input is walked chunk by chunk; every chunk that contains at least one
 * requested position is handed to its own do_subset thread. Thread completion
 * is tracked through the file-level thread_flags vector, which this function
 * polls once per second.
 *
 * Exits with -1 on a negative coordinate or when a thread cannot be created,
 * and with 0 when no usable positions were read.
 */
int main(int argc, const char *argv[]) {
  OptArgs opts;
  string h5file_in;
  string source;
  string h5file_out;
  string destination;
  string positions_file;
  bool help;
  string flowlimit_arg;
  unsigned int flowlimit;
  vector<string>otherArgs;

  DumpStartingStateOfExtractWells (argc,argv);

  opts.ParseCmdLine(argc, argv);
  opts.GetOption(h5file_in, "", 'i', "input");
  opts.GetOption(source, "", 's', "source");
  opts.GetOption(h5file_out, "", 'o', "output");
  opts.GetOption(destination, "", 'd', "destination");
  opts.GetOption(flowlimit_arg, "", 'f', "flowlimit");
  opts.GetOption(positions_file, "", 'p', "positions");
  opts.GetOption(help, "false", 'h', "help");
  opts.GetLeftoverArguments(otherArgs);

  // input data processing
  string line;
  vector<size_t> row_val;
  vector<size_t> col_val;
  ifstream filestream;
  if ( ! positions_file.empty() )
    filestream.open(&positions_file.At(0));
  // Fall back to stdin when no positions file was given or it failed to open.
  istream &input = ( filestream.is_open() ) ? filestream : cin;



  // Each line must hold two non-negative integers: row then column. Lines
  // with fewer than two integers are reported and skipped.
  while ( getline(input, line) )
  {
    int num = -1;
    vector<size_t> ints;
    istringstream ss(line);
    while ( ss >> num && ints.size() < 2 ) {
      if (num < 0) {
        fprintf(stderr, "Found negative integer %d\n", num);
        exit(-1);
      }
      else
        ints.push_back((size_t)num);
    }
    if (ints.size() != 2) {
      fprintf(stderr, "Found %d integers in %s, expected 2\n", (int)ints.size(), &line[0]);
      continue;
    }
    row_val.push_back(ints.at(0));
    col_val.push_back(ints.at(1));
  }
  if (row_val.size() == 0 ) {
      fprintf(stdout, "No positions to extract, check input\n");
      exit(0);
  }
  vector<size_t>input_positions(row_val.size(), 0);

  int numCPU = (int)sysconf( _SC_NPROCESSORS_ONLN );
  int numThreads = MAXTHREADS < numCPU ? MAXTHREADS : numCPU;
  fprintf(stdout, "Using %d threads of %d cores\n", numThreads, numCPU);

  // Empty names fall back to the compiled-in defaults.
  if (source.empty())
    source = source + SIGNAL_IN;
  H5ReplayReader reader = H5ReplayReader(h5file_in, &source[0]);
  if ( h5file_out.empty() )
    h5file_out = h5file_out + H5FILE_OUT;
  if ( destination.empty() )
    destination = destination + SIGNAL_OUT;

  reader.Open();
  int rank = reader.GetRank();
  vector<hsize_t>dims(rank);
  vector<hsize_t>chunks(rank);
  reader.GetDims(dims);
  reader.GetChunkSize(chunks);
  reader.Close();

  // convert input row, col positions to indices
  for (hsize_t i=0; i<input_positions.size(); i++)
    input_positions.At(i) = RowColToIndex(row_val.At(i), col_val.At(i), dims.At(0), dims.At(1));
  sort(input_positions.begin(), input_positions.end());

  fprintf(stdout, "Opened for read %s:%s with rank %d, row x col x flow dims=[ ", &h5file_in[0], &source[0], rank);
  for (int i=0; i<rank; i++)
    fprintf(stdout, "%d ", (int)dims.At(i));
  fprintf(stdout, "], chunksize=[ ");
  for (int i=0; i<rank; i++)
    fprintf(stdout, "%d ", (int)chunks.At(i));
  fprintf(stdout, "]\n");


  H5ReplayRecorder recorder = H5ReplayRecorder(h5file_out, &destination[0],reader.GetType(),2);
  recorder.CreateFile();


  // Side dataset "position": one entry per requested position.
  {
    vector<hsize_t> dims_pos(1, input_positions.size());
    string pos_name = "position";
    H5ReplayRecorder recorder_pos = H5ReplayRecorder(h5file_out, &pos_name[0],H5T_NATIVE_ULONG,1);
    recorder_pos.CreateDataset(dims_pos);
  }

  // Side dataset "chip_dims": the three dimensions of the input dataset.
  {
    string chip_dims = "chip_dims";
    H5ReplayRecorder recorder_chip_dims = H5ReplayRecorder(h5file_out, &chip_dims[0],H5T_NATIVE_ULLONG,1);
    vector<hsize_t> offset_dims(1,0);
    vector<hsize_t> count_dims(1,3);
    recorder_chip_dims.CreateDataset(count_dims);
    recorder_chip_dims.Write(offset_dims, count_dims, offset_dims, count_dims, &dims[0]);
  }
  if (flowlimit_arg.empty())
    flowlimit = dims.At(2);
  else
    flowlimit = atoi(flowlimit_arg.c_str());

  // Never ask for more flows than the input holds.
  flowlimit = (flowlimit < dims.At(2)) ? flowlimit : dims.At(2);
  fprintf(stdout, "Using %u flows\n", flowlimit);

  // chunks no bigger than 100000
  // NOTE(review): the threshold (10000) and the cap (100000) differ, so a
  // position count in [10000, 100000) gets a chunk larger than the number of
  // positions; possibly a typo for 100000 - confirm before changing.
  vector<hsize_t>chunks_out(2);
  chunks_out.At(0) = (input_positions.size() < 10000) ? input_positions.size() : 100000;
  chunks_out.At(1) = chunks.At(2);

  recorder.CreateDataset(chunks_out);
  vector<hsize_t> extension(2);
  extension.At(0) = input_positions.size();
  extension.At(1) = dims.At(2);
  recorder.ExtendDataSet(extension); // extend if necessary

  fprintf(stdout, "Opening for write %s:%s with rank %d, position x flow chunks=[ ", &h5file_out[0], &destination[0], (int)chunks_out.size());
  for (int i=0; i<(int)chunks_out.size(); i++)
    fprintf(stdout, "%d ", (int)chunks_out.At(i));
  fprintf(stdout, "]\n");

  // Upper bound on the number of input chunks, hence on threads ever created.
  int max_threads_ever = (dims.At(0)/chunks.At(0) +1)*(dims.At(1)/chunks.At(1) +1);
  thread_flags.resize (max_threads_ever, 0);
  // fprintf(stdout, "max_threads_ever = %d\n", max_threads_ever);
  unsigned int thread_id = 0;
  vector<thread_args> my_args( max_threads_ever );

  // Number of requested positions matched so far; also the output row offset
  // of the next match.
  size_t runningCount = 0;

  // layout is rows x cols x flows
  for (size_t row=0; row<dims.At(0); ) {
    for (size_t col=0; col<dims.At(1); ) {

      size_t ix = 0;
      hsize_t offset_out = 0;
      hsize_t count_out = 0;

      vector<size_t> limit(2);
      limit.At(0) = ( row+chunks.At(0) < dims.At(0) ) ? row+chunks.At(0) : dims.At(0);
      limit.At(1) = ( col+chunks.At(1) < dims.At(1) ) ? col+chunks.At(1) : dims.At(1);
      // fprintf(stdout, "Block row=%lu, col=%lu, count=[%lu %lu]\n", row, col, limit.At(0), limit.At(1));
      // bool first_time=true;
      // Walk the wells of this chunk alongside the sorted position list,
      // counting how many requested positions fall inside the chunk.
      for (size_t rr=row; rr<limit.At(0) && ix < input_positions.size(); rr++) {
        for (size_t cc=col; cc<limit.At(1) && ix < input_positions.size(); cc++) {
          size_t pos = input_positions.At(ix);
          size_t chp_indx = RowColToIndex(rr,cc, dims.At(0), dims.At(1));
          // if (first_time)
          //   fprintf(stdout, "Entering loop with pos=%lu, ix=%lu, chp_indx=%lu\n", pos, ix, chp_indx);
          // first_time = false;

          if ( chp_indx < pos)
            continue;

          // Skip requested positions that lie before the current well.
          while ( chp_indx > pos){
            // fprintf(stdout, "chp_indx=%lu > pos=%lu, incrementing ix=%lu\n", chp_indx, pos, ix);
            ix++;
            if (ix == input_positions.size()){
              break;
            }
            pos = input_positions.At(ix);
            // first_time = true;
          }

          if( chp_indx == pos){
            if ( count_out == 0)
              offset_out = runningCount;
            count_out++;
            runningCount++;
            // fprintf(stdout, "found: rr=%d, cc=%d, pos=%d, index=%d, ix=%lu, runningCount=%lu\n", (int)rr, (int)cc, (int)pos, (int)chp_indx, ix, runningCount);
            ix++;
            continue;
          }

        }
      }

      assert (ix <= input_positions.size() );
      assert (runningCount <= input_positions.size() );

      // Only chunks that contain requested positions get a worker thread.
      if (count_out > 0) {
        pthread_t thread;
        int thread_status = 0;

        assert( thread_id < thread_flags.size() );
        my_args.at(thread_id).row = row;
        my_args.at(thread_id).col = col;
        my_args.at(thread_id).chunks = &chunks;
        my_args.at(thread_id).chunks_out = &chunks_out;
        my_args.at(thread_id).dims = &dims;
        my_args.at(thread_id).h5file_in = &h5file_in;
        my_args.at(thread_id).source = &source;
        my_args.at(thread_id).h5file_out = &h5file_out;
        my_args.at(thread_id).destination = &destination;
        my_args.at(thread_id).offset_out = offset_out;
        my_args.at(thread_id).count_out = count_out;
        my_args.at(thread_id).input_positions = &input_positions;
        my_args.at(thread_id).thread_id = thread_id;
        my_args.at(thread_id).flowlimit = flowlimit;

        // fprintf(stdout, "creating thread %d from row=%d (max %d), column=%d (max %d), offset_out=%llu, count_out=%llu\n", thread_id, (int)row, (int)dims.At(0), (int)col, (int)dims.At(1), offset_out, count_out);
        while (accumulate(thread_flags.begin(), thread_flags.end(), 0) > numThreads) {
          // only have to be approximate, don't worry about races
          fprintf(stdout, "Sleeping before creating thread %d from row=%d (max %d), column=%d (max %d), offset_out=%llu, count_out=%llu ...\n", thread_id, (int)row, (int)dims.At(0), (int)col, (int)dims.At(1), offset_out, count_out);
          sleep(1);
        }
        thread_flags.At(thread_id) = 1;
        thread_status = pthread_create(&thread, NULL, do_subset, (void *)&my_args[thread_id]);
        // do_subset((void *)&my_args[thread_id]);
        // pthread_create returns 0 on success and a positive error number on
        // failure, so the original "assert (thread_status >= 0)" could never
        // fire. A failed creation would leave the flag set with nobody to
        // clear it, and the wait loop at the end would never terminate.
        if (thread_status != 0) {
          fprintf(stderr, "Failed to create thread %u (pthread_create returned %d)\n", thread_id, thread_status);
          exit(-1);
        }
        thread_id++;
      }
      col += chunks.At(1);
      //fflush(stdout);
    }
    row += chunks.At(0);
  }
  while (accumulate(thread_flags.begin(), thread_flags.end(), 0) > 0) {
    // wait for the threads to finish
    // fprintf(stdout, "Waiting ...\n");
    sleep(1);
  }

  assert (runningCount == input_positions.size() );
  cout << "Done." << endl;
  pthread_exit(NULL);
}
Пример #13
0
int main(int argc, const char *argv[]) 
{
	OptArgs opts;
	opts.ParseCmdLine(argc, argv);
	string inFile, outFile;
	bool help = false;
	bool version = false;
	double lower = -5.0;
	double upper = 28.0;
	opts.GetOption(inFile, "", 'i', "input-file");
	opts.GetOption(outFile, "", 'o', "output-file");
	opts.GetOption(lower, "-5.0", '-', "wells-convert-low");
	opts.GetOption(upper, "28.0", '-', "wells-convert-high");
	opts.GetOption(help, "false", 'h', "help");
	opts.GetOption(version, "false", 'v', "version");
	opts.CheckNoLeftovers();
  
	if (version) 
	{
		fprintf (stdout, "%s", IonVersion::GetFullVersion("RawWellsConverter").c_str());
		exit(0);
	}

	if (inFile.empty() || help)
	{
		cout << "RawWellsConverter - Convert unsigned short type wells file to float type wells file, or vice versa." << endl 
			 << "options: " << endl
			 << "   -i,--input-file    input wells file." << endl
			 << "   -o,--output-file   output wells file." << endl
			 << "     ,--wells-convert-low   lower bound for converting to unsigned short." << endl
			 << "     ,--wells-convert-high  upper bound for converting to unsigned short." << endl
			 << "   -h,--help          this message." << endl
			 << "" << endl 
			 << "usage: " << endl
			 << "   RawWellsConverter -i input_path/1.wells -o output_path/1.wells " << endl;
		exit(1);
	}

	struct stat sb;
	if(stat(inFile.c_str(), &sb) != 0)
	{
		cerr << "RawWellsConverter ERROR: " << inFile << " does not exist." << endl; 
		exit (1);
	}

	if (outFile.empty())
	{
		outFile = inFile;
		outFile += ".converted";
	}

	string cmd("cp ");
	cmd += inFile;
	cmd += " ";
	cmd += outFile;
	int ret0 = system(cmd.c_str());

	hid_t root = H5Fopen(outFile.c_str(), H5F_ACC_RDWR, H5P_DEFAULT);
	if(root < 0)				
	{				
		cerr << "RawWellsConverter ERROR: Fail to open " << outFile << endl;
		exit(1);
	}	

	H5G_info_t group_info;
	group_info.nlinks = 0;
	if(H5Gget_info(root, &group_info) < 0)
	{
		H5Fclose(root);
		cerr << "RawWellsConverter ERROR: Fail H5Gget_info." << endl;
		exit(1);
	}

	char name[10];
	string sName;
	bool bWells = false;
	bool bCopies = false;
	for(unsigned int i = 0; i < group_info.nlinks; ++i)
	{
		int size = H5Gget_objname_by_idx(root, i, NULL, 0);
		if(H5Gget_objname_by_idx(root, i, name, size + 1) < 0)
		{
			H5Fclose(root);
			cerr << "RawWellsConverter ERROR: Fail H5Gget_objname_by_idx." << endl;
			exit(1);
		}
		else
		{
			sName = name;
			if(sName == "wells")
			{
				bWells = true;
			}
			if(sName == "wells_copies")
			{
				bCopies = true;
			}
		}
	}

	if(!bWells)
	{
		H5Fclose(root);
		cerr << "RawWellsConverter ERROR: There is no dataset wells." << endl;
		exit(1);
	}

	hid_t dsWells = H5Dopen2(root, "wells", H5P_DEFAULT);
	if(dsWells < 0)
	{
		H5Fclose(root);
		cerr << "RawWellsConverter ERROR: Fail H5Dopen2 wells." << endl;
		exit(1);
	}
	  
	bool saveAsUShort = false;
	if(H5Aexists(dsWells, "convert_low") > 0)
	{
		hid_t attrLower = H5Aopen(dsWells, "convert_low", H5T_NATIVE_FLOAT );
		H5Aread(attrLower, H5T_NATIVE_FLOAT, &lower); 
		saveAsUShort = true;
		H5Aclose(attrLower);
	}
	if(H5Aexists(dsWells, "convert_high") > 0)
	{
		hid_t attrUpper = H5Aopen(dsWells, "convert_high", H5T_NATIVE_FLOAT);
		H5Aread(attrUpper, H5T_NATIVE_FLOAT, &upper); 
		saveAsUShort = true;
		H5Aclose(attrUpper);
	}

	hid_t dataSpace = H5Dget_space(dsWells);
	if(dataSpace < 0)
	{
		H5Dclose(dsWells);
		H5Fclose(root);
		cerr << "RawWellsConverter ERROR: Fail H5Dget_space wells." << endl;
		exit(1);
	}

	hssize_t dsSize = H5Sget_simple_extent_npoints(dataSpace);		
	if(dsSize < 1)
	{
		H5Sclose(dataSpace);
		H5Dclose(dsWells);
		H5Fclose(root);
		cerr << "RawWellsConverter ERROR: Wrong size of dataset wells - " << dsSize << endl;
		exit(1);
	}

	int nRows = 0;
	int nCols = 0;
	int nFlows = 0;

	int rank = H5Sget_simple_extent_ndims(dataSpace);
    if(rank != 3)
	{
		bCopies = false;
	}
	else
	{
		hsize_t dims_out[3];
		int status_n = H5Sget_simple_extent_dims(dataSpace, dims_out, NULL);
		if(status_n < 0)
		{
			bCopies = false;
		}
		else
		{
			nRows = dims_out[0];
			nCols = dims_out[1];
			nFlows = dims_out[2];
		}
	}

	float* fPtr = new float[dsSize];
	unsigned short* usPtr = new unsigned short[dsSize];
	if(fPtr == NULL || usPtr == NULL)
	{
		H5Sclose(dataSpace);
		H5Dclose(dsWells);
		H5Fclose(root);
		cerr << "RawWellsConverter ERROR: Fail to allocate fPtr or usPtr." << endl;
		exit(1);
	}

	hid_t dcpl = H5Dget_create_plist(dsWells);
	if(dcpl < 0)
	{
		H5Sclose(dataSpace);
		H5Dclose(dsWells);
		H5Fclose(root);
		cerr << "RawWellsConverter ERROR: Fail H5Dget_create_plist." << endl;
		exit(1);
	}
	hid_t dapl = H5Dget_access_plist(dsWells);
	if(dapl < 0)
	{
		H5Pclose(dcpl);
		H5Sclose(dataSpace);
		H5Dclose(dsWells);
		H5Fclose(root);
		cerr << "RawWellsConverter ERROR: Fail H5Dget_access_plist." << endl;
		exit(1);
	}

	if(saveAsUShort)
	{
		cout << "RawWellsConverter: converting unsigned short wells file - " << inFile << " to float wells file - " << outFile << " with boundary (" << lower << "," << upper << ")" << endl;
	
		herr_t ret = H5Dread(dsWells, H5T_NATIVE_USHORT, H5S_ALL, H5S_ALL, H5P_DEFAULT, usPtr);
		H5Dclose(dsWells);
		if(ret < 0)
		{
			delete [] fPtr;
			delete [] usPtr;
			H5Sclose(dataSpace);
			H5Pclose(dcpl);
			H5Pclose(dapl);
			H5Fclose(root);
			cerr << "RawWellsConverter ERROR: Fail to read dataset wells." << endl;
			exit(1);
		}

		float factor = 65535.0 / (upper - lower);
		float* fPtr2 = fPtr;
		unsigned short* usPtr2 = usPtr;

		for(unsigned int i = 0; i < dsSize; ++i, ++fPtr2, ++usPtr2)
		{
			(*fPtr2) = (float)(*usPtr2) / factor + lower;
		}

		delete [] usPtr;

		if(bCopies)
		{
			vector<float> copies(nRows * nCols, 1.0);
			hid_t dsCopies = H5Dopen2(root, "wells_copies", H5P_DEFAULT);
			if(dsCopies < 0)
			{
				cerr << "RawWellsConverter WARNING: 1.wells files does not have wells_copies." << endl;
			}
			else
			{
				hid_t dataSpace2 = H5Dget_space(dsCopies);
				if(dataSpace2 < 0)
				{
					H5Dclose(dsCopies);
					cerr << "RawWellsConverter WARNING: fail to H5Dget_space for dataset wells_copies." << endl;          
				}
				else
				{
					hssize_t dsSize2 = H5Sget_simple_extent_npoints(dataSpace2);
					H5Sclose(dataSpace2);
					if(dsSize2 != (hssize_t)(nRows * nCols))
					{
						H5Dclose(dsCopies);
						cerr << "RawWellsConverter WARNING: dataset wells_copies size is " << dsSize2 << ", it is different from nRows * nCols = " << nRows * nCols << endl;          
					}
					else
					{
						herr_t ret = H5Dread(dsCopies, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &copies[0]);
						H5Dclose(dsCopies);
						if(ret < 0)
						{
							copies.resize(nRows * nCols, 1.0);
							cerr << "RawWellsConverter WARNING: failto load dataset wells_copies." << endl;          
						}
					}
				}
			}
			
			uint64_t fptrCount = 0;
			uint64_t copyCount = 0;
			for(int row = 0; row < nRows; ++row)
			{
				for(int col = 0; col < nCols; ++col)
				{
					for(int flow = 0; flow < nFlows; ++flow)
					{
						if(copies[copyCount] > 0)
						{
							fPtr[fptrCount] *= copies[copyCount];
						}
						else
						{
							fPtr[fptrCount] = -1.0;
						}
						
						++fptrCount;
					}

					++copyCount;
				}
			}
		}

	    H5Ldelete(root, "wells", H5P_DEFAULT);

		hid_t dsWells2 = H5Dcreate2 (root, "wells", H5T_NATIVE_FLOAT, dataSpace, H5P_DEFAULT, dcpl, dapl);
		if(dsWells2 < 0)
		{
			delete [] fPtr;
			H5Sclose(dataSpace);
			H5Pclose(dcpl);
			H5Pclose(dapl);
			H5Fclose(root);
			cerr << "RawWellsConverter ERROR: Fail to create dataset wells." << endl;
			exit(1);
		}

		ret = H5Dwrite(dsWells2, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, fPtr);
		delete [] fPtr;
		H5Dclose(dsWells2);
		H5Sclose(dataSpace);
		H5Pclose(dcpl);
		H5Pclose(dapl);
		H5Fclose(root);
		if(ret < 0)
		{
			cerr << "RawWellsConverter ERROR: Fail to write dataset wells." << endl;
			exit(1);
		}
	}
	else
	{
		cout << "RawWellsConverter: converting float wells file - " << inFile << " to unsigned short wells file - " << outFile << " with boundary (" << lower << "," << upper << ")" << endl;
	
		herr_t ret = H5Dread(dsWells, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, fPtr);
		H5Dclose(dsWells);
		if(ret < 0)
		{
			delete [] fPtr;
			delete [] usPtr;
			H5Sclose(dataSpace);
			H5Pclose(dcpl);
			H5Pclose(dapl);
			H5Fclose(root);
			cerr << "RawWellsConverter ERROR: Fail to read dataset wells." << endl;
			exit(1);
		}

		float factor = 65535.0 / (upper - lower);
		float* fPtr2 = fPtr;
		unsigned short* usPtr2 = usPtr;

		for(unsigned int i = 0; i < dsSize; ++i, ++fPtr2, ++usPtr2)
		{
			if(*fPtr2 < lower)
			{
				(*usPtr2) = 0;
			}
			else if(*fPtr2 > upper)
			{
				(*usPtr2) = 65535;
			}
			else
			{
				(*usPtr2) = (unsigned short)((*fPtr2 - lower) * factor);
			}
		}

		delete [] fPtr;

	    H5Ldelete(root, "wells", H5P_DEFAULT);

		hid_t dsWells2 = H5Dcreate2 (root, "wells", H5T_NATIVE_USHORT, dataSpace, H5P_DEFAULT, dcpl, dapl);
		if(dsWells2 < 0)
		{
			delete [] usPtr;
			H5Sclose(dataSpace);
			H5Pclose(dcpl);
			H5Pclose(dapl);
			H5Fclose(root);
			cerr << "RawWellsConverter ERROR: Fail to create dataset wells." << endl;
			exit(1);
		}

		ret = H5Dwrite(dsWells2, H5T_NATIVE_USHORT, H5S_ALL, H5S_ALL, H5P_DEFAULT, usPtr);
		delete [] usPtr;
		if(dsWells2 < 0)
		{
			H5Dclose(dsWells2);
			H5Sclose(dataSpace);
			H5Pclose(dcpl);
			H5Pclose(dapl);
			H5Fclose(root);
			cerr << "RawWellsConverter ERROR: Fail to write dataset wells." << endl;
			exit(1);
		}

		float lower2 = (float)lower;
		float upper2 = (float)upper;
		hsize_t dimsa[1];
		dimsa[0] = 1;
		hid_t dataspacea = H5Screate_simple(1, dimsa, NULL);
		hid_t attrLower = H5Acreate(dsWells2, "convert_low", H5T_NATIVE_FLOAT, dataspacea, H5P_DEFAULT, H5P_DEFAULT );
		H5Awrite(attrLower, H5T_NATIVE_FLOAT, &lower2);
		H5Aclose(attrLower);
		hid_t attrUpper = H5Acreate(dsWells2, "convert_high", H5T_NATIVE_FLOAT, dataspacea, H5P_DEFAULT, H5P_DEFAULT );
		H5Awrite(attrUpper, H5T_NATIVE_FLOAT, &upper2);
		H5Aclose(attrUpper);
		H5Sclose(dataspacea);

		H5Dclose(dsWells2);
		H5Sclose(dataSpace);
		H5Pclose(dcpl);
		H5Pclose(dapl);
		H5Fclose(root);
	}

	return 0;
}
Пример #14
0
int main(int argc, const char *argv[])
{
#ifdef _DEBUG
  atexit(memstatus);
  dbgmemInit();
#endif /* _DEBUG */

  printf ("%s - %s-%s (%s)\n", argv[0], IonVersion::GetVersion().c_str(), IonVersion::GetRelease().c_str(), IonVersion::GetSvnRev().c_str());

  string bamInputFilename;
  string fastaInputFilename;
  string jsonOutputFilename;
  bool help;

  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  opts.GetOption(bamInputFilename,    "",             '-',  "bam");
  opts.GetOption(fastaInputFilename,  "",             '-',  "ref");
  opts.GetOption(jsonOutputFilename,  "TFStats.json", '-',  "output-json");
  opts.GetOption(help,                "false",        'h',  "help");
  opts.CheckNoLeftovers();

  if (help || bamInputFilename.empty() || fastaInputFilename.empty())
    return showHelp();


  // Parse BAM header

  BAMReader bamReader(bamInputFilename);
  bamReader.open();
  bam_header_t *header = (bam_header_t *)bamReader.get_header_ptr();

  int numFlows = 0;
  string flowOrder;
  string key;

  if (header->l_text >= 3) {
    if (header->dict == 0)
      header->dict = sam_header_parse2(header->text);
    int nEntries = 0;
    char **tmp = sam_header2list(header->dict, "RG", "FO", &nEntries);
    if (nEntries) {
      flowOrder = tmp[0];
      numFlows = flowOrder.length();
    }
    if (tmp)
      free(tmp);
    nEntries = 0;
    tmp = sam_header2list(header->dict, "RG", "KS", &nEntries);
    if (nEntries) {
      key = tmp[0];
    }
    if (tmp)
      free(tmp);
  }

  if (numFlows <= 0) {
    fprintf(stderr, "[TFMapper] Could not retrieve flow order from FO BAM tag. SFF-specific tags absent?\n");
    exit(1);
  }
  if (key.empty()) {
    fprintf(stderr, "[TFMapper] Could not retrieve key sequence from KS BAM tag. SFF-specific tags absent?\n");
    exit(1);
  }
  //printf("Retrieved flow order from bam: %s (%d)\n", flowOrder.c_str(), numFlows);
  //printf("Retrieved key from bam: %s\n", key.c_str());


  // Retrieve test fragment sequences

  vector<string>  referenceSequences;
  PopulateReferenceSequences(referenceSequences, fastaInputFilename, header->n_targets, header->target_name, string(""));


  //  Process the BAM reads and generate metrics

  int numTFs = header->n_targets;
  vector<int>     TFCount(numTFs,0);
  MetricGeneratorQualityHistograms  metricGeneratorQualityHistograms[numTFs];
  MetricGeneratorHPAccuracy         metricGeneratorHPAccuracy[numTFs];
  MetricGeneratorSNR                metricGeneratorSNR[numTFs];
  MetricGeneratorAvgIonogram        metricGeneratorAvgIonogram[numTFs];

  for (BAMReader::iterator i = bamReader.get_iterator(); i.good(); i.next()) {

    BAMRead bamRead = i.get();
    int bestTF = bamRead.get_tid();
    if (bestTF < 0)
      continue;
    BAMUtils bamUtil(bamRead);
    TFCount[bestTF]++;

    // Extract flowspace signal from FZ BAM tag

    uint16_t *bam_flowgram = NULL;
    uint8_t *fz = bam_aux_get(bamRead.get_bam_ptr(), "FZ");
    if (fz != NULL) {
      if (fz[0] == (uint8_t)'B' && fz[1] == (uint8_t)'S' && *((uint32_t *)(fz+2)) == (uint32_t)numFlows)
        bam_flowgram = (uint16_t *)(fz+6);
    }
    if (bam_flowgram == NULL) {
      fprintf(stderr, "[TFMapper] Could not retrieve flow signal from FZ BAM tag. SFF-specific tags absent?\n");
      exit(1);
    }


    // Use alignments to generate "synchronized" flowspace reference and read ionograms
    // TODO: Do proper flowspace alignment

    string genome = key + bamUtil.get_tdna();
    string calls = key + bamUtil.get_qdna();

    int numBases = min(genome.length(),calls.length());
    vector<int> refIonogram(numFlows, 0);
    vector<int> readIonogram(numFlows, 0);

    int numFlowsRead = 0;
    int numFlowsRef = 0;
    char gC = flowOrder[0];
    int gBC = 0;

    for (int iBase = 0; (iBase < numBases) && (numFlowsRead < numFlows) && (numFlowsRef < numFlows); iBase++) {

      // Conversion for reads (independent of reference)
      if (calls[iBase] != '-') {
        while ((calls[iBase] != flowOrder[numFlowsRead]) && (numFlowsRead < numFlows))
          numFlowsRead++;
        if (numFlowsRead < numFlows)
          readIonogram[numFlowsRead]++;
      }

      if (genome[iBase] != '-') {

        if (genome[iBase] != gC) {
          // Since a new homopolymer begins, need to drop off the old one
          while ((gC != flowOrder[numFlowsRef]) && (numFlowsRef < numFlows)) {
            numFlowsRef++;
            if (numFlowsRef < numFlows)
              refIonogram[numFlowsRef] = 0;
          }
          if (numFlowsRef < numFlows)
            refIonogram[numFlowsRef] = gBC;

          gC = genome[iBase];
          gBC = 0;
        }
        gBC++;

        if (genome[iBase] == calls[iBase])
          numFlowsRef = numFlowsRead;
      }
    }

    int validFlows = min(numFlowsRef, numFlowsRead);


    metricGeneratorSNR[bestTF].AddElement(bam_flowgram ,key.c_str(), flowOrder);
    metricGeneratorAvgIonogram[bestTF].AddElement(bam_flowgram, numFlows);
    metricGeneratorQualityHistograms[bestTF].AddElement(bamUtil.get_phred_len(10),bamUtil.get_phred_len(17));
    for (int iFlow = 0; iFlow < validFlows-20; iFlow++)
      metricGeneratorHPAccuracy[bestTF].AddElement(refIonogram[iFlow],readIonogram[iFlow]);
  }


  // Save stats to a json file

  Json::Value outputJson(Json::objectValue);

  for(int i = 0; i < numTFs; i++) {
    if (TFCount[i] < minTFCount)
      continue;

    Json::Value currentTFJson(Json::objectValue);
    currentTFJson["TF Name"] = header->target_name[i];
    currentTFJson["TF Seq"] = referenceSequences[i];
    currentTFJson["Num"] = TFCount[i];
    currentTFJson["Top Reads"] = Json::Value(Json::arrayValue); // Obsolete

    metricGeneratorSNR[i].PrintSNR(currentTFJson);
    metricGeneratorHPAccuracy[i].PrintHPAccuracy(currentTFJson);
    metricGeneratorQualityHistograms[i].PrintMetrics(currentTFJson);
    metricGeneratorAvgIonogram[i].PrintIonograms(currentTFJson);

    outputJson[header->target_name[i]] = currentTFJson;
  }

  bamReader.close();  // Closing invalidates the header pointers

  if (!jsonOutputFilename.empty()) {
    ofstream out(jsonOutputFilename.c_str(), ios::out);
    if (out.good())
      out << outputJson.toStyledString();
  }

  return 0;
}
Пример #15
0
int main(int argc, const char *argv[]) {
  OptArgs opts;  
  string h5file;
  string source;
  string destination;
  vector<string> infiles;
  bool help;
  string flowlimit_arg;
  unsigned int flowlimit;

  DumpStartingStateOfNormWells (argc,argv);

  opts.ParseCmdLine(argc, argv);
  opts.GetOption(h5file, "", '-', "h5file");
  opts.GetOption(source, "", 's', "source");
  opts.GetOption(destination, "", 'd', "destination");
  opts.GetOption(flowlimit_arg, "", 'f', "flowlimit");
  opts.GetOption(help, "false", 'h', "help");
  opts.GetLeftoverArguments(infiles);
  if(help || infiles.empty() || (infiles.size() > 1) ) {
    usage();
  }
  h5file = infiles.front();

  int numCPU = (int)sysconf( _SC_NPROCESSORS_ONLN );
  int numThreads = MAXTHREADS < numCPU ? MAXTHREADS : numCPU;
  fprintf(stdout, "Using %d threads of %d cores\n", numThreads, numCPU);

  if (source.empty())
    source = source + SIGNAL_IN;
  H5ReplayReader reader = H5ReplayReader(h5file, &source[0]);
  if ( destination.empty() )
    destination = destination + SIGNAL_OUT;

  H5ReplayRecorder recorder = (source.compare(destination)==0)
    ? H5ReplayRecorder(h5file, &destination[0])
    : H5ReplayRecorder(h5file, &destination[0],reader.GetType(),reader.GetRank());

  reader.Open();
  int rank = reader.GetRank();
  vector<hsize_t>dims(rank,0);
  vector<hsize_t>chunks(rank,0);
  reader.GetDims(dims);
  reader.GetChunkSize(chunks);
  reader.Close();

  fprintf(stdout, "Opening for read %s:%s with rank %d, row x col x flow dims=[ ", &h5file[0], &source[0], rank);
  for (int i=0; i<rank; i++)
    fprintf(stdout, "%d ", (int)dims[i]);
  fprintf(stdout, "], chunksize=[ ");
  for (int i=0; i<rank; i++)
    fprintf(stdout, "%d ", (int)chunks[i]);
  fprintf(stdout, "]\n");

  if (flowlimit_arg.empty())
    flowlimit = dims[2];
  else
    flowlimit = atoi(flowlimit_arg.c_str());

  flowlimit = (flowlimit < dims[2]) ? flowlimit : dims[2];
  fprintf(stdout, "Using %u flows\n", flowlimit);

  // hard code region size to be at least 100x100
  chunks[0] = (chunks[0] < 100) ? 100 : chunks[0];
  chunks[1] = (chunks[1] < 100) ? 100 : chunks[1];

  recorder.CreateDataset(chunks);
  
  int max_threads_ever = (dims[0]/chunks[0] +1)*(dims[1]/chunks[1] +1);
  thread_flags.resize (max_threads_ever, 0);
  // fprintf(stdout, "max_threads_ever = %d\n", max_threads_ever);
  unsigned int thread_id = 0;
  vector<compute_norm_args> my_args( max_threads_ever );
  
  // layout is rows x cols x flows
  for (hsize_t row=0; row<dims[0]; ) {
    for (hsize_t col=0; col<dims[1]; ) {
      pthread_t thread;
      int thread_status;

      assert( thread_id < thread_flags.size() );
      my_args.at(thread_id).row = row;
      my_args.at(thread_id).col = col;
      my_args.at(thread_id).chunks = &chunks;
      my_args.at(thread_id).dims = &dims;
      my_args.at(thread_id).h5file = &h5file;
      my_args.at(thread_id).source = &source;
      my_args.at(thread_id).destination = &destination;
      my_args.at(thread_id).thread_id = thread_id;
      my_args.at(thread_id).flowlimit = flowlimit;

      fprintf(stdout, "creating thread %d from row=%d (max %d), column=%d (max %d)\n", thread_id, (int)row, (int)dims[0], (int)col, (int)dims[1]);
      while (accumulate(thread_flags.begin(), thread_flags.end(), 0) > numThreads) {
	// only have to be approximate, don't worry about races
	// fprintf(stdout, "Sleeping ...\n");
	sleep(1);
      }
      thread_flags[thread_id] = 1;
      thread_status = pthread_create(&thread, NULL, compute_norm, (void *)&my_args[thread_id]);
      // compute_norm((void *)&my_args[thread_id]);
      assert (thread_status >= 0);
      thread_id++;

      col += chunks[1];
      //fflush(stdout);
    }
    row += chunks[0];
  }
  while (accumulate(thread_flags.begin(), thread_flags.end(), 0) > 0) {
    // wait for the threads to finish
    // fprintf(stdout, "Waiting ...\n");
    sleep(1);
  }

  cout << "Done." << endl;
  pthread_exit(NULL);
}