int main(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  bool help;
  string topFile, bottomFile, outFile;
  opts.GetOption(topFile, "", '-', "top");
  opts.GetOption(bottomFile, "", '-', "bottom");
  opts.GetOption(outFile, "", '-', "merged");
  opts.GetOption(help, "false", 'h', "help");
  if (help || argc == 1) {
    usage();
  }
  ION_ASSERT(!topFile.empty() && !bottomFile.empty() && !outFile.empty(),
             "Need top, bottom and merged files. Use --help for details.");
  MergeAcq merger;
  Image top;
  Image bottom;
  Image combo;
  cout << "Loading images." << endl;
  ION_ASSERT(top.LoadRaw(topFile.c_str()), "Couldn't load file.");
  ION_ASSERT(bottom.LoadRaw(bottomFile.c_str()), "Couldn't load file.");
  merger.SetFirstImage(&bottom);
  merger.SetSecondImage(&top, bottom.GetRows(), 0); // start vertically raised, but columns the same
  cout << "Merging." << endl;
  merger.Merge(combo);
  Acq acq;
  cout << "Saving." << endl;
  acq.SetData(&combo);
  acq.WriteVFC(outFile.c_str(), 0, 0, combo.GetCols(), combo.GetRows());
  cout << "Done." << endl;
  return 0;
}
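// For readers unfamiliar with MergeAcq: the merge amounts to stacking two
// row-major frames vertically, with the second image placed at a row offset
// equal to the first image's height (SetSecondImage(&top, bottom.GetRows(), 0)
// above). A minimal self-contained sketch of that layout, assuming plain
// int16_t sample buffers; the real Image/MergeAcq classes also carry frames
// and metadata.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

std::vector<int16_t> MergeVertical(const std::vector<int16_t> &bottom, int rowsBottom,
                                   const std::vector<int16_t> &top, int rowsTop, int cols)
{
  assert((int)bottom.size() == rowsBottom * cols);
  assert((int)top.size() == rowsTop * cols);
  // Result has rowsBottom + rowsTop rows; bottom occupies rows [0, rowsBottom).
  std::vector<int16_t> combo((rowsBottom + rowsTop) * cols);
  std::copy(bottom.begin(), bottom.end(), combo.begin());
  std::copy(top.begin(), top.end(), combo.begin() + bottom.size());
  return combo;
}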
int main(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  string queryFile, goldFile;
  double epsilon;
  bool help = false;
  bool version = false;
  int allowedWrong = 0;
  double maxAbsVal = 0;
  double minCorrelation = 1;
  opts.GetOption(queryFile, "", 'q', "query-wells");
  opts.GetOption(goldFile, "", 'g', "gold-wells");
  opts.GetOption(epsilon, "0.0", 'e', "epsilon");
  opts.GetOption(allowedWrong, "0", 'm', "max-mismatch");
  opts.GetOption(minCorrelation, "1", 'c', "min-cor");
  opts.GetOption(maxAbsVal, "1e3", '-', "max-val");
  opts.GetOption(help, "false", 'h', "help");
  opts.GetOption(version, "false", 'v', "version");
  opts.CheckNoLeftovers();
  if (version) {
    fprintf(stdout, "%s", IonVersion::GetFullVersion("RawWellsEquivalent").c_str());
    exit(0);
  }
  if (queryFile.empty() || goldFile.empty() || help) {
    cout << "RawWellsEquivalent - Check how similar two wells files are to each other" << endl
         << "options: " << endl
         << "   -g,--gold-wells    trusted wells to compare against." << endl
         << "   -q,--query-wells   new wells to check." << endl
         << "   -e,--epsilon       maximum allowed difference to be considered equivalent." << endl
         << "   -m,--max-mismatch  maximum number of non-equivalent entries to allow." << endl
         << "   -c,--min-cor       minimum correlation allowed to be considered equivalent." << endl
         << "      --max-val       maximum absolute value considered (avoid extreme values)." << endl
         << "   -h,--help          this message." << endl
         << "" << endl
         << "usage: " << endl
         << "  RawWellsEquivalent -e 10 --query-wells query.wells --gold-wells gold.wells " << endl;
    exit(1);
  }
  NumericalComparison<double> compare = CompareWells(queryFile, goldFile, epsilon, maxAbsVal);
  cout << compare.GetCount() << " total values. " << endl
       << compare.GetNumSame() << " (" << (100.0 * compare.GetNumSame()) / compare.GetCount() << "%) are equivalent. " << endl
       << compare.GetNumDiff() << " (" << (100.0 * compare.GetNumDiff()) / compare.GetCount() << "%) are not equivalent. " << endl
       << "Correlation of: " << compare.GetCorrelation() << endl;
  // Strict '>' so that a perfect match still passes when allowedWrong == 0.
  if ((compare.GetCount() - allowedWrong) > compare.GetNumSame() ||
      compare.GetCorrelation() < minCorrelation) {
    cout << "Wells files not equivalent for allowed mismatch: " << allowedWrong
         << " minimum correlation: " << minCorrelation << endl;
    return 1;
  }
  cout << "Wells files equivalent for allowed mismatch: " << allowedWrong
       << " minimum correlation: " << minCorrelation << endl;
  return 0;
}
int main(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  string queryFile, goldFile;
  double epsilon;
  bool help = false;
  bool version = false;
  int allowedWrong = 0;
  double maxAbsVal = 0;
  double minCorrelation = 1;
  bool dumpMisMatch = false;
  opts.GetOption(queryFile, "", 'q', "query-wells");
  opts.GetOption(goldFile, "", 'g', "gold-wells");
  opts.GetOption(epsilon, "0.0", 'e', "epsilon");
  opts.GetOption(allowedWrong, "0", 'm', "max-mismatch");
  opts.GetOption(minCorrelation, "1", 'c', "min-cor");
  opts.GetOption(maxAbsVal, "1e3", '-', "max-val");
  opts.GetOption(help, "false", 'h', "help");
  opts.GetOption(version, "false", 'v', "version");
  opts.GetOption(dumpMisMatch, "false", 'o', "dump-mismatch");
  opts.CheckNoLeftovers();
  if (version) {
    fprintf(stdout, "%s", IonVersion::GetFullVersion("RawWellsEquivalent").c_str());
    exit(0);
  }
  if (queryFile.empty() || goldFile.empty() || help) {
    printUsage();
    exit(1);
  }
  DumpMismatches dump(dumpMisMatch);
  NumericalComparison<double> compare = CompareWells(queryFile, goldFile, epsilon, maxAbsVal, dump);
  cout << compare.GetCount() << " total values. " << endl
       << compare.GetNumSame() << " (" << (100.0 * compare.GetNumSame()) / compare.GetCount() << "%) are equivalent. " << endl
       << compare.GetNumDiff() << " (" << (100.0 * compare.GetNumDiff()) / compare.GetCount() << "%) are not equivalent. " << endl
       << "Correlation of: " << compare.GetCorrelation() << endl;
  if ((compare.GetCount() - allowedWrong) > compare.GetNumSame() ||
      (compare.GetCorrelation() < minCorrelation && compare.GetCount() != compare.GetNumSame())) {
    cout << "Wells files not equivalent for allowed mismatch: " << allowedWrong
         << " minimum correlation: " << minCorrelation << endl;
    return 1;
  }
  cout << "Wells files equivalent for allowed mismatch: " << allowedWrong
       << " minimum correlation: " << minCorrelation << endl;
  return 0;
}
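// The pass/fail rule above can be isolated into a small pure function, which
// makes its two edge cases explicit: the mismatch budget uses a strict
// inequality, and the correlation gate is skipped when every value already
// matched (correlation is not meaningful for a perfect match). A sketch,
// assuming counts fit in long:
#include <cstdio>

bool WellsEquivalent(long count, long numSame, double correlation,
                     int allowedWrong, double minCorrelation)
{
  if (count - allowedWrong > numSame)
    return false; // too many mismatched entries
  if (correlation < minCorrelation && count != numSame)
    return false; // poor correlation and at least one mismatch
  return true;
}

int main()
{
  std::printf("%d\n", WellsEquivalent(1000, 1000, 0.0, 0, 0.99));  // 1: perfect match passes
  std::printf("%d\n", WellsEquivalent(1000, 999, 0.999, 0, 0.99)); // 0: one mismatch, zero budget
  return 0;
}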
void ExtendParameters::SetupFileIO(OptArgs &opts)
{
  // freeBayes slot
  fasta = opts.GetFirstString('r', "reference", "");
  if (fasta.empty()) {
    cerr << "FATAL ERROR: Reference file not specified via -r" << endl;
    exit(1);
  }
  ValidateAndCanonicalizePath(fasta);

  // freeBayes slot
  variantPriorsFile = opts.GetFirstString('c', "input-vcf", "");
  if (variantPriorsFile.empty())
    cerr << "INFO: No input VCF (Hotspot) file specified via -c,--input-vcf" << endl;
  else
    ValidateAndCanonicalizePath(variantPriorsFile);

  sseMotifsFileName = opts.GetFirstString('e', "error-motifs", "");
  sseMotifsProvided = true;
  if (sseMotifsFileName.empty()) {
    sseMotifsProvided = false;
    cerr << "INFO: Systematic error motif file not specified via -e" << endl;
  } else
    ValidateAndCanonicalizePath(sseMotifsFileName);

  opts.GetOption(bams, "", 'b', "input-bam");
  if (bams.empty()) {
    cerr << "FATAL ERROR: BAM file not specified via -b" << endl;
    exit(1);
  }
  for (unsigned int i_bam = 0; i_bam < bams.size(); ++i_bam)
    ValidateAndCanonicalizePath(bams[i_bam]);

  outputDir = opts.GetFirstString('O', "output-dir", ".");
  ValidateAndCanonicalizePath(outputDir);

  outputFile = opts.GetFirstString('o', "output-vcf", "");
  if (outputFile.empty()) {
    cerr << "FATAL ERROR: Output VCF filename not specified via -o" << endl;
    exit(1);
  }

  // Are these file names?
  postprocessed_bam = opts.GetFirstString('-', "postprocessed-bam", "");
  sampleName = opts.GetFirstString('g', "sample-name", "");
  force_sample_name = opts.GetFirstString('-', "force-sample-name", "");
}
int main(int argc, const char *argv[])
{
  printf("------------- bamrealignment --------------\n");

  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  vector<int> score_vals(4);

  string input_bam  = opts.GetFirstString('i', "input", "");
  string output_bam = opts.GetFirstString('o', "output", "");
  opts.GetOption(score_vals, "4,-6,-5,-2", 's', "scores");
  int clipping     = opts.GetFirstInt('c', "clipping", 2);
  bool anchors     = opts.GetFirstBoolean('a', "anchors", true);
  int bandwidth    = opts.GetFirstInt('b', "bandwidth", 10);
  bool verbose     = opts.GetFirstBoolean('v', "verbose", false);
  bool debug       = opts.GetFirstBoolean('d', "debug", false);
  int format       = opts.GetFirstInt('f', "format", 1);
  int num_threads  = opts.GetFirstInt('t', "threads", 8);
  string log_fname = opts.GetFirstString('l', "log", "");

  if (input_bam.empty() or output_bam.empty())
    return PrintHelp();

  opts.CheckNoLeftovers();

  std::ofstream logf;
  if (log_fname.size()) {
    logf.open(log_fname.c_str());
    if (!logf.is_open()) {
      fprintf(stderr, "bamrealignment: Failed to open log file %s\n", log_fname.c_str());
      return 1;
    }
  }

  BamReader reader;
  if (!reader.Open(input_bam)) {
    fprintf(stderr, "bamrealignment: Failed to open input file %s\n", input_bam.c_str());
    return 1;
  }

  SamHeader header = reader.GetHeader();
  RefVector refs = reader.GetReferenceData();

  BamWriter writer;
  writer.SetNumThreads(num_threads);
  if (format == 1)
    writer.SetCompressionMode(BamWriter::Uncompressed);
  else
    writer.SetCompressionMode(BamWriter::Compressed);
  if (!writer.Open(output_bam, header, refs)) {
    fprintf(stderr, "bamrealignment: Failed to open output file %s\n", output_bam.c_str());
    return 1;
  }

  // The meat starts here ------------------------------------

  if (verbose)
    cout << "Verbose option is activated; each alignment will be printed to the screen." << endl
         << "  After each read, hit RETURN to continue to the next one," << endl
         << "  or press q RETURN to quit the program," << endl
         << "  or press s RETURN to silence verbose," << endl
         << "  or press c RETURN to continue printing without further prompts." << endl << endl;

  unsigned int readcounter = 0;
  unsigned int mapped_readcounter = 0;
  unsigned int realigned_readcounter = 0;
  unsigned int modified_alignment_readcounter = 0;
  unsigned int pos_update_readcounter = 0;
  unsigned int failed_clip_realigned_readcount = 0;
  unsigned int already_perfect_readcount = 0;
  unsigned int bad_md_tag_readcount = 0;
  unsigned int error_recreate_ref_readcount = 0;
  unsigned int error_clip_anchor_readcount = 0;
  unsigned int error_sw_readcount = 0;
  unsigned int error_unclip_readcount = 0;

  unsigned int start_position_shift;
  int orig_position;
  int new_position;

  string md_tag, new_md_tag, input = "x";
  vector<CigarOp> new_cigar_data;
  vector<MDelement> new_md_data;
  bool position_shift = false;
  time_t start_time = time(NULL);

  Realigner aligner;
  aligner.verbose_ = verbose;
  aligner.debug_ = debug;
  if (!aligner.SetScores(score_vals))
    cout << "bamrealignment: Four scores need to be provided: match, mismatch, gap open, gap extend score!" << endl;
  aligner.SetAlignmentBandwidth(bandwidth);

  BamAlignment alignment;
  while (reader.GetNextAlignment(alignment)) {
    readcounter++;
    position_shift = false;
    if ((readcounter % 100000) == 0)
      cout << "Processed " << readcounter << " reads. Elapsed time: " << (time(NULL) - start_time) << endl;

    if (alignment.IsMapped()) {
      orig_position = alignment.Position;
      mapped_readcounter++;
      aligner.SetClipping(clipping, !alignment.IsReverseStrand());
      if (aligner.verbose_) {
        cout << endl;
        if (alignment.IsReverseStrand())
          cout << "The read is from the reverse strand." << endl;
        else
          cout << "The read is from the forward strand." << endl;
      }

      if (!alignment.GetTag("MD", md_tag)) {
        if (aligner.verbose_)
          cout << "Warning: Skipping read " << alignment.Name << ". It is mapped but missing the MD tag." << endl;
        if (logf.is_open())
          logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID
               << '\t' << setfill('0') << setw(8) << orig_position << '\t' << "MISSMD" << '\n';
        bad_md_tag_readcount++;
      } else if (aligner.CreateRefFromQueryBases(alignment.QueryBases, alignment.CigarData, md_tag, anchors)) {
        bool clipfail = false;
        if (Realigner::CR_ERR_CLIP_ANCHOR == aligner.GetCreateRefError()) {
          clipfail = true;
          failed_clip_realigned_readcount++;
        }

        if (!aligner.computeSWalignment(new_cigar_data, new_md_data, start_position_shift)) {
          if (aligner.verbose_)
            cout << "Error in the alignment! Not updating read information." << endl;
          if (logf.is_open())
            logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID
                 << '\t' << setfill('0') << setw(8) << orig_position << '\t' << "SWERR" << '\n';
          error_sw_readcount++;
          writer.SaveAlignment(alignment); // Write alignment unchanged
          continue;
        }

        if (!aligner.addClippedBasesToTags(new_cigar_data, new_md_data, alignment.QueryBases.size())) {
          if (aligner.verbose_)
            cout << "Error when adding clipped anchors back to tags! Not updating read information." << endl;
          if (logf.is_open())
            logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID
                 << '\t' << setfill('0') << setw(8) << orig_position << '\t' << "UNCLIPERR" << '\n';
          writer.SaveAlignment(alignment); // Write alignment unchanged
          error_unclip_readcount++;
          continue;
        }
        new_md_tag = aligner.GetMDstring(new_md_data);
        realigned_readcounter++;

        // adjust start position of read
        if (!aligner.LeftAnchorClipped() and start_position_shift != 0) {
          new_position = aligner.updateReadPosition(alignment.CigarData, (int)start_position_shift, alignment.Position);
          if (new_position != alignment.Position) {
            pos_update_readcounter++;
            position_shift = true;
            alignment.Position = new_position;
          }
        }

        if (position_shift || alignment.CigarData.size() != new_cigar_data.size() || md_tag != new_md_tag) {
          if (logf.is_open()) {
            logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID
                 << '\t' << setfill('0') << setw(8) << orig_position << '\t' << "MOD";
            if (position_shift) logf << "-SHIFT";
            if (clipfail) logf << " NOCLIP";
            logf << '\n';
          }
          modified_alignment_readcounter++;
        } else {
          if (logf.is_open()) {
            logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID
                 << '\t' << setfill('0') << setw(8) << orig_position << '\t' << "UNMOD";
            if (clipfail) logf << " NOCLIP";
            logf << '\n';
          }
        }

        if (aligner.verbose_) {
          cout << alignment.Name << endl;
          cout << "------------------------------------------" << endl;
          // Wait for input to continue or quit program
          if (input.size() == 0)
            input = 'x';
          else if (input[0] != 'c' and input[0] != 'C')
            getline(cin, input);
          if (input.size() > 0) {
            if (input[0] == 'q' or input[0] == 'Q')
              return 1;
            else if (input[0] == 's' or input[0] == 'S')
              aligner.verbose_ = false;
          }
        }

        // Finally update alignment information
        alignment.CigarData = new_cigar_data;
        alignment.EditTag("MD", "Z", new_md_tag);

      } // end of CreateRef else if
      else {
        switch (aligner.GetCreateRefError()) {
          case Realigner::CR_ERR_RECREATE_REF:
            if (logf.is_open())
              logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID
                   << '\t' << setfill('0') << setw(8) << orig_position << '\t' << "RECRERR" << '\n';
            error_recreate_ref_readcount++;
            break;
          case Realigner::CR_ERR_CLIP_ANCHOR:
            if (logf.is_open())
              logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID
                   << '\t' << setfill('0') << setw(8) << orig_position << '\t' << "CLIPERR" << '\n';
            error_clip_anchor_readcount++;
            break;
          default:
            // On a good run this writes way too many reads to the log file - don't want to create an overly large txt file
            // if (logf.is_open())
            //   logf << alignment.Name << '\t' << alignment.IsReverseStrand() << '\t' << alignment.RefID << '\t' << setfill('0') << setw(8) << orig_position << '\t' << "PERFECT" << '\n';
            already_perfect_readcount++;
            break;
        }

        if (aligner.verbose_) {
          cout << alignment.Name << endl;
          cout << "------------------------------------------" << endl;
          // Wait for input to continue or quit program
          if (input.size() == 0)
            input = 'x';
          else if (input[0] != 'c' and input[0] != 'C')
            getline(cin, input);
          if (input.size() > 0) {
            if (input[0] == 'q' or input[0] == 'Q')
              return 1;
            else if (input[0] == 's' or input[0] == 'S')
              aligner.verbose_ = false;
          }
        }
      }

      // --- Debug output for Rajesh ---
      if (debug && aligner.invalid_cigar_in_input) {
        aligner.verbose_ = true;
        cout << "Invalid cigar string / md tag pair in read " << alignment.Name << endl;
        // Rerun reference generation to display error
        aligner.CreateRefFromQueryBases(alignment.QueryBases, alignment.CigarData, md_tag, anchors);
        aligner.verbose_ = verbose;
        aligner.invalid_cigar_in_input = false;
      }
      // --- --- ---

    } // end of if isMapped

    writer.SaveAlignment(alignment);
  } // end while loop over reads

  if (aligner.invalid_cigar_in_input)
    cerr << "WARNING bamrealignment: There were invalid cigar string / md tag pairs in the input bam file." << endl;

  // ----------------------------------------------------------------
  // program end -- output summary information
  cout << " File: " << input_bam << endl
       << " Total reads: " << readcounter << endl
       << " Mapped reads: " << mapped_readcounter << endl;
  if (bad_md_tag_readcount)
    cout << " Skipped: bad MD tags: " << bad_md_tag_readcount << endl;
  if (error_recreate_ref_readcount)
    cout << " Skipped: unable to recreate ref: " << error_recreate_ref_readcount << endl;
  if (error_clip_anchor_readcount)
    cout << " Skipped: error clipping anchor: " << error_clip_anchor_readcount << endl;
  cout << " Skipped: already perfect: " << already_perfect_readcount << endl
       << " Total reads realigned: "
       << mapped_readcounter - already_perfect_readcount - bad_md_tag_readcount - error_recreate_ref_readcount - error_clip_anchor_readcount << endl;
  if (failed_clip_realigned_readcount)
    cout << " (including " << failed_clip_realigned_readcount << " that failed to clip)" << endl;
  if (error_sw_readcount)
    cout << " Failed to complete SW alignment: " << error_sw_readcount << endl;
  if (error_unclip_readcount)
    cout << " Failed to unclip anchor: " << error_unclip_readcount << endl;
  cout << " Successfully realigned: " << realigned_readcounter << endl
       << " Modified alignments: " << modified_alignment_readcounter << endl
       << " Shifted position: " << pos_update_readcounter << endl;
  cout << "Processing time: " << (time(NULL) - start_time) << " seconds." << endl;
  cout << "INFO: The output BAM file may be unsorted." << endl;
  cout << "------------------------------------------" << endl;
  return 0;
}
int main(int argc, const char *argv[])
{
  OptArgs opts;
  TraceConfig config;
  string inputDir;
  string outputDir;
  bool help;

  opts.ParseCmdLine(argc, argv);
  opts.GetOption(inputDir, "", '-', "source-dir");
  opts.GetOption(outputDir, "", '-', "output-dir");
  opts.GetOption(config.precision, "5", '-', "precision");
  opts.GetOption(config.numEvec, "7", '-', "num-evec");
  opts.GetOption(config.doDebug, "false", '-', "debug-files");
  opts.GetOption(config.compressionType, "delta", '-', "compression");
  opts.GetOption(config.numFlows, "-1", '-', "num-flows");
  opts.GetOption(config.numCores, "6", '-', "num-cores");
  opts.GetOption(config.errCon, "0", '-', "err-con");
  opts.GetOption(config.rankGood, "0", '-', "rank-good");
  opts.GetOption(config.pivot, "0", '-', "pivot");
  opts.GetOption(help, "false", 'h', "help");
  opts.GetOption(config.isThumbnail, "false", '-', "thumbnail");
  opts.GetOption(config.use_hard_est, "false", '-', "use-hard-est");
  opts.GetOption(config.t0_hard, "0", '-', "t0-hard");
  opts.GetOption(config.tmid_hard, "0", '-', "tmid-hard");
  opts.GetOption(config.sigma_hard, "0", '-', "sigma-hard");
  opts.GetOption(config.row_step, "100", '-', "row-step");
  opts.GetOption(config.col_step, "100", '-', "col-step");
  opts.GetOption(config.bg_param, "", '-', "region-param");
  opts.GetOption(config.grind_acq_0, "0", '-', "grind-acq0");
  if (help || inputDir.empty() || outputDir.empty()) {
    usage();
  }

  char *explog_path = NULL;
  explog_path = MakeExpLogPathFromDatDir(inputDir.c_str());
  int numFlows = config.numFlows;
  if (numFlows < 0) {
    numFlows = GetTotalFlows(explog_path);
  }

  // Check and set up our compression type
  TraceChunkSerializer serializer;
  serializer.SetRecklessAbandon(true);
  if (config.compressionType == "svd") {
    SvdDatCompress *dc = new SvdDatCompress(config.precision, config.numEvec);
    serializer.SetCompressor(dc);
    cout << "Doing lossy svd compression. (" << serializer.GetCompressionType() << ")" << endl;
  }
  // else if (config.compressionType == "svd+") {
  //   SvdDatCompressPlus *dc = new SvdDatCompressPlus();
  //   serializer.SetCompressor(dc);
  //   cout << "Doing lossy svd compression. (" << serializer.GetCompressionType() << ")" << endl;
  // }
  // else if (config.compressionType == "svd++") {
  //   SvdDatCompressPlusPlus *dc = new SvdDatCompressPlusPlus();
  //   if (config.errCon > 0)
  //     dc->SetErrCon(config.errCon);
  //   if (config.rankGood > 0)
  //     dc->SetRankGood(config.rankGood);
  //   if (config.pivot > 0)
  //     dc->SetPivot(config.pivot);
  //   serializer.SetCompressor(dc);
  //   cout << "Doing lossy svd compression for good traces and delta for bad ones. (" << serializer.GetCompressionType() << ")" << endl;
  // }
  else if (config.compressionType == "delta") {
    VencoLossless *venco = new VencoLossless();
    serializer.SetCompressor(venco);
    cout << "Doing lossless delta compression. (" << serializer.GetCompressionType() << ")" << endl;
  } else if (config.compressionType == "delta-plain") {
    DeltaComp *delta = new DeltaComp();
    serializer.SetCompressor(delta);
    cout << "Doing lossless delta plain compression. (" << serializer.GetCompressionType() << ")" << endl;
  } else if (config.compressionType == "delta-plain-fst") {
    DeltaCompFst *delta = new DeltaCompFst();
    serializer.SetCompressor(delta);
    cout << "Doing lossless delta plain fast compression. (" << serializer.GetCompressionType() << ")" << endl;
  } else if (config.compressionType == "delta-plain-fst-smx") {
    DeltaCompFstSmX *delta = new DeltaCompFstSmX();
    serializer.SetCompressor(delta);
    cout << "Doing lossless delta plain fast compression. (" << serializer.GetCompressionType() << ")" << endl;
  } else if (config.compressionType == "none") {
    TraceCompressor *vanilla = new TraceNoCompress();
    serializer.SetCompressor(vanilla);
    cout << "Doing no compression. (" << serializer.GetCompressionType() << ")" << endl;
  } else {
    ION_ABORT("Don't recognize compression type: " + config.compressionType);
  }

  const char *id = GetChipId(explog_path);
  if (explog_path)
    free(explog_path);
  ChipIdDecoder::SetGlobalChipId(id);
  ImageTransformer::CalibrateChannelXTCorrection(inputDir.c_str(), "lsrowimage.dat");

  Image bfImg1;
  string bfFile = inputDir + "/beadfind_pre_0003.dat";
  bfImg1.LoadRaw(bfFile.c_str());
  const RawImage *bf1raw = bfImg1.GetImage();
  Mask mask(bf1raw->cols, bf1raw->rows);
  ImageTransformer::XTChannelCorrect(bfImg1.raw, bfImg1.results_folder);
  bfImg1.FilterForPinned(&mask, MaskEmpty, false);

  Image bfImg2;
  string bfFile2 = inputDir + "/beadfind_pre_0001.dat";
  bfImg2.LoadRaw(bfFile2.c_str());
  ImageTransformer::XTChannelCorrect(bfImg2.raw, bfImg1.results_folder);
  bfImg2.FilterForPinned(&mask, MaskEmpty, false);
  const RawImage *bf2raw = bfImg2.GetImage();

  GridMesh<T0Prior> t0Prior;
  T0Calc bfT0;
  /* Calc t0 and get prior. */
  cout << "Doing beadfind t0" << endl;
  GenerateBfT0Prior(config, bf1raw->image, bf1raw->baseFrameRate, bf1raw->rows, bf1raw->cols,
                    bf1raw->frames, bf1raw->timestamps, config.row_step, config.col_step,
                    &mask, bfT0, t0Prior);

  GridMesh<T0Prior> t0Prior2;
  T0Calc bfT02;
  GenerateBfT0Prior(config, bf2raw->image, bf2raw->baseFrameRate, bf2raw->rows, bf2raw->cols,
                    bf2raw->frames, bf2raw->timestamps, config.row_step, config.col_step,
                    &mask, bfT02, t0Prior2);

  SigmaTMidNucEstimation sigmaEst;
  sigmaEst.Init(config.rate_sigma_intercept, config.rate_sigma_slope,
                config.t0_tmid_intercept, config.t0_tmid_slope, bf1raw->baseFrameRate);
  GridMesh<SigmaEst> sigmaTMid;
  bfImg1.Close();
  bfImg2.Close();

  // Calculate individual well t0 by looking at neighboring regions
  vector<float> wellT0;
  bfT0.CalcIndividualT0(wellT0, 0);
  vector<float> wellT02;
  bfT02.CalcIndividualT0(wellT02, 0);
  for (size_t i = 0; i < wellT0.size(); i++) {
    if (wellT0[i] > 0 && wellT02[i] > 0) {
      wellT0[i] = (wellT0[i] + wellT02[i]) / 2.0f;
    } else {
      wellT0[i] = max(wellT0[i], wellT02[i]);
    }
  }

  // Average the region-level t0; should we do this first and then just do the single-well level?
  for (size_t bIx = 0; bIx < bfT0.GetNumRegions(); bIx++) {
    double t1 = bfT0.GetT0(bIx);
    double t2 = bfT02.GetT0(bIx);
    if (t1 > 0 && t2 > 0) {
      t1 = (t1 + t2) / 2.0;
    } else {
      t1 = max(t1, t2);
    }
    bfT0.SetT0(bIx, t1);
  }

  // Single thread first dat
  for (size_t datIx = 0; datIx < 1; ++datIx) {
    cout << "Doing: " << datIx << endl;
    char buffer[2048];
    snprintf(buffer, sizeof(buffer), "%s/acq_%.4d.dat", inputDir.c_str(), (int)datIx);
    string datFile = buffer;
    /* Use prior to calculate t0 and slope. */
    Image datImg;
    T0Calc t0;
    datImg.LoadRaw(datFile.c_str());
    // ImageTransformer::XTChannelCorrect(datImg.raw, datImg.results_folder);
    const RawImage *datRaw = datImg.GetImage();

    /* Estimate sigma and t_mid_nuc */
    if (datIx == 0) {
      cout << "Doing acquisition t0" << endl;
      GenerateAcqT0Prior(config, datRaw->image, datRaw->baseFrameRate, datRaw->rows, datRaw->cols,
                         datRaw->frames, datRaw->timestamps, config.row_step, config.col_step,
                         &mask, t0, t0Prior);
      ClockTimer timer;
      cout << "Estimating sigma." << endl;
      sigmaTMid.Init(datRaw->rows, datRaw->cols, config.row_step, config.col_step);
      for (size_t bIx = 0; bIx < t0.GetNumRegions(); bIx++) {
        t0.SetT0(bIx, bfT0.GetT0(bIx));
      }
      int neighbors = 2;
      if (config.isThumbnail) {
        cout << "Doing thumbnail version of slope." << endl;
        neighbors = 1;
      }
      EstimateSigmaValue(t0, sigmaEst, sigmaTMid, neighbors);
      timer.PrintMilliSeconds(cout, "Sigma Est took:");
      string sigmaFile = outputDir + "/sigma_tmid_est.txt";
      OutputSigmaTmidEstimates(sigmaTMid, sigmaFile.c_str());
    }

    /* For each region do shifting */
    ClockTimer timer;
    cout << "Shifting traces" << endl;
    timer.StartTimer();
    // ShiftTraces(bfT0, wellT0, datRaw->frames, datRaw->baseFrameRate, datRaw->timestamps, datRaw->image);
    timer.PrintMilliSeconds(cout, "Shift took:");

    if (!config.bg_param.empty()) {
      DataCube<int> rowsCols;
      DataCube<float> tmidSigma;
      DataCube<float> fitTmidSigma;
      string path = config.bg_param + ":/region/region_location";
      if (!H5File::ReadDataCube(path, rowsCols)) {
        ION_ABORT("Couldn't read file: " + path);
      }
      path = config.bg_param + ":/region/region_init_param";
      if (!H5File::ReadDataCube(path, fitTmidSigma)) {
        ION_ABORT("Couldn't read file: " + path);
      }
      for (size_t i = 0; i < rowsCols.GetNumX(); i++) {
        int row = rowsCols.At(i, 1, 0);
        int col = rowsCols.At(i, 0, 0);
        SigmaEst &est = sigmaTMid.GetItemByRowCol(row, col);
        float tmid_est = fitTmidSigma.At(i, 0, 0);
        float sigma_est = fitTmidSigma.At(i, 1, 0);
        est.mTMidNuc = tmid_est;
        est.mSigma = sigma_est;
      }
      string fitSigmaFile = outputDir + "/bg_fit_sigma_tmid_est.txt";
      OutputSigmaTmidEstimates(sigmaTMid, fitSigmaFile.c_str());
      // path = config.bg_param + ":/region/region_init_param";
      // if (!H5File::ReadMatrix(path, tmidSigma)) {
      //   ION_ABORT("Couldn't read file: " + path);
      // }
      // for (size_t i = 0; i < rowsCols.n_rows; i++) {
      //   int row = rowsCols.at(i, 0);
      //   int col = rowsCols.at(i, 1);
      //   SigmaEst &est = sigmaTMid.GetItemByRowCol(row, col);
      //   float tmid_est = tmidSigma.at(i, 0);
      //   float sigma_est = tmidSigma.at(i, 1);
      //   est.mTMidNuc = tmid_est;
      //   est.mSigma = sigma_est;
      // }
      // string sigmaFile = outputDir + "/supplied_sigma_tmid_est.txt";
      // OutputSigmaTmidEstimates(sigmaTMid, sigmaFile.c_str());
    } else if (config.use_hard_est) {
      for (size_t i = 0; i < bfT0.GetNumRegions(); i++) {
        bfT0.SetT0(i, config.t0_hard * datRaw->baseFrameRate + config.time_start_slop);
      }
      for (size_t i = 0; i < sigmaTMid.GetNumBin(); i++) {
        SigmaEst &est = sigmaTMid.GetItem(i);
        est.mTMidNuc = config.tmid_hard;
        est.mSigma = config.sigma_hard;
        est.mT0 = config.t0_hard;
      }
    }

    /* Use t0 and sigma to get the time compression bkgModel wants. */
    cout << "Generating chunks" << endl;
    // GridMesh<TraceChunk> traceChunks;
    SynchDat sdat;
    if (datIx == 0 && config.grind_acq_0 > 0) {
      int nTimes = config.grind_acq_0;
      timer.StartTimer();
      size_t processMicroSec = 0;
      size_t hdf5MicroSec = 0;
      size_t compressMicroSec = 0;
      size_t convertMicroSec = 0;
      for (int i = 0; i < nTimes; i++) {
        // GridMesh<TraceChunk> traceChunken;
        SynchDat sdatIn;
        AddMetaData(sdat, datRaw, datIx);
        ClockTimer convTimer;
        GenerateDataChunks(config, bfT0, datRaw, config.row_step, config.col_step, sigmaTMid, sdatIn.mChunks, datImg);
        convertMicroSec += convTimer.GetMicroSec();
        snprintf(buffer, sizeof(buffer), "%s/acq_%.4d.sdat", outputDir.c_str(), (int)datIx);
        serializer.Write(buffer, sdatIn);
        processMicroSec += serializer.computeMicroSec;
        hdf5MicroSec += serializer.ioMicroSec;
        compressMicroSec += serializer.compressMicroSec;
      }
      size_t usec = timer.GetMicroSec();
      cout << "Took: " << usec / 1.0e6 << " seconds, " << usec / (nTimes * 1.0f) << " usec per write." << endl;
      timer.PrintMilliSeconds(cout, "Chunks took:");
      cout << "Read took: " << processMicroSec / (1e3 * nTimes) << " milliseconds per sdat compute." << endl;
      cout << "Read took: " << hdf5MicroSec / (1e3 * nTimes) << " milliseconds per sdat hdf5." << endl;
      cout << "Read took: " << compressMicroSec / (1e3 * nTimes) << " milliseconds per sdat compressing." << endl;
      cout << "Read took: " << convertMicroSec / (1e3 * nTimes) << " milliseconds per sdat converting." << endl;
      exit(0);
    } else {
      timer.StartTimer();
      AddMetaData(sdat, datRaw, datIx);
      GenerateDataChunks(config, bfT0, datRaw, config.row_step, config.col_step, sigmaTMid, sdat.mChunks, datImg);
      timer.PrintMilliSeconds(cout, "Chunks took:");
      if (datIx == 0 && config.doDebug) {
        OutputTraceChunks(sdat.mChunks, "flow_0_data_chunks.txt");
      }
    }
    datImg.Close();

    /* Serialize onto disk. */
    snprintf(buffer, sizeof(buffer), "%s/acq_%.4d.sdat", outputDir.c_str(), (int)datIx);
    serializer.Write(buffer, sdat);

    /* Read back in first flow for checking */
    if (datIx == 0) {
      TraceChunkSerializer readSerializer;
      readSerializer.SetRecklessAbandon(true);
      // GridMesh<TraceChunk> traceChunksIn;
      SynchDat sdatIn;
      readSerializer.Read(buffer, sdatIn);
      if (datIx == 0 && config.doDebug) {
        OutputTraceChunks(sdatIn.mChunks, "flow_0_data_chunks_read.txt");
      }
      SampleQuantiles<float> s(50000);
      SampleQuantiles<float> s2(50000);
      SampleQuantiles<float> sAbs(50000);
      SampleStats<double> ss;
      int diffCount = 0;
      for (size_t bIx = 0; bIx < sdatIn.mChunks.mBins.size(); bIx++) {
        if (sdatIn.mChunks.mBins[bIx].mT0 != sdat.mChunks.mBins[bIx].mT0) {
          cout << "Got: " << sdatIn.mChunks.mBins[bIx].mT0 << " vs: " << sdat.mChunks.mBins[bIx].mT0 << endl;
          exit(1);
        }
        for (size_t i = 0; i < sdatIn.mChunks.mBins[bIx].mData.size(); i++) {
          double diff = (double)sdatIn.mChunks.mBins[bIx].mData[i] - (double)sdat.mChunks.mBins[bIx].mData[i];
          if (!std::isfinite(diff)) {
            cout << "NaNs!!" << endl;
          }
          if (diffCount < 10 && fabs(diff) > .00001) { // != 0) {
            diffCount++;
            cout << "Bin: " << bIx << " well: " << i << " diff is: " << diff << endl;
          }
          s.AddValue(diff);
          sAbs.AddValue(fabs(diff));
          ss.AddValue(sqrt(diff * diff));
          s2.AddValue(sqrt(diff * diff));
        }
      }
      cout << "Median rms: " << s2.GetMedian() << " Avg: " << ss.GetMean() << " diff: " << s.GetMedian() << endl;
      cout << "Abs(diff) Quantiles:" << endl;
      for (size_t i = 0; i <= 100; i += 10) {
        cout << i << "\t" << sAbs.GetQuantile(i / 100.0) << endl;
      }
    }
  }

  // do the next N flows multithreaded
  if (numFlows > 1) {
    PJobQueue jQueue(config.numCores, numFlows - 1);
    vector<CreateSDat> jobs(numFlows - 1);
    // for (int i = 0; i < 4; i++) {
    //   char buffer[2048];
    //   snprintf(buffer, sizeof(buffer), "%s/beadfind_pre_%.4d.dat", inputDir.c_str(), (int)i);
    //   string input = buffer;
    //   snprintf(buffer, sizeof(buffer), "%s/beadfind_pre_%.4d.sdat", outputDir.c_str(), (int)i);
    //   string output = buffer;
    //   jobs[i].Init(&config, input, output, &wellT0, &bfT0, &sigmaTMid);
    //   jQueue.AddJob(jobs[i]);
    // }
    // jQueue.WaitUntilDone();
    for (int i = 1; i < numFlows; i++) {
      char buffer[2048];
      snprintf(buffer, sizeof(buffer), "%s/acq_%.4d.dat", inputDir.c_str(), (int)i);
      string input = buffer;
      snprintf(buffer, sizeof(buffer), "%s/acq_%.4d.sdat", outputDir.c_str(), (int)i);
      string output = buffer;
      jobs[i - 1].Init(&config, input, output, &wellT0, &bfT0, &sigmaTMid, i);
      jQueue.AddJob(jobs[i - 1]);
    }
    jQueue.WaitUntilDone();
  }

  /* Serialize into background models */
  cout << "Done." << endl;
  return 0;
}
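// The "delta" compressors selected above are lossless; the core idea is to
// store each trace as its per-frame differences, which are small and highly
// compressible for slowly varying signals. A minimal sketch of that idea,
// not the actual VencoLossless/DeltaComp implementations (those add
// bit-packing and chunk handling). Unsigned 16-bit wrap-around arithmetic
// makes the round trip exact.
#include <cstdint>
#include <vector>

std::vector<uint16_t> DeltaEncode(const std::vector<uint16_t> &trace)
{
  std::vector<uint16_t> out(trace.size());
  uint16_t prev = 0;
  for (size_t i = 0; i < trace.size(); ++i) {
    out[i] = (uint16_t)(trace[i] - prev); // small deltas for smooth traces
    prev = trace[i];
  }
  return out;
}

std::vector<uint16_t> DeltaDecode(const std::vector<uint16_t> &deltas)
{
  std::vector<uint16_t> out(deltas.size());
  uint16_t acc = 0;
  for (size_t i = 0; i < deltas.size(); ++i) {
    acc = (uint16_t)(acc + deltas[i]); // running sum restores the samples
    out[i] = acc;
  }
  return out;
}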
int main(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  int hpLength;
  string statsOut;
  string alignmentOut;
  string pairedOut;
  string flowsOut;
  string summaryOut;
  string samFile;
  string qScoreCol;
  string wellsFile;
  string bfmaskFile;
  string snrFile;
  string binnedHpSigFile;
  string flowErrFile;
  string gcErrFile;
  int gcWin;
  string flowOrder;
  string keySeq;
  int numFlows;
  bool help;
  int qLength;
  double colCenter;
  double rowCenter;
  int colSize;
  int rowSize;
  int sampleSize;
  string wellsToUse;
  string run1, run2;

  opts.GetOption(run1, "", '-', "sff1");
  opts.GetOption(run2, "", '-', "sff2");
  opts.GetOption(wellsToUse, "", '-', "use-wells");
  opts.GetOption(samFile, "", '-', "sam-parsed");
  opts.GetOption(statsOut, "", '-', "stats-out");
  opts.GetOption(flowsOut, "", '-', "flows-out");
  opts.GetOption(alignmentOut, "", '-', "align-out");
  opts.GetOption(summaryOut, "", '-', "summary-out");
  opts.GetOption(pairedOut, "", '-', "paired-out");
  opts.GetOption(numFlows, "40", '-', "num-flows");
  opts.GetOption(hpLength, "6", '-', "max-hp");
  opts.GetOption(qScoreCol, "q7Len", '-', "qscore-col");
  opts.GetOption(qLength, "25", '-', "min-qlength");
  opts.GetOption(help, "false", 'h', "help");
  opts.GetOption(wellsFile, "", '-', "wells-file");
  opts.GetOption(bfmaskFile, "", '-', "bfmask-file");
  opts.GetOption(snrFile, "", '-', "snr-file");
  opts.GetOption(binnedHpSigFile, "", '-', "binned-hp-sig-file");
  opts.GetOption(flowErrFile, "", '-', "flow-err-file");
  opts.GetOption(gcErrFile, "", '-', "gc-err-file");
  opts.GetOption(flowOrder, "", '-', "flow-order");
  opts.GetOption(keySeq, "", '-', "key-seq");
  opts.GetOption(colCenter, "0.5", '-', "col-center");
  opts.GetOption(rowCenter, "0.5", '-', "row-center");
  opts.GetOption(colSize, "0", '-', "col-size");
  opts.GetOption(rowSize, "0", '-', "row-size");
  opts.GetOption(gcWin, "40", '-', "gc-win");
  opts.GetOption(sampleSize, "100000", '-', "sample-size");
  if (help || samFile.empty() || statsOut.empty() || summaryOut.empty()) {
    usage();
  }
  opts.CheckNoLeftovers();

  // Some checks to make sure sensible bounds have been set
  if (colCenter < 0 || colCenter > 1) {
    cerr << "AnalyzeHPErrs - col-center must be in the range [0,1]" << endl;
    exit(1);
  }
  if (rowCenter < 0 || rowCenter > 1) {
    cerr << "AnalyzeHPErrs - row-center must be in the range [0,1]" << endl;
    exit(1);
  }
  if (colSize < 0) {
    cerr << "AnalyzeHPErrs - col-size cannot be negative." << endl;
    exit(1);
  }
  if (rowSize < 0) {
    cerr << "AnalyzeHPErrs - row-size cannot be negative." << endl;
    exit(1);
  }

  // Determine rows & cols if a bfmask file was supplied
  int nRow = 0;
  int nCol = 0;
  if (!bfmaskFile.empty()) {
    if (GetRowColFromBfmask(bfmaskFile, &nRow, &nCol)) {
      cerr << "AnalyzeHPErrs - problem determining rows & columns from bfmask file " << bfmaskFile << endl;
      exit(1);
    }
  }

  // Set up fds object
  FlowDiffStats *fds;
  if (!run1.empty()) {
    SffDiffStats *sds = new SffDiffStats(hpLength, nCol, nRow, qScoreCol, run1, run2);
    if (!pairedOut.empty())
      sds->SetPairedOut(pairedOut);
    fds = dynamic_cast<FlowDiffStats *>(sds);
  } else {
    GenomeDiffStats *gds = new GenomeDiffStats(hpLength, nCol, nRow, qScoreCol);
    if (alignmentOut != "") {
      gds->SetAlignmentsOut(alignmentOut);
    }
    if (!flowsOut.empty()) {
      gds->SetFlowsOut(flowsOut);
    }
    fds = dynamic_cast<FlowDiffStats *>(gds);
  }
  if (gcErrFile != "") {
    fds->SetFlowGCOut(gcErrFile);
    fds->SetGCWindowSize(gcWin);
  }
  if (keySeq != "") {
    fds->SetKeySeq(keySeq);
  }
  if (flowOrder != "") {
    fds->SetFlowOrder(flowOrder);
  }
  fds->SetStatsOut(statsOut);

  if (!wellsToUse.empty()) {
    std::vector<int> wells;
    std::vector<bool> use;
    ReadSetFromFile(wellsToUse, 0, wells);
    use.resize(nRow * nCol, false);
    int count = 0;
    ReservoirSample<int> wellSample(sampleSize);
    for (size_t i = 0; i < wells.size(); i++) {
      wellSample.Add(wells[i]);
    }
    wells = wellSample.GetData();
    for (size_t i = 0; i < wells.size(); i++) {
      use[wells[i]] = true;
      count++;
    }
    cout << "Read: " << count << " reads." << endl;
    fds->SetWellToAnalyze(use);
  }

  // Set integer-valued row & column bounds
  int minRow = -1;
  int maxRow = -1;
  int minCol = -1;
  int maxCol = -1;
  if (colSize > 0 || rowSize > 0) {
    if (bfmaskFile.empty()) {
      cerr << "AnalyzeHPErrs - must specify bfmask file when restricting row or column ranges" << endl;
      exit(1);
    }
    if (rowSize > 0) {
      minRow = floor(nRow * rowCenter - rowSize / 2.0);
      maxRow = minRow + rowSize;
      minRow = std::max(0, minRow);
      maxRow = std::min(nRow, maxRow);
    }
    if (colSize > 0) {
      minCol = floor(nCol * colCenter - colSize / 2.0);
      maxCol = minCol + colSize;
      minCol = std::max(0, minCol);
      maxCol = std::min(nCol, maxCol);
    }
  }

  if (wellsFile != "") {
    std::vector<int32_t> xSubset, ySubset;
    fds->FillInSubset(samFile, qLength, minRow, maxRow, minCol, maxCol, xSubset, ySubset);
    if (bfmaskFile.empty()) {
      cerr << "AnalyzeHPErrs - must specify bfmask file when specifying wells file" << endl;
      exit(1);
    }
    fds->SetWellsFile(wellsFile, nRow, nCol, numFlows, xSubset, ySubset);
  }
  if (snrFile != "") {
    cout << "Opening snr file: " << snrFile << endl;
    fds->SetSNROut(snrFile);
  }
  if (binnedHpSigFile != "") {
    cout << "Opening binned HP signal file: " << binnedHpSigFile << endl;
    fds->SetBinnedHpSigOut(binnedHpSigFile);
  }
  if (flowErrFile != "") {
    cout << "Opening flow err file: " << flowErrFile << endl;
    fds->SetFlowErrOut(flowErrFile);
  }

  ofstream summary;
  summary.open(summaryOut.c_str());
  cout << "Reading and analyzing alignments from: " << samFile << endl;
  if (minCol > -1 || maxCol > -1)
    cout << "  Restricting to " << (maxCol - minCol) << " cols in the range [" << minCol << "," << maxCol << ")" << endl;
  if (minRow > -1 || maxRow > -1)
    cout << "  Restricting to " << (maxRow - minRow) << " rows in the range [" << minRow << "," << maxRow << ")" << endl;
  fds->SetAlignmentInFile(samFile);
  fds->FilterAndCompare(numFlows, summary, qLength, minRow, maxRow, minCol, maxCol);
  summary.close();
  delete fds;
  cout << "Done." << endl;
  return 0;
}
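// The wells subset above is thinned with ReservoirSample so that at most
// sample-size wells are analyzed no matter how many are listed. The classic
// algorithm (Vitter's Algorithm R) keeps the first k items, then replaces a
// random slot with decreasing probability. A generic sketch of that
// algorithm, not the project's ReservoirSample class:
#include <cstddef>
#include <random>
#include <vector>

template <typename T>
class Reservoir {
public:
  explicit Reservoir(size_t k) : k_(k), seen_(0), rng_(std::random_device{}()) {}
  void Add(const T &item)
  {
    ++seen_;
    if (data_.size() < k_) {
      data_.push_back(item);
    } else {
      std::uniform_int_distribution<size_t> pick(0, seen_ - 1);
      size_t j = pick(rng_);
      if (j < k_)
        data_[j] = item; // kept with probability k_/seen_
    }
  }
  const std::vector<T> &GetData() const { return data_; }

private:
  size_t k_, seen_;
  std::mt19937 rng_;
  std::vector<T> data_;
};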
// Convert one SFF read to an unmapped BAM alignment, applying the quality
// clips and annotating the RG/PG/ZF/FZ tags. This logic was previously
// repeated verbatim for the first read, the remaining reads, and each
// combined file; all copies now share one consistent clip_flow loop bound.
static void SffReadToBamAlignment(const sff_t *sff, const string &rgname, BamAlignment &bam_alignment)
{
  bam_alignment.SetIsMapped(false);
  bam_alignment.Name = sff->rheader->name->s;

  size_t nBases = sff->rheader->n_bases + 1 - sff->rheader->clip_qual_left;
  if (sff->rheader->clip_qual_right > 0) {
    nBases = sff->rheader->clip_qual_right - sff->rheader->clip_qual_left;
  }
  if (nBases > 0) {
    bam_alignment.QueryBases.reserve(nBases);
    bam_alignment.Qualities.reserve(nBases);
    for (int base = sff->rheader->clip_qual_left - 1; base < sff->rheader->clip_qual_right - 1; ++base) {
      bam_alignment.QueryBases.push_back(sff->read->bases->s[base]);
      bam_alignment.Qualities.push_back(sff->read->quality->s[base] + 33);
    }
  }

  // ZF: flow of the first unclipped base (0-based after the decrement).
  int clip_flow = 0;
  for (unsigned int base = 0; base < sff->rheader->clip_qual_left && base < sff->rheader->n_bases; ++base) {
    clip_flow += sff->read->flow_index[base];
  }
  if (clip_flow > 0) {
    clip_flow--;
  }

  bam_alignment.AddTag("RG", "Z", rgname);
  bam_alignment.AddTag("PG", "Z", string("sff2bam"));
  bam_alignment.AddTag("ZF", "i", clip_flow);
  // TODO: trim flow
  vector<uint16_t> flowgram(sff->gheader->flow_length);
  copy(sff->read->flowgram, sff->read->flowgram + sff->gheader->flow_length, flowgram.begin());
  bam_alignment.AddTag("FZ", flowgram);
}

int main(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  bool help, combineSffs;
  string sffFile;
  string bamFile;
  vector<string> infiles;
  opts.GetOption(help, "false", 'h', "help");
  opts.GetOption(combineSffs, "false", 'c', "combine-sffs");
  opts.GetOption(bamFile, "", 'o', "out-filename");
  opts.GetLeftoverArguments(infiles);
  if (help || infiles.empty()) {
    usage();
  }
  if ((!combineSffs) && infiles.size() > 1) {
    cerr << "sff2bam ERROR: if you want to combine all sff files into a single bam file, please use option -c true." << endl;
    usage();
  }

  sffFile = infiles.front();
  if (bamFile.length() < 1) {
    bamFile = sffFile.substr(0, sffFile.length() - 3);
    bamFile += "bam";
  }

  sff_file_t *sff_file = sff_fopen(sffFile.c_str(), "r", NULL, NULL);
  if (!sff_file) {
    cerr << "sff2bam ERROR: failed to open " << sffFile << endl;
    exit(1);
  }

  // All sff files must have the same flow and key
  if (combineSffs && infiles.size() > 1) {
    for (size_t n = 1; n < infiles.size(); ++n) {
      sff_file_t *sff_file2 = sff_fopen(infiles[n].c_str(), "r", NULL, NULL);
      if (!sff_file2) {
        sff_fclose(sff_file);
        cerr << "sff2bam ERROR: failed to open " << infiles[n] << endl;
        exit(1);
      }
      if (strcmp(sff_file2->header->flow->s, sff_file->header->flow->s) != 0 ||
          strcmp(sff_file2->header->key->s, sff_file->header->key->s) != 0) {
        sff_fclose(sff_file);
        sff_fclose(sff_file2);
        cerr << "sff2bam ERROR: " << sffFile << " and " << infiles[n] << " have different flows or keys." << endl;
        exit(1);
      }
      sff_fclose(sff_file2);
    }
  }

  sff_t *sff = NULL;
  // Read the 1st read to derive the read group name
  sff = sff_read(sff_file);
  if (!sff) {
    sff_fclose(sff_file);
    cerr << "sff2bam ERROR: failed to read " << sffFile << endl;
    exit(1);
  }

  // Set up BAM header
  SamHeader sam_header;
  sam_header.Version = "1.4";
  sam_header.SortOrder = "unsorted";

  SamProgram sam_program("sff2bam");
  sam_program.Name = "sff2bam";
  sam_program.Version = SFF2BAM_VERSION;
  sam_program.CommandLine = "sff2bam";
  sam_header.Programs.Add(sam_program);

  string rgname = sff->rheader->name->s;
  size_t index = rgname.find(':');
  rgname = rgname.substr(0, index);

  SamReadGroup read_group(rgname);
  read_group.FlowOrder = sff->gheader->flow->s;
  read_group.KeySequence = sff->gheader->key->s;
  sam_header.ReadGroups.Add(read_group);

  RefVector refvec;
  BamWriter bamWriter;
  bamWriter.SetCompressionMode(BamWriter::Compressed);
  if (!bamWriter.Open(bamFile, sam_header, refvec)) {
    sff_fclose(sff_file);
    cerr << "sff2bam ERROR: failed to open " << bamFile << endl;
    exit(1);
  }

  // Save 1st read
  BamAlignment bam_alignment0;
  SffReadToBamAlignment(sff, rgname, bam_alignment0);
  sff_destroy(sff);
  sff = NULL;
  bamWriter.SaveAlignment(bam_alignment0);

  // Save remaining reads
  while (NULL != (sff = sff_read(sff_file))) {
    BamAlignment bam_alignment;
    SffReadToBamAlignment(sff, rgname, bam_alignment);
    sff_destroy(sff);
    sff = NULL;
    bamWriter.SaveAlignment(bam_alignment);
  }
  sff_fclose(sff_file);

  if (combineSffs && infiles.size() > 1) {
    for (size_t n = 1; n < infiles.size(); ++n) {
      sff_file_t *sff_file2 = sff_fopen(infiles[n].c_str(), "r", NULL, NULL);
      while (NULL != (sff = sff_read(sff_file2))) {
        BamAlignment bam_alignment;
        SffReadToBamAlignment(sff, rgname, bam_alignment);
        sff_destroy(sff);
        sff = NULL;
        bamWriter.SaveAlignment(bam_alignment);
      }
      sff_fclose(sff_file2);
    }
  }

  bamWriter.Close();
  return 0;
}
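// The clip bookkeeping above depends on the SFF clip fields being 1-based
// positions with 0 meaning "no clip on this side". A small pure function
// showing the resulting half-open window of 0-based base indices; this
// mirrors the loop bounds used above (nBases == clip_right - clip_left) and
// is my reading of the tool's arithmetic, not code from the project:
#include <cstdint>
#include <cstdio>

void ClippedWindow(uint32_t n_bases, uint32_t clip_left, uint32_t clip_right,
                   uint32_t *first, uint32_t *last)
{
  *first = (clip_left == 0) ? 0 : clip_left - 1;
  *last = (clip_right == 0) ? n_bases : clip_right - 1;
  if (*last < *first)
    *last = *first; // clips crossed: empty window
}

int main()
{
  uint32_t first = 0, last = 0;
  ClippedWindow(100, 5, 95, &first, &last);
  std::printf("[%u,%u) -> %u bases\n", first, last, last - first); // [4,94) -> 90 bases
  return 0;
}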
int main(int argc, const char *argv[])
{
  if (argc == 1) {
    printf("BaseCallerLite - Bare bone basecaller\n");
    printf("\n");
    printf("Usage:\n");
    printf("BaseCallerLite [options]\n");
    printf("\tOptions:\n");
    printf("\t\tComing soon\n");
    printf("\n");
    return 1;
  }

  string libKey = "TCAG";
  string inputDirectory = ".";
  string outputDirectory = ".";
  bool singleCoreCafie = false;

  BaseCallerLite basecaller;
  basecaller.regionXSize = 50;
  basecaller.regionYSize = 50;
  basecaller.runId = "BCLTE";
  basecaller.CF = 0.0;
  basecaller.IE = 0.0;
  basecaller.numWellsCalled = 0;
  basecaller.nextRegionX = 0;
  basecaller.nextRegionY = 0;

  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  opts.GetOption(basecaller.CF, "0.0", '-', "cf");
  opts.GetOption(basecaller.IE, "0.0", '-', "ie");
  opts.GetOption(inputDirectory, ".", '-', "input-dir");
  opts.GetOption(outputDirectory, ".", '-', "output-dir");
  opts.GetOption(singleCoreCafie, "false", '-', "singlecorecafie");

  int numWorkers = 2 * numCores();
  if (singleCoreCafie)
    numWorkers = 1;

  Mask mask(1, 1);
  if (mask.SetMask((inputDirectory + "/bfmask.bin").c_str()))
    exit(EXIT_FAILURE);

  RawWells wells(inputDirectory.c_str(), "1.wells");
  // SetWellsToLiveBeadsOnly(wells, &mask);
  wells.OpenForIncrementalRead();

  basecaller.maskPtr = &mask;
  basecaller.wellsPtr = &wells;
  basecaller.rows = mask.H();
  basecaller.cols = mask.W();
  basecaller.flowOrder.SetFlowOrder(wells.FlowOrder(), wells.NumFlows());
  basecaller.numFlows = wells.NumFlows();
  basecaller.numRegionsX = (basecaller.cols + basecaller.regionXSize - 1) / basecaller.regionXSize;
  basecaller.numRegionsY = (basecaller.rows + basecaller.regionYSize - 1) / basecaller.regionYSize;
  basecaller.numRegions = basecaller.numRegionsX * basecaller.numRegionsY;
  basecaller.libKeyFlows.assign(basecaller.numFlows, 0);
  basecaller.libNumKeyFlows = basecaller.flowOrder.BasesToFlows(libKey, &basecaller.libKeyFlows[0], basecaller.numFlows);
  basecaller.libSFF.Open(outputDirectory + "/rawlib.sff", basecaller.numRegions, basecaller.flowOrder, libKey);

  time_t startBasecall;
  time(&startBasecall);

  pthread_mutex_init(&basecaller.wellsAccessMutex, NULL);
  pthread_t worker_id[numWorkers];
  for (int iWorker = 0; iWorker < numWorkers; iWorker++)
    if (pthread_create(&worker_id[iWorker], NULL, BasecallerWorkerWrapper, &basecaller)) {
      printf("*Error* - problem starting thread\n");
      return 1;
    }
  for (int iWorker = 0; iWorker < numWorkers; iWorker++)
    pthread_join(worker_id[iWorker], NULL);
  pthread_mutex_destroy(&basecaller.wellsAccessMutex);

  time_t endBasecall;
  time(&endBasecall);

  basecaller.libSFF.Close();

  printf("\nBASECALLING: called %d of %d wells in %1.1f seconds with %d threads\n",
         basecaller.numWellsCalled, basecaller.rows * basecaller.cols,
         difftime(endBasecall, startBasecall), numWorkers);
  printf("Generated library SFF with %d reads\n", basecaller.libSFF.num_reads());
  return 0;
}
int main(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  string regionFile;
  vector<string> matchStrings;
  vector<string> datFiles;
  int maskCenter = MaskEmpty;
  int maskMatch = MaskLive | MaskBead | MaskDud;
  string maskFile;
  string outPrefix;
  bool setHex;
  int frameStart, frameEnd;
  bool help;
  bool useDuds;
  int optCenter, optMatch;
  opts.GetOption(help, "false", 'h', "help");
  opts.GetOption(regionFile, "", '-', "region-file");
  opts.GetOption(datFiles, "", '-', "dat-files");
  opts.GetOption(matchStrings, "", '-', "matches");
  opts.GetOption(outPrefix, "", '-', "out-prefix");
  opts.GetOption(useDuds, "false", '-', "use-duds");
  opts.GetOption(maskFile, "", '-', "mask-file");
  opts.GetOption(frameStart, "14", '-', "frame-start");
  opts.GetOption(frameEnd, "20", '-', "frame-end");
  opts.GetOption(optCenter, "0", '-', "center");
  opts.GetOption(optMatch, "0", '-', "match");
  opts.GetOption(setHex, "false", '-', "set-hex");
  if (useDuds) {
    maskMatch = MaskDud;
  } else if (optMatch != 0) {
    maskMatch = optMatch;
  }
  if (optCenter != 0) {
    maskCenter = optCenter;
  }
  vector<Traces> flows;
  vector<struct Region> regions;
  cout << "Loading mask." << endl;
  Mask mask(maskFile.c_str());
  mask.SetHex(setHex);
  for (size_t i = 0; i < matchStrings.size(); i++) {
    ION_ASSERT(matchStrings[i].length() == matchStrings[0].length(), "Match strings must match in length.");
  }
  cout << "Loading regions." << endl;
  LoadRegions(regionFile, regions);
  cout << "Loading traces." << endl;
  LoadTraces(mask, datFiles, flows);
  for (size_t i = 0; i < matchStrings.size(); i++) {
    cout << "Using frame num: " << frameStart << " to " << frameEnd << " for match string: " << matchStrings[i] << endl;
    ParseMetrics(matchStrings[i], i, mask, maskCenter, maskMatch, regions, flows, outPrefix, frameStart, frameEnd);
  }
  cout << "Saw: " << centerSeen << " wells and: " << haystackNeg << " negatives." << endl;
  return 0;
}
int RetrieveParameterVectorDouble(OptArgs &opts, Json::Value &json, char short_name,
                                  const string &long_name_hyphens, const string &default_value,
                                  vector<double> &ret_vector)
{
  string long_name_underscores = GetRidOfDomainAndHyphens(long_name_hyphens);
  string value = default_value;
  if (value.length() > 0) {
    vector<string> words;
    split(value, ',', words);
    ret_vector.clear();
    for (size_t i = 0; i < words.size(); i++) {
      char *end;
      int err = errno;
      errno = 0;
      ret_vector.push_back(strtod(words[i].c_str(), &end));
      if (errno != 0 || *end != '\0') {
        cout << "Error converting: " + words[i] + " to a double for option: " + long_name_hyphens << endl;
        return errno;
      }
      errno = err;
    }
  }

  string source = "builtin default";
  if (json.isMember(long_name_underscores)) {
    ret_vector.clear();
    size_t sz = json[long_name_underscores].size();
    char buf[1000];
    if (sz > 0) {
      if (sz == 1) {
        if (json[long_name_underscores][0].isString()) {
          ret_vector.push_back(atof(json[long_name_underscores][0].asCString()));
          value = json[long_name_underscores][0].asCString();
        } else {
          ret_vector.push_back(json[long_name_underscores][0].asDouble());
          sprintf(buf, "%f", ret_vector[0]);
          value = buf;
        }
      } else {
        value = "";
        for (int i = 0; i < (int)sz - 1; i++) {
          if (json[long_name_underscores][i].isString()) {
            ret_vector.push_back(atof(json[long_name_underscores][i].asCString()));
            value += json[long_name_underscores][i].asCString();
            value += ",";
          } else {
            ret_vector.push_back(json[long_name_underscores][i].asDouble());
            sprintf(buf, "%f,", ret_vector[i]);
            value += buf;
          }
        }
        if (json[long_name_underscores][(int)sz - 1].isString()) {
          ret_vector.push_back(atof(json[long_name_underscores][(int)sz - 1].asCString()));
          value += json[long_name_underscores][(int)sz - 1].asCString();
        } else {
          ret_vector.push_back(json[long_name_underscores][(int)sz - 1].asDouble());
          sprintf(buf, "%f", ret_vector[(int)sz - 1]);
          value += buf;
        }
      }
      source = "parameters json file";
    }
  }

  if (opts.HasOption(short_name, long_name_hyphens)) {
    ret_vector.clear();
    opts.GetOption(ret_vector, default_value, short_name, long_name_hyphens);
    char buf[1000];
    if (ret_vector.empty()) {
      cout << "Error setting: there is no value set for option: " + long_name_hyphens << endl;
      return 1;
    } else if (ret_vector.size() == 1) {
      sprintf(buf, "%f", ret_vector[0]);
      value = buf;
    } else {
      value = "";
      for (size_t i = 0; i < ret_vector.size() - 1; i++) {
        sprintf(buf, "%f,", ret_vector[i]);
        value += buf;
      }
      sprintf(buf, "%f", ret_vector[ret_vector.size() - 1]);
      value += buf;
    }
    source = "command line option";
  }

  cout << setw(35) << long_name_hyphens << " = " << setw(10) << value
       << " (double, " << source << ")" << endl;
  return 0;
}
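// RetrieveParameterVectorDouble implements a three-level precedence: builtin
// default, then the JSON parameters file, then the command line, echoing the
// winning source next to the value. The strtod-based list parsing can be
// stripped down to the following standalone sketch (option and JSON plumbing
// omitted):
#include <cerrno>
#include <cstdlib>
#include <sstream>
#include <string>
#include <vector>

// Parse "1.0,2.5,3" into doubles; reject any token strtod cannot fully consume.
bool ParseDoubleList(const std::string &csv, std::vector<double> &out)
{
  out.clear();
  std::stringstream ss(csv);
  std::string tok;
  while (std::getline(ss, tok, ',')) {
    char *end = NULL;
    errno = 0;
    double v = strtod(tok.c_str(), &end);
    if (errno != 0 || end == tok.c_str() || *end != '\0')
      return false; // overflow, empty token, or trailing junk
    out.push_back(v);
  }
  return !out.empty();
}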
int main(int argc, const char *argv[])
{
  OptArgs opts;
  string h5file_in;
  string source;
  string h5file_out;
  string destination;
  string positions_file;
  bool help;
  string flowlimit_arg;
  unsigned int flowlimit;
  vector<string> otherArgs;

  DumpStartingStateOfExtractWells(argc, argv);

  opts.ParseCmdLine(argc, argv);
  opts.GetOption(h5file_in, "", 'i', "input");
  opts.GetOption(source, "", 's', "source");
  opts.GetOption(h5file_out, "", 'o', "output");
  opts.GetOption(destination, "", 'd', "destination");
  opts.GetOption(flowlimit_arg, "", 'f', "flowlimit");
  opts.GetOption(positions_file, "", 'p', "positions");
  opts.GetOption(help, "false", 'h', "help"); // parsed but not acted on here
  opts.GetLeftoverArguments(otherArgs);

  // input data processing: read "row col" pairs, one pair per line,
  // from the positions file if given, otherwise from stdin
  string line;
  vector<size_t> row_val;
  vector<size_t> col_val;
  ifstream filestream;
  if (!positions_file.empty())
    filestream.open(positions_file.c_str());
  istream &input = filestream.is_open() ? filestream : cin;
  while (getline(input, line)) {
    int num = -1;
    vector<size_t> ints;
    istringstream ss(line);
    while (ss >> num && ints.size() < 2) {
      if (num < 0) {
        fprintf(stderr, "Found negative integer %d\n", num);
        exit(-1);
      }
      ints.push_back((size_t)num);
    }
    if (ints.size() != 2) {
      fprintf(stderr, "Found %d integers in %s, expected 2\n", (int)ints.size(), line.c_str());
      continue;
    }
    row_val.push_back(ints.at(0));
    col_val.push_back(ints.at(1));
  }
  if (row_val.empty()) {
    fprintf(stdout, "No positions to extract, check input\n");
    exit(0);
  }
  vector<size_t> input_positions(row_val.size(), 0);

  int numCPU = (int)sysconf(_SC_NPROCESSORS_ONLN);
  int numThreads = MAXTHREADS < numCPU ? MAXTHREADS : numCPU;
  fprintf(stdout, "Using %d threads of %d cores\n", numThreads, numCPU);

  if (source.empty())
    source = SIGNAL_IN;
  H5ReplayReader reader = H5ReplayReader(h5file_in, &source[0]);
  if (h5file_out.empty())
    h5file_out = H5FILE_OUT;
  if (destination.empty())
    destination = SIGNAL_OUT;

  reader.Open();
  int rank = reader.GetRank();
  vector<hsize_t> dims(rank);
  vector<hsize_t> chunks(rank);
  reader.GetDims(dims);
  reader.GetChunkSize(chunks);
  reader.Close();

  // convert input row, col positions to chip indices and sort them
  for (size_t i = 0; i < input_positions.size(); i++)
    input_positions.at(i) = RowColToIndex(row_val.at(i), col_val.at(i), dims.at(0), dims.at(1));
  sort(input_positions.begin(), input_positions.end());

  fprintf(stdout, "Opened for read %s:%s with rank %d, row x col x flow dims=[ ",
          h5file_in.c_str(), source.c_str(), rank);
  for (int i = 0; i < rank; i++)
    fprintf(stdout, "%d ", (int)dims.at(i));
  fprintf(stdout, "], chunksize=[ ");
  for (int i = 0; i < rank; i++)
    fprintf(stdout, "%d ", (int)chunks.at(i));
  fprintf(stdout, "]\n");

  H5ReplayRecorder recorder = H5ReplayRecorder(h5file_out, &destination[0], reader.GetType(), 2);
  recorder.CreateFile();

  {
    vector<hsize_t> dims_pos(1, input_positions.size());
    string pos_name = "position";
    H5ReplayRecorder recorder_pos = H5ReplayRecorder(h5file_out, &pos_name[0], H5T_NATIVE_ULONG, 1);
    recorder_pos.CreateDataset(dims_pos);
  }
  {
    string chip_dims = "chip_dims";
    H5ReplayRecorder recorder_chip_dims = H5ReplayRecorder(h5file_out, &chip_dims[0], H5T_NATIVE_ULLONG, 1);
    vector<hsize_t> offset_dims(1, 0);
    vector<hsize_t> count_dims(1, 3);
    recorder_chip_dims.CreateDataset(count_dims);
    recorder_chip_dims.Write(offset_dims, count_dims, offset_dims, count_dims, &dims[0]);
  }

  if (flowlimit_arg.empty())
    flowlimit = dims.at(2);
  else
    flowlimit = atoi(flowlimit_arg.c_str());
  flowlimit = (flowlimit < dims.at(2)) ? flowlimit : dims.at(2);
  fprintf(stdout, "Using %u flows\n", flowlimit);

  // output chunks: no bigger than 100000 positions
  vector<hsize_t> chunks_out(2);
  chunks_out.at(0) = (input_positions.size() < 100000) ? input_positions.size() : 100000;
  chunks_out.at(1) = chunks.at(2);
  recorder.CreateDataset(chunks_out);

  vector<hsize_t> extension(2);
  extension.at(0) = input_positions.size();
  extension.at(1) = dims.at(2);
  recorder.ExtendDataSet(extension); // extend if necessary

  fprintf(stdout, "Opening for write %s:%s with rank %d, position x flow chunks=[ ",
          h5file_out.c_str(), destination.c_str(), (int)chunks_out.size());
  for (int i = 0; i < (int)chunks_out.size(); i++)
    fprintf(stdout, "%d ", (int)chunks_out.at(i));
  fprintf(stdout, "]\n");

  int max_threads_ever = (int)((dims.at(0)/chunks.at(0) + 1) * (dims.at(1)/chunks.at(1) + 1));
  thread_flags.resize(max_threads_ever, 0);

  unsigned int thread_id = 0;
  vector<thread_args> my_args(max_threads_ever);
  size_t runningCount = 0;

  // layout is rows x cols x flows; walk the chip chunk by chunk, counting
  // how many requested positions land in each chunk before spawning a worker
  for (size_t row = 0; row < dims.at(0); ) {
    for (size_t col = 0; col < dims.at(1); ) {
      size_t ix = 0;
      hsize_t offset_out = 0;
      hsize_t count_out = 0;
      vector<size_t> limit(2);
      limit.at(0) = (row + chunks.at(0) < dims.at(0)) ? row + chunks.at(0) : dims.at(0);
      limit.at(1) = (col + chunks.at(1) < dims.at(1)) ? col + chunks.at(1) : dims.at(1);
      for (size_t rr = row; rr < limit.at(0) && ix < input_positions.size(); rr++) {
        for (size_t cc = col; cc < limit.at(1) && ix < input_positions.size(); cc++) {
          size_t pos = input_positions.at(ix);
          size_t chp_indx = RowColToIndex(rr, cc, dims.at(0), dims.at(1));
          if (chp_indx < pos)
            continue;
          while (chp_indx > pos) {
            ix++;
            if (ix == input_positions.size())
              break;
            pos = input_positions.at(ix);
          }
          if (chp_indx == pos) {
            if (count_out == 0)
              offset_out = runningCount;
            count_out++;
            runningCount++;
            ix++;
            continue;
          }
        }
      }
      assert(ix <= input_positions.size());
      assert(runningCount <= input_positions.size());
      if (count_out > 0) {
        pthread_t thread;
        int thread_status = 0;
        assert(thread_id < thread_flags.size());
        my_args.at(thread_id).row = row;
        my_args.at(thread_id).col = col;
        my_args.at(thread_id).chunks = &chunks;
        my_args.at(thread_id).chunks_out = &chunks_out;
        my_args.at(thread_id).dims = &dims;
        my_args.at(thread_id).h5file_in = &h5file_in;
        my_args.at(thread_id).source = &source;
        my_args.at(thread_id).h5file_out = &h5file_out;
        my_args.at(thread_id).destination = &destination;
        my_args.at(thread_id).offset_out = offset_out;
        my_args.at(thread_id).count_out = count_out;
        my_args.at(thread_id).input_positions = &input_positions;
        my_args.at(thread_id).thread_id = thread_id;
        my_args.at(thread_id).flowlimit = flowlimit;
        // only have to be approximate, don't worry about races
        while (accumulate(thread_flags.begin(), thread_flags.end(), 0) > numThreads) {
          fprintf(stdout, "Sleeping before creating thread %d from row=%d (max %d), column=%d (max %d), offset_out=%llu, count_out=%llu ...\n",
                  thread_id, (int)row, (int)dims.at(0), (int)col, (int)dims.at(1),
                  (unsigned long long)offset_out, (unsigned long long)count_out);
          sleep(1);
        }
        thread_flags.at(thread_id) = 1;
        thread_status = pthread_create(&thread, NULL, do_subset, (void *)&my_args[thread_id]);
        assert(thread_status == 0); // pthread_create returns 0 on success
        thread_id++;
      }
      col += chunks.at(1);
    }
    row += chunks.at(0);
  }
  // wait for the threads to finish (each worker clears its thread_flags slot)
  while (accumulate(thread_flags.begin(), thread_flags.end(), 0) > 0)
    sleep(1);
  assert(runningCount == input_positions.size());
  cout << "Done." << endl;
  pthread_exit(NULL);
}
int main(int argc, const char *argv[])
{
  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  string inFile, outFile;
  bool help = false;
  bool version = false;
  double lower = -5.0;
  double upper = 28.0;
  opts.GetOption(inFile, "", 'i', "input-file");
  opts.GetOption(outFile, "", 'o', "output-file");
  opts.GetOption(lower, "-5.0", '-', "wells-convert-low");
  opts.GetOption(upper, "28.0", '-', "wells-convert-high");
  opts.GetOption(help, "false", 'h', "help");
  opts.GetOption(version, "false", 'v', "version");
  opts.CheckNoLeftovers();
  if (version) {
    fprintf(stdout, "%s", IonVersion::GetFullVersion("RawWellsConverter").c_str());
    exit(0);
  }
  if (inFile.empty() || help) {
    cout << "RawWellsConverter - Convert an unsigned short wells file to a float wells file, or vice versa." << endl
         << "options: " << endl
         << "   -i,--input-file    input wells file." << endl
         << "   -o,--output-file   output wells file." << endl
         << "      --wells-convert-low    lower bound for converting to unsigned short." << endl
         << "      --wells-convert-high   upper bound for converting to unsigned short." << endl
         << "   -h,--help    this message." << endl
         << "" << endl
         << "usage: " << endl
         << "   RawWellsConverter -i input_path/1.wells -o output_path/1.wells " << endl;
    exit(1);
  }
  struct stat sb;
  if (stat(inFile.c_str(), &sb) != 0) {
    cerr << "RawWellsConverter ERROR: " << inFile << " does not exist." << endl;
    exit(1);
  }
  if (outFile.empty()) {
    outFile = inFile;
    outFile += ".converted";
  }
  // work on a copy of the input; the conversion happens in place in the copy
  string cmd("cp ");
  cmd += inFile;
  cmd += " ";
  cmd += outFile;
  if (system(cmd.c_str()) != 0) {
    cerr << "RawWellsConverter ERROR: Fail to copy " << inFile << " to " << outFile << endl;
    exit(1);
  }
  hid_t root = H5Fopen(outFile.c_str(), H5F_ACC_RDWR, H5P_DEFAULT);
  if (root < 0) {
    cerr << "RawWellsConverter ERROR: Fail to open " << outFile << endl;
    exit(1);
  }
  H5G_info_t group_info;
  group_info.nlinks = 0;
  if (H5Gget_info(root, &group_info) < 0) {
    H5Fclose(root);
    cerr << "RawWellsConverter ERROR: Fail H5Gget_info." << endl;
    exit(1);
  }
  string sName;
  bool bWells = false;
  bool bCopies = false;
  for (unsigned int i = 0; i < group_info.nlinks; ++i) {
    int size = H5Gget_objname_by_idx(root, i, NULL, 0);
    vector<char> name(size + 1, 0); // sized buffer; a fixed char[10] would overflow on "wells_copies"
    if (H5Gget_objname_by_idx(root, i, &name[0], size + 1) < 0) {
      H5Fclose(root);
      cerr << "RawWellsConverter ERROR: Fail H5Gget_objname_by_idx." << endl;
      exit(1);
    }
    sName = &name[0];
    if (sName == "wells") { bWells = true; }
    if (sName == "wells_copies") { bCopies = true; }
  }
  if (!bWells) {
    H5Fclose(root);
    cerr << "RawWellsConverter ERROR: There is no dataset wells." << endl;
    exit(1);
  }
  hid_t dsWells = H5Dopen2(root, "wells", H5P_DEFAULT);
  if (dsWells < 0) {
    H5Fclose(root);
    cerr << "RawWellsConverter ERROR: Fail H5Dopen2 wells." << endl;
    exit(1);
  }
  // The convert_low/convert_high attributes mark an unsigned short wells
  // file; their stored values override the command-line bounds.
  bool saveAsUShort = false;
  if (H5Aexists(dsWells, "convert_low") > 0) {
    hid_t attrLower = H5Aopen(dsWells, "convert_low", H5P_DEFAULT); // third argument is an access plist, not a type
    float lowerF = 0.0f;
    H5Aread(attrLower, H5T_NATIVE_FLOAT, &lowerF); // read into a float to match the stored type
    lower = lowerF;
    saveAsUShort = true;
    H5Aclose(attrLower);
  }
  if (H5Aexists(dsWells, "convert_high") > 0) {
    hid_t attrUpper = H5Aopen(dsWells, "convert_high", H5P_DEFAULT);
    float upperF = 0.0f;
    H5Aread(attrUpper, H5T_NATIVE_FLOAT, &upperF);
    upper = upperF;
    saveAsUShort = true;
    H5Aclose(attrUpper);
  }
  hid_t dataSpace = H5Dget_space(dsWells);
  if (dataSpace < 0) {
    H5Dclose(dsWells);
    H5Fclose(root);
    cerr << "RawWellsConverter ERROR: Fail H5Dget_space wells." << endl;
    exit(1);
  }
  hssize_t dsSize = H5Sget_simple_extent_npoints(dataSpace);
  if (dsSize < 1) {
    H5Sclose(dataSpace); H5Dclose(dsWells); H5Fclose(root);
    cerr << "RawWellsConverter ERROR: Wrong size of dataset wells - " << dsSize << endl;
    exit(1);
  }
  int nRows = 0;
  int nCols = 0;
  int nFlows = 0;
  int rank = H5Sget_simple_extent_ndims(dataSpace);
  if (rank != 3) {
    bCopies = false;
  } else {
    hsize_t dims_out[3];
    int status_n = H5Sget_simple_extent_dims(dataSpace, dims_out, NULL);
    if (status_n < 0) {
      bCopies = false;
    } else {
      nRows = dims_out[0];
      nCols = dims_out[1];
      nFlows = dims_out[2];
    }
  }
  float* fPtr = new (std::nothrow) float[dsSize];
  unsigned short* usPtr = new (std::nothrow) unsigned short[dsSize];
  if (fPtr == NULL || usPtr == NULL) { // meaningful only with nothrow new
    H5Sclose(dataSpace); H5Dclose(dsWells); H5Fclose(root);
    cerr << "RawWellsConverter ERROR: Fail to allocate fPtr or usPtr." << endl;
    exit(1);
  }
  hid_t dcpl = H5Dget_create_plist(dsWells);
  if (dcpl < 0) {
    H5Sclose(dataSpace); H5Dclose(dsWells); H5Fclose(root);
    cerr << "RawWellsConverter ERROR: Fail H5Dget_create_plist." << endl;
    exit(1);
  }
  hid_t dapl = H5Dget_access_plist(dsWells);
  if (dapl < 0) {
    H5Pclose(dcpl); H5Sclose(dataSpace); H5Dclose(dsWells); H5Fclose(root);
    cerr << "RawWellsConverter ERROR: Fail H5Dget_access_plist." << endl;
    exit(1);
  }
  if (saveAsUShort) {
    cout << "RawWellsConverter: converting unsigned short wells file - " << inFile
         << " to float wells file - " << outFile
         << " with boundary (" << lower << "," << upper << ")" << endl;
    herr_t ret = H5Dread(dsWells, H5T_NATIVE_USHORT, H5S_ALL, H5S_ALL, H5P_DEFAULT, usPtr);
    H5Dclose(dsWells);
    if (ret < 0) {
      delete [] fPtr; delete [] usPtr;
      H5Sclose(dataSpace); H5Pclose(dcpl); H5Pclose(dapl); H5Fclose(root);
      cerr << "RawWellsConverter ERROR: Fail to read dataset wells." << endl;
      exit(1);
    }
    float factor = 65535.0 / (upper - lower);
    float* fPtr2 = fPtr;
    unsigned short* usPtr2 = usPtr;
    for (hssize_t i = 0; i < dsSize; ++i, ++fPtr2, ++usPtr2) {
      (*fPtr2) = (float)(*usPtr2) / factor + lower;
    }
    delete [] usPtr;
    if (bCopies) {
      // fold per-well copy counts back into the signal
      vector<float> copies(nRows * nCols, 1.0);
      hid_t dsCopies = H5Dopen2(root, "wells_copies", H5P_DEFAULT);
      if (dsCopies < 0) {
        cerr << "RawWellsConverter WARNING: wells file does not have wells_copies." << endl;
      } else {
        hid_t dataSpace2 = H5Dget_space(dsCopies);
        if (dataSpace2 < 0) {
          H5Dclose(dsCopies);
          cerr << "RawWellsConverter WARNING: fail to H5Dget_space for dataset wells_copies." << endl;
        } else {
          hssize_t dsSize2 = H5Sget_simple_extent_npoints(dataSpace2);
          H5Sclose(dataSpace2);
          if (dsSize2 != (hssize_t)(nRows * nCols)) {
            H5Dclose(dsCopies);
            cerr << "RawWellsConverter WARNING: dataset wells_copies size is " << dsSize2
                 << ", it is different from nRows * nCols = " << nRows * nCols << endl;
          } else {
            herr_t ret2 = H5Dread(dsCopies, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &copies[0]);
            H5Dclose(dsCopies);
            if (ret2 < 0) {
              copies.assign(nRows * nCols, 1.0); // reset to the default; resize() would keep partly-read values
              cerr << "RawWellsConverter WARNING: fail to load dataset wells_copies." << endl;
            }
          }
        }
      }
      uint64_t fptrCount = 0;
      uint64_t copyCount = 0;
      for (int row = 0; row < nRows; ++row) {
        for (int col = 0; col < nCols; ++col) {
          for (int flow = 0; flow < nFlows; ++flow) {
            if (copies[copyCount] > 0) {
              fPtr[fptrCount] *= copies[copyCount];
            } else {
              fPtr[fptrCount] = -1.0;
            }
            ++fptrCount;
          }
          ++copyCount;
        }
      }
    }
    H5Ldelete(root, "wells", H5P_DEFAULT);
    hid_t dsWells2 = H5Dcreate2(root, "wells", H5T_NATIVE_FLOAT, dataSpace, H5P_DEFAULT, dcpl, dapl);
    if (dsWells2 < 0) {
      delete [] fPtr;
      H5Sclose(dataSpace); H5Pclose(dcpl); H5Pclose(dapl); H5Fclose(root);
      cerr << "RawWellsConverter ERROR: Fail to create dataset wells." << endl;
      exit(1);
    }
    ret = H5Dwrite(dsWells2, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, fPtr);
    delete [] fPtr;
    H5Dclose(dsWells2); H5Sclose(dataSpace); H5Pclose(dcpl); H5Pclose(dapl); H5Fclose(root);
    if (ret < 0) {
      cerr << "RawWellsConverter ERROR: Fail to write dataset wells." << endl;
      exit(1);
    }
  } else {
    cout << "RawWellsConverter: converting float wells file - " << inFile
         << " to unsigned short wells file - " << outFile
         << " with boundary (" << lower << "," << upper << ")" << endl;
    herr_t ret = H5Dread(dsWells, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, fPtr);
    H5Dclose(dsWells);
    if (ret < 0) {
      delete [] fPtr; delete [] usPtr;
      H5Sclose(dataSpace); H5Pclose(dcpl); H5Pclose(dapl); H5Fclose(root);
      cerr << "RawWellsConverter ERROR: Fail to read dataset wells." << endl;
      exit(1);
    }
    float factor = 65535.0 / (upper - lower);
    float* fPtr2 = fPtr;
    unsigned short* usPtr2 = usPtr;
    for (hssize_t i = 0; i < dsSize; ++i, ++fPtr2, ++usPtr2) {
      if (*fPtr2 < lower) {
        (*usPtr2) = 0;
      } else if (*fPtr2 > upper) {
        (*usPtr2) = 65535;
      } else {
        (*usPtr2) = (unsigned short)((*fPtr2 - lower) * factor);
      }
    }
    delete [] fPtr;
    H5Ldelete(root, "wells", H5P_DEFAULT);
    hid_t dsWells2 = H5Dcreate2(root, "wells", H5T_NATIVE_USHORT, dataSpace, H5P_DEFAULT, dcpl, dapl);
    if (dsWells2 < 0) {
      delete [] usPtr;
      H5Sclose(dataSpace); H5Pclose(dcpl); H5Pclose(dapl); H5Fclose(root);
      cerr << "RawWellsConverter ERROR: Fail to create dataset wells." << endl;
      exit(1);
    }
    ret = H5Dwrite(dsWells2, H5T_NATIVE_USHORT, H5S_ALL, H5S_ALL, H5P_DEFAULT, usPtr);
    delete [] usPtr;
    if (ret < 0) { // check the write status, not the dataset handle
      H5Dclose(dsWells2); H5Sclose(dataSpace); H5Pclose(dcpl); H5Pclose(dapl); H5Fclose(root);
      cerr << "RawWellsConverter ERROR: Fail to write dataset wells." << endl;
      exit(1);
    }
    // record the conversion bounds so the reverse conversion can recover them
    float lower2 = (float)lower;
    float upper2 = (float)upper;
    hsize_t dimsa[1];
    dimsa[0] = 1;
    hid_t dataspacea = H5Screate_simple(1, dimsa, NULL);
    hid_t attrLower = H5Acreate(dsWells2, "convert_low", H5T_NATIVE_FLOAT, dataspacea, H5P_DEFAULT, H5P_DEFAULT);
    H5Awrite(attrLower, H5T_NATIVE_FLOAT, &lower2);
    H5Aclose(attrLower);
    hid_t attrUpper = H5Acreate(dsWells2, "convert_high", H5T_NATIVE_FLOAT, dataspacea, H5P_DEFAULT, H5P_DEFAULT);
    H5Awrite(attrUpper, H5T_NATIVE_FLOAT, &upper2);
    H5Aclose(attrUpper);
    H5Sclose(dataspacea);
    H5Dclose(dsWells2); H5Sclose(dataSpace); H5Pclose(dcpl); H5Pclose(dapl); H5Fclose(root);
  }
  return 0;
}
int main(int argc, const char *argv[])
{
#ifdef _DEBUG
  atexit(memstatus);
  dbgmemInit();
#endif /* _DEBUG */

  printf("%s - %s-%s (%s)\n", argv[0], IonVersion::GetVersion().c_str(),
         IonVersion::GetRelease().c_str(), IonVersion::GetSvnRev().c_str());

  string bamInputFilename;
  string fastaInputFilename;
  string jsonOutputFilename;
  bool help;

  OptArgs opts;
  opts.ParseCmdLine(argc, argv);
  opts.GetOption(bamInputFilename, "", '-', "bam");
  opts.GetOption(fastaInputFilename, "", '-', "ref");
  opts.GetOption(jsonOutputFilename, "TFStats.json", '-', "output-json");
  opts.GetOption(help, "false", 'h', "help");
  opts.CheckNoLeftovers();
  if (help || bamInputFilename.empty() || fastaInputFilename.empty())
    return showHelp();

  // Parse BAM header
  BAMReader bamReader(bamInputFilename);
  bamReader.open();
  bam_header_t *header = (bam_header_t *)bamReader.get_header_ptr();

  int numFlows = 0;
  string flowOrder;
  string key;
  if (header->l_text >= 3) {
    if (header->dict == 0)
      header->dict = sam_header_parse2(header->text);
    int nEntries = 0;
    char **tmp = sam_header2list(header->dict, "RG", "FO", &nEntries);
    if (nEntries) {
      flowOrder = tmp[0];
      numFlows = flowOrder.length();
    }
    if (tmp) free(tmp);
    nEntries = 0;
    tmp = sam_header2list(header->dict, "RG", "KS", &nEntries);
    if (nEntries)
      key = tmp[0];
    if (tmp) free(tmp);
  }
  if (numFlows <= 0) {
    fprintf(stderr, "[TFMapper] Could not retrieve flow order from FO BAM tag. SFF-specific tags absent?\n");
    exit(1);
  }
  if (key.empty()) {
    fprintf(stderr, "[TFMapper] Could not retrieve key sequence from KS BAM tag. SFF-specific tags absent?\n");
    exit(1);
  }

  // Retrieve test fragment sequences
  vector<string> referenceSequences;
  PopulateReferenceSequences(referenceSequences, fastaInputFilename, header->n_targets, header->target_name, string(""));

  // Process the BAM reads and generate metrics
  int numTFs = header->n_targets;
  vector<int> TFCount(numTFs, 0);
  // vectors rather than variable-length arrays of class type (a GCC extension)
  vector<MetricGeneratorQualityHistograms> metricGeneratorQualityHistograms(numTFs);
  vector<MetricGeneratorHPAccuracy> metricGeneratorHPAccuracy(numTFs);
  vector<MetricGeneratorSNR> metricGeneratorSNR(numTFs);
  vector<MetricGeneratorAvgIonogram> metricGeneratorAvgIonogram(numTFs);

  for (BAMReader::iterator i = bamReader.get_iterator(); i.good(); i.next()) {
    BAMRead bamRead = i.get();
    int bestTF = bamRead.get_tid();
    if (bestTF < 0)
      continue;
    BAMUtils bamUtil(bamRead);
    TFCount[bestTF]++;

    // Extract flowspace signal from FZ BAM tag
    uint16_t *bam_flowgram = NULL;
    uint8_t *fz = bam_aux_get(bamRead.get_bam_ptr(), "FZ");
    if (fz != NULL) {
      if (fz[0] == (uint8_t)'B' && fz[1] == (uint8_t)'S' && *((uint32_t *)(fz+2)) == (uint32_t)numFlows)
        bam_flowgram = (uint16_t *)(fz+6);
    }
    if (bam_flowgram == NULL) {
      fprintf(stderr, "[TFMapper] Could not retrieve flow signal from FZ BAM tag. SFF-specific tags absent?\n");
      exit(1);
    }

    // Use alignments to generate "synchronized" flowspace reference and read ionograms
    // TODO: Do proper flowspace alignment
    string genome = key + bamUtil.get_tdna();
    string calls = key + bamUtil.get_qdna();
    int numBases = (int)min(genome.length(), calls.length());
    vector<int> refIonogram(numFlows, 0);
    vector<int> readIonogram(numFlows, 0);
    int numFlowsRead = 0;
    int numFlowsRef = 0;
    char gC = flowOrder[0];
    int gBC = 0;
    for (int iBase = 0; (iBase < numBases) && (numFlowsRead < numFlows) && (numFlowsRef < numFlows); iBase++) {
      // Conversion for reads (independent of reference)
      if (calls[iBase] != '-') {
        // bounds check first so flowOrder is never indexed at numFlows
        while ((numFlowsRead < numFlows) && (calls[iBase] != flowOrder[numFlowsRead]))
          numFlowsRead++;
        if (numFlowsRead < numFlows)
          readIonogram[numFlowsRead]++;
      }
      if (genome[iBase] != '-') {
        if (genome[iBase] != gC) {
          // Since a new homopolymer begins, need to drop off the old one
          while ((numFlowsRef < numFlows) && (gC != flowOrder[numFlowsRef])) {
            numFlowsRef++;
            if (numFlowsRef < numFlows)
              refIonogram[numFlowsRef] = 0;
          }
          if (numFlowsRef < numFlows)
            refIonogram[numFlowsRef] = gBC;
          gC = genome[iBase];
          gBC = 0;
        }
        gBC++;
        if (genome[iBase] == calls[iBase])
          numFlowsRef = numFlowsRead;
      }
    }
    int validFlows = min(numFlowsRef, numFlowsRead);
    metricGeneratorSNR[bestTF].AddElement(bam_flowgram, key.c_str(), flowOrder);
    metricGeneratorAvgIonogram[bestTF].AddElement(bam_flowgram, numFlows);
    metricGeneratorQualityHistograms[bestTF].AddElement(bamUtil.get_phred_len(10), bamUtil.get_phred_len(17));
    for (int iFlow = 0; iFlow < validFlows - 20; iFlow++)
      metricGeneratorHPAccuracy[bestTF].AddElement(refIonogram[iFlow], readIonogram[iFlow]);
  }

  // Save stats to a json file
  Json::Value outputJson(Json::objectValue);
  for (int i = 0; i < numTFs; i++) {
    if (TFCount[i] < minTFCount)
      continue;
    Json::Value currentTFJson(Json::objectValue);
    currentTFJson["TF Name"] = header->target_name[i];
    currentTFJson["TF Seq"] = referenceSequences[i];
    currentTFJson["Num"] = TFCount[i];
    currentTFJson["Top Reads"] = Json::Value(Json::arrayValue); // Obsolete
    metricGeneratorSNR[i].PrintSNR(currentTFJson);
    metricGeneratorHPAccuracy[i].PrintHPAccuracy(currentTFJson);
    metricGeneratorQualityHistograms[i].PrintMetrics(currentTFJson);
    metricGeneratorAvgIonogram[i].PrintIonograms(currentTFJson);
    outputJson[header->target_name[i]] = currentTFJson;
  }
  bamReader.close(); // Closing invalidates the header pointers

  if (!jsonOutputFilename.empty()) {
    ofstream out(jsonOutputFilename.c_str(), ios::out);
    if (out.good())
      out << outputJson.toStyledString();
  }
  return 0;
}
int main(int argc, const char *argv[])
{
  OptArgs opts;
  string h5file;
  string source;
  string destination;
  vector<string> infiles;
  bool help;
  string flowlimit_arg;
  unsigned int flowlimit;

  DumpStartingStateOfNormWells(argc, argv);

  opts.ParseCmdLine(argc, argv);
  opts.GetOption(h5file, "", '-', "h5file");
  opts.GetOption(source, "", 's', "source");
  opts.GetOption(destination, "", 'd', "destination");
  opts.GetOption(flowlimit_arg, "", 'f', "flowlimit");
  opts.GetOption(help, "false", 'h', "help");
  opts.GetLeftoverArguments(infiles);
  if (help || infiles.empty() || (infiles.size() > 1))
    usage();
  h5file = infiles.front();

  int numCPU = (int)sysconf(_SC_NPROCESSORS_ONLN);
  int numThreads = MAXTHREADS < numCPU ? MAXTHREADS : numCPU;
  fprintf(stdout, "Using %d threads of %d cores\n", numThreads, numCPU);

  if (source.empty())
    source = SIGNAL_IN;
  H5ReplayReader reader = H5ReplayReader(h5file, &source[0]);
  if (destination.empty())
    destination = SIGNAL_OUT;
  // writing back into the source dataset reuses its existing type and rank
  H5ReplayRecorder recorder = (source == destination)
    ? H5ReplayRecorder(h5file, &destination[0])
    : H5ReplayRecorder(h5file, &destination[0], reader.GetType(), reader.GetRank());

  reader.Open();
  int rank = reader.GetRank();
  vector<hsize_t> dims(rank, 0);
  vector<hsize_t> chunks(rank, 0);
  reader.GetDims(dims);
  reader.GetChunkSize(chunks);
  reader.Close();

  fprintf(stdout, "Opening for read %s:%s with rank %d, row x col x flow dims=[ ",
          h5file.c_str(), source.c_str(), rank);
  for (int i = 0; i < rank; i++)
    fprintf(stdout, "%d ", (int)dims[i]);
  fprintf(stdout, "], chunksize=[ ");
  for (int i = 0; i < rank; i++)
    fprintf(stdout, "%d ", (int)chunks[i]);
  fprintf(stdout, "]\n");

  if (flowlimit_arg.empty())
    flowlimit = dims[2];
  else
    flowlimit = atoi(flowlimit_arg.c_str());
  flowlimit = (flowlimit < dims[2]) ? flowlimit : dims[2];
  fprintf(stdout, "Using %u flows\n", flowlimit);

  // hard code region size to be at least 100x100
  chunks[0] = (chunks[0] < 100) ? 100 : chunks[0];
  chunks[1] = (chunks[1] < 100) ? 100 : chunks[1];
  recorder.CreateDataset(chunks);

  int max_threads_ever = (int)((dims[0]/chunks[0] + 1) * (dims[1]/chunks[1] + 1));
  thread_flags.resize(max_threads_ever, 0);

  unsigned int thread_id = 0;
  vector<compute_norm_args> my_args(max_threads_ever);

  // layout is rows x cols x flows; one worker per 2D chunk
  for (hsize_t row = 0; row < dims[0]; ) {
    for (hsize_t col = 0; col < dims[1]; ) {
      pthread_t thread;
      int thread_status;
      assert(thread_id < thread_flags.size());
      my_args.at(thread_id).row = row;
      my_args.at(thread_id).col = col;
      my_args.at(thread_id).chunks = &chunks;
      my_args.at(thread_id).dims = &dims;
      my_args.at(thread_id).h5file = &h5file;
      my_args.at(thread_id).source = &source;
      my_args.at(thread_id).destination = &destination;
      my_args.at(thread_id).thread_id = thread_id;
      my_args.at(thread_id).flowlimit = flowlimit;
      fprintf(stdout, "creating thread %d from row=%d (max %d), column=%d (max %d)\n",
              thread_id, (int)row, (int)dims[0], (int)col, (int)dims[1]);
      // only have to be approximate, don't worry about races
      while (accumulate(thread_flags.begin(), thread_flags.end(), 0) > numThreads)
        sleep(1);
      thread_flags[thread_id] = 1;
      thread_status = pthread_create(&thread, NULL, compute_norm, (void *)&my_args[thread_id]);
      assert(thread_status == 0); // pthread_create returns 0 on success
      thread_id++;
      col += chunks[1];
    }
    row += chunks[0];
  }
  // wait for the threads to finish (each worker clears its thread_flags slot)
  while (accumulate(thread_flags.begin(), thread_flags.end(), 0) > 0)
    sleep(1);
  cout << "Done." << endl;
  pthread_exit(NULL);
}
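// A minimal sketch of the thread-throttling pattern shared by this tool and
// the extraction tool above: one flag per prospective worker, accumulate()
// to estimate how many are still running, and a one-second polling sleep
// both to cap concurrency and to wait for completion. The real tools rely on
// do_subset / compute_norm clearing their own flag slot when they finish;
// that contract is assumed here, and all names are illustrative.
#include <cstdio>
#include <numeric>
#include <vector>
#include <pthread.h>
#include <unistd.h>

static std::vector<int> g_flags;

struct WorkerArgs { unsigned int id; };

static void *worker(void *arg) {
  WorkerArgs *a = (WorkerArgs *)arg;
  sleep(2);           // stand-in for the real per-chunk I/O
  g_flags[a->id] = 0; // signal completion; the poll only needs an estimate
  return NULL;
}

int main() {
  const unsigned int numJobs = 8;
  const int maxLive = 2;
  g_flags.resize(numJobs, 0);
  std::vector<WorkerArgs> args(numJobs); // sized once so pointers stay valid
  for (unsigned int id = 0; id < numJobs; id++) {
    // approximate throttle, as in the tools above
    while (std::accumulate(g_flags.begin(), g_flags.end(), 0) > maxLive)
      sleep(1);
    args[id].id = id;
    g_flags[id] = 1;
    pthread_t t;
    if (pthread_create(&t, NULL, worker, &args[id]) != 0)
      return 1;
    pthread_detach(t); // handles are never joined in the tools either
  }
  // wait for all workers to drain
  while (std::accumulate(g_flags.begin(), g_flags.end(), 0) > 0)
    sleep(1);
  printf("Done.\n");
  return 0;
}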