int main(int argc, char* argv[]) { // Open a BAM file: char* bamFile = argv[1]; // char* bamIndex = argv[2]; BAMReader reader(bamFile); // , bamIndex); reader.open(); assert(reader); // Print out list of reference sequences, and their lengths: cout << "Found " << reader.num_refs() << " reference sequences:" << endl; for(int i=0; i<reader.num_refs(); ++i) cout << setw(9) << reader.refs()[i] << " " << reader.lens()[i] << endl; // Print out list of reads: for (BAMReader::iterator i = reader.get_iterator(); i.good(); i.next()) { BAMRead read = i.get(); cout << read.to_string(); for (Sequence::iterator s_iter = read.get_seq().get_iterator(); s_iter.good(); s_iter.next()) cout << s_iter.get(); // nuc from SEQ cout << endl; for (Cigar::iterator c_iter = read.get_cigar().get_iterator(); c_iter.good(); c_iter.next()) cout << c_iter.len() << ":" << c_iter.op() << "; "; cout << endl; // Don't use MD directly. Use BAMUtils. BAMUtils utils(read); cout << utils.get_qdna() << endl << utils.get_matcha() << endl << utils.get_tdna() << endl << endl; } }
int main(int argc, const char *argv[]) { #ifdef _DEBUG atexit(memstatus); dbgmemInit(); #endif /* _DEBUG */ printf ("%s - %s-%s (%s)\n", argv[0], IonVersion::GetVersion().c_str(), IonVersion::GetRelease().c_str(), IonVersion::GetSvnRev().c_str()); string bamInputFilename; string fastaInputFilename; string jsonOutputFilename; bool help; OptArgs opts; opts.ParseCmdLine(argc, argv); opts.GetOption(bamInputFilename, "", '-', "bam"); opts.GetOption(fastaInputFilename, "", '-', "ref"); opts.GetOption(jsonOutputFilename, "TFStats.json", '-', "output-json"); opts.GetOption(help, "false", 'h', "help"); opts.CheckNoLeftovers(); if (help || bamInputFilename.empty() || fastaInputFilename.empty()) return showHelp(); // Parse BAM header BAMReader bamReader(bamInputFilename); bamReader.open(); bam_header_t *header = (bam_header_t *)bamReader.get_header_ptr(); int numFlows = 0; string flowOrder; string key; if (header->l_text >= 3) { if (header->dict == 0) header->dict = sam_header_parse2(header->text); int nEntries = 0; char **tmp = sam_header2list(header->dict, "RG", "FO", &nEntries); if (nEntries) { flowOrder = tmp[0]; numFlows = flowOrder.length(); } if (tmp) free(tmp); nEntries = 0; tmp = sam_header2list(header->dict, "RG", "KS", &nEntries); if (nEntries) { key = tmp[0]; } if (tmp) free(tmp); } if (numFlows <= 0) { fprintf(stderr, "[TFMapper] Could not retrieve flow order from FO BAM tag. SFF-specific tags absent?\n"); exit(1); } if (key.empty()) { fprintf(stderr, "[TFMapper] Could not retrieve key sequence from KS BAM tag. SFF-specific tags absent?\n"); exit(1); } //printf("Retrieved flow order from bam: %s (%d)\n", flowOrder.c_str(), numFlows); //printf("Retrieved key from bam: %s\n", key.c_str()); // Retrieve test fragment sequences vector<string> referenceSequences; PopulateReferenceSequences(referenceSequences, fastaInputFilename, header->n_targets, header->target_name, string("")); // Process the BAM reads and generate metrics int numTFs = header->n_targets; vector<int> TFCount(numTFs,0); MetricGeneratorQualityHistograms metricGeneratorQualityHistograms[numTFs]; MetricGeneratorHPAccuracy metricGeneratorHPAccuracy[numTFs]; MetricGeneratorSNR metricGeneratorSNR[numTFs]; MetricGeneratorAvgIonogram metricGeneratorAvgIonogram[numTFs]; for (BAMReader::iterator i = bamReader.get_iterator(); i.good(); i.next()) { BAMRead bamRead = i.get(); int bestTF = bamRead.get_tid(); if (bestTF < 0) continue; BAMUtils bamUtil(bamRead); TFCount[bestTF]++; // Extract flowspace signal from FZ BAM tag uint16_t *bam_flowgram = NULL; uint8_t *fz = bam_aux_get(bamRead.get_bam_ptr(), "FZ"); if (fz != NULL) { if (fz[0] == (uint8_t)'B' && fz[1] == (uint8_t)'S' && *((uint32_t *)(fz+2)) == (uint32_t)numFlows) bam_flowgram = (uint16_t *)(fz+6); } if (bam_flowgram == NULL) { fprintf(stderr, "[TFMapper] Could not retrieve flow signal from FZ BAM tag. SFF-specific tags absent?\n"); exit(1); } // Use alignments to generate "synchronized" flowspace reference and read ionograms // TODO: Do proper flowspace alignment string genome = key + bamUtil.get_tdna(); string calls = key + bamUtil.get_qdna(); int numBases = min(genome.length(),calls.length()); vector<int> refIonogram(numFlows, 0); vector<int> readIonogram(numFlows, 0); int numFlowsRead = 0; int numFlowsRef = 0; char gC = flowOrder[0]; int gBC = 0; for (int iBase = 0; (iBase < numBases) && (numFlowsRead < numFlows) && (numFlowsRef < numFlows); iBase++) { // Conversion for reads (independent of reference) if (calls[iBase] != '-') { while ((calls[iBase] != flowOrder[numFlowsRead]) && (numFlowsRead < numFlows)) numFlowsRead++; if (numFlowsRead < numFlows) readIonogram[numFlowsRead]++; } if (genome[iBase] != '-') { if (genome[iBase] != gC) { // Since a new homopolymer begins, need to drop off the old one while ((gC != flowOrder[numFlowsRef]) && (numFlowsRef < numFlows)) { numFlowsRef++; if (numFlowsRef < numFlows) refIonogram[numFlowsRef] = 0; } if (numFlowsRef < numFlows) refIonogram[numFlowsRef] = gBC; gC = genome[iBase]; gBC = 0; } gBC++; if (genome[iBase] == calls[iBase]) numFlowsRef = numFlowsRead; } } int validFlows = min(numFlowsRef, numFlowsRead); metricGeneratorSNR[bestTF].AddElement(bam_flowgram ,key.c_str(), flowOrder); metricGeneratorAvgIonogram[bestTF].AddElement(bam_flowgram, numFlows); metricGeneratorQualityHistograms[bestTF].AddElement(bamUtil.get_phred_len(10),bamUtil.get_phred_len(17)); for (int iFlow = 0; iFlow < validFlows-20; iFlow++) metricGeneratorHPAccuracy[bestTF].AddElement(refIonogram[iFlow],readIonogram[iFlow]); } // Save stats to a json file Json::Value outputJson(Json::objectValue); for(int i = 0; i < numTFs; i++) { if (TFCount[i] < minTFCount) continue; Json::Value currentTFJson(Json::objectValue); currentTFJson["TF Name"] = header->target_name[i]; currentTFJson["TF Seq"] = referenceSequences[i]; currentTFJson["Num"] = TFCount[i]; currentTFJson["Top Reads"] = Json::Value(Json::arrayValue); // Obsolete metricGeneratorSNR[i].PrintSNR(currentTFJson); metricGeneratorHPAccuracy[i].PrintHPAccuracy(currentTFJson); metricGeneratorQualityHistograms[i].PrintMetrics(currentTFJson); metricGeneratorAvgIonogram[i].PrintIonograms(currentTFJson); outputJson[header->target_name[i]] = currentTFJson; } bamReader.close(); // Closing invalidates the header pointers if (!jsonOutputFilename.empty()) { ofstream out(jsonOutputFilename.c_str(), ios::out); if (out.good()) out << outputJson.toStyledString(); } return 0; }