//****************************************************************************** // ExtractDNA //****************************************************************************** void Bed2Fa::ExtractDNA() { /* Make sure that we can oen all of the files successfully*/ // open the fasta database for reading ifstream faDb(_dbFile.c_str(), ios::in); if ( !faDb ) { cerr << "Error: The requested fasta database file (" << _dbFile << ") could not be opened. Exiting!" << endl; exit (1); } // open and memory-map genome file FastaReference *fr = new FastaReference; bool memmap = true; fr->open(_dbFile, memmap); BED bed, nullBed; string sequence; _bed->Open(); while (_bed->GetNextBed(bed)) { if (_bed->_status == BED_VALID) { // make sure we are extracting >= 1 bp if (bed.zeroLength == false) { size_t seqLength = fr->sequenceLength(bed.chrom); // seqLength > 0 means chrom was found in index. // seqLength == 0 otherwise. if (seqLength) { // make sure this feature will not exceed the end of the chromosome. if ( (bed.start <= seqLength) && (bed.end <= seqLength) ) { int length = bed.end - bed.start; sequence = fr->getSubSequence(bed.chrom, bed.start, length); ReportDNA(bed, sequence); } else { cerr << "Feature (" << bed.chrom << ":" << bed.start << "-" << bed.end << ") beyond the length of " << bed.chrom << " size (" << seqLength << " bp). Skipping." << endl; } } else { cerr << "WARNING. chromosome (" << bed.chrom << ") was not found in the FASTA file. Skipping."<< endl; } } // handle zeroLength else { cerr << "Feature (" << bed.chrom << ":" << bed.start+1 << "-" << bed.end-1 << ") has length = 0, Skipping." << endl; } bed = nullBed; } } _bed->Close(); }
// Entry point: streams a VCF (file given as the last positional argument, or
// stdin) and writes a filtered/merged VCF to stdout.
//
// Three modes are visible below:
//   * BED mode (--bed): a record's raw line is echoed when it overlaps a BED
//     target (or when it does NOT, with --invert).
//   * intersect mode (--intersect-vcf): alternate alleles of each record are
//     kept only if the haplotype they produce also appears among nearby records
//     of the other VCF (inverted with --invert).
//   * union mode (--union-vcf): records of the other VCF are interleaved into
//     the output and alleles duplicated between the two files are dropped from
//     the primary record.
// Intersect and union modes require --reference.
//
// Exits with status 1 on unreadable input or a missing reference, otherwise
// terminates through exit(0).
int main(int argc, char** argv) {

    string bedFileName;
    string vcfFileName;
    string fastaFileName;
    bool intersecting = false;
    bool unioning = false;
    bool invert = false;
    // NOTE(review): `contained` and `overlapping` are set by -c / -o below but
    // never read anywhere in this function (the later `overlapping` is a
    // different, shadowing local).
    bool contained = true;
    bool overlapping = false;
    // number of bases added on each side of a record when searching the other
    // VCF for neighbouring records (--window-size)
    int windowsize = 30;

    if (argc == 1)
        printSummary(argv);

    int c;
    while (true) {
        static struct option long_options[] =
            {
                /* These options set a flag. */
                //{"verbose", no_argument,       &verbose_flag, 1},
                {"help", no_argument, 0, 'h'},
                {"bed",  required_argument, 0, 'b'},
                {"invert",  no_argument, 0, 'v'},
                {"intersect-vcf", required_argument, 0, 'i'},
                {"union-vcf", required_argument, 0, 'u'},
                {"contained",  no_argument, 0, 'c'},
                {"overlapping", no_argument, 0, 'o'},
                {"window-size", required_argument, 0, 'w'},
                {"reference", required_argument, 0, 'r'},
                {0, 0, 0, 0}
            };
        /* getopt_long stores the option index here. */
        int option_index = 0;

        c = getopt_long (argc, argv, "hvcob:i:u:w:r:",
                         long_options, &option_index);

        // -1 signals the end of the options
        if (c == -1)
            break;

        switch (c) {

            case 'w':
                windowsize = atoi(optarg);
                break;

            case 'b':
                bedFileName = string(optarg);
                break;

            // -i and -u share vcfFileName; if both are given the last one wins
            // for the file name while both mode flags stay set
            case 'i':
                intersecting = true;
                vcfFileName = string(optarg);
                break;

            case 'u':
                unioning = true;
                vcfFileName = string(optarg);
                break;

            case 'r':
                fastaFileName = string(optarg);
                break;

            case 'v':
                invert = true;
                break;

            case 'c':
                contained = true;
                break;

            case 'o':
                overlapping = true;
                break;

            case 'h':
                printSummary(argv);
                break;

            case '?':
                printSummary(argv);
                exit(1);
                break;

            default:
                abort ();
        }
    }

    bool usingBED = false;
    if (!bedFileName.empty()) {
        usingBED = true;
    }

    BedReader bed;
    if (usingBED) {
        bed.open(bedFileName);
    }

    // primary VCF: exactly one trailing positional argument names the file,
    // anything else falls back to stdin
    VariantCallFile variantFile;
    string inputFilename;
    if (optind == argc - 1) {
        inputFilename = argv[optind];
        variantFile.open(inputFilename);
    } else {
        variantFile.open(std::cin);
    }

    if (!variantFile.is_open()) {
        cerr << "could not open VCF file" << endl;
        exit(1);
    }

    if (usingBED) {
        variantFile.parseSamples = false;
    }

    // secondary VCF used for intersection / union
    VariantCallFile otherVariantFile;
    if (!vcfFileName.empty()) {
        otherVariantFile.open(vcfFileName);
        if (!otherVariantFile.is_open()) {
            cerr << "could not open VCF file " << vcfFileName << endl;
            exit(1);
        }
    }

    FastaReference reference;
    if (unioning || intersecting) {
        if (fastaFileName.empty()) {
            cerr << "a reference is required for haplotype-based intersection and unioniong" << endl;
            exit(1);
        }
        reference.open(fastaFileName);
    }

    if (!unioning && !intersecting) {
        variantFile.parseSamples = false; // faster, as when we are
                                          // only bed-intersecting we
                                          // can do position-only
                                          // output and don't have to
                                          // manipulate specific
                                          // alleles
    }

    // read the VCF file for union or intersection into an interval tree
    // indexed using some proximity window

    // per-sequence interval tree over the records of the other VCF
    map<string, IntervalTree<Variant*> > variantIntervals;
    // owning storage for those records; a std::list is used so the Variant*
    // taken below stay valid while more records are appended
    map<string, list<Variant> > otherVariants;
    // flat interval lists from which the trees are built
    map<string, vector<Interval<Variant*> > > otherVariantIntervals;

    if (unioning || intersecting) {

        Variant ovar(otherVariantFile);
        while (otherVariantFile.getNextVariant(ovar)) {
            long int left = ovar.position;
            long int right = left + ovar.ref.size(); // this should be 1-past the end
            otherVariants[ovar.sequenceName].push_back(ovar);
            Variant* v = &otherVariants[ovar.sequenceName].back();
            otherVariantIntervals[ovar.sequenceName].push_back(Interval<Variant*>(left, right, v));
        }

        for (map<string, vector<Interval<Variant*> > >::iterator j = otherVariantIntervals.begin(); j != otherVariantIntervals.end(); ++j) {
            variantIntervals[j->first] = IntervalTree<Variant*>(j->second);
        }

    }

    // records of the other VCF that have already been written (union mode)
    set<Variant*> outputVariants;

    // highest position written so far on the current sequence
    long unsigned int lastOutputPosition = 0;
    string lastSequenceName;

    cout << variantFile.header;

    Variant var(variantFile);
    while (variantFile.getNextVariant(var)) {

        // Sequence bookkeeping. Note that lastSequenceName is only advanced
        // inside the `unioning` branch, so outside union mode it keeps the name
        // of the first sequence seen.
        if (lastSequenceName.empty()) {
            lastSequenceName = var.sequenceName;
        } else if (lastSequenceName != var.sequenceName) {
            if (unioning) {
                // flush whatever is left of the other VCF on the sequence we
                // are leaving, from the last written position to its end
                vector<Interval<Variant*> > previousRecords;
                long int lastSeqLength = reference.sequenceLength(lastSequenceName);
                variantIntervals[lastSequenceName].findContained(lastOutputPosition, lastSeqLength, previousRecords);
                for (vector<Interval<Variant*> >::iterator r = previousRecords.begin(); r != previousRecords.end(); ++r) {
                    Variant* v = r->value;
                    if (outputVariants.find(v) == outputVariants.end()) {
                        outputVariants.insert(v);
                        cout << *v << endl;
                        // does this output everything in correct order?
                    }
                }
                lastSequenceName = var.sequenceName;
                lastOutputPosition = 0;
            }
        }

        if (usingBED) {
            // BED mode takes precedence over intersect/union when both are given
            BedTarget record(var.sequenceName, var.position, var.position + var.ref.size(), "");
            vector<BedTarget*> overlaps = bed.targetsOverlapping(record);

            // echo the untouched input line when (overlap XOR invert) holds
            if (!invert && !overlaps.empty()) {
                cout << variantFile.line << endl;
            } else if (invert && overlaps.empty()) {
                cout << variantFile.line << endl;
            }

        } else if (unioning || intersecting) {

            // TODO check overlaps with union/intersection
            // hmm... for unioning, you might need to step through the original VCF records
            // but the idea is to exclude the haplotype-based duplicates
            vector<Interval<Variant*> > results;

            // NOTE(review): lastOutputPosition is `long unsigned int` and is
            // passed to max() together with var.position, so var.position is
            // presumably unsigned too; `var.position - windowsize` would then
            // wrap for records closer than windowsize to the sequence start.
            // Whether that matters depends on the parameter type of
            // findContained -- confirm.
            variantIntervals[var.sequenceName].findContained(var.position - windowsize, var.position + var.ref.size() + windowsize, results);

            // records of the other VCF lying within the window around `var`
            // (shadows the unused bool option flag of the same name)
            vector<Variant*> overlapping;

            for (vector<Interval<Variant*> >::iterator r = results.begin(); r != results.end(); ++r) {
                overlapping.push_back(r->value);
            }

            if (unioning) {

                // unioning strategy

                // write out all the records from the last file
                // between the last one printed out and the first
                // one we're about to print out
                vector<Interval<Variant*> > previousRecords;

                variantIntervals[var.sequenceName].findOverlapping(lastOutputPosition, var.position - windowsize, previousRecords);

                // keyed by position so the not-yet-written records come out sorted
                map<long int, vector<Variant*> > variants;

                for (vector<Interval<Variant*> >::iterator r = previousRecords.begin(); r != previousRecords.end(); ++r) {
                    Variant* v = r->value;
                    if (outputVariants.find(v) == outputVariants.end()) {
                        outputVariants.insert(v);
                        variants[v->position].push_back(v);
                    }
                }

                for (map<long int, vector<Variant*> >::iterator v = variants.begin(); v != variants.end(); ++v) {
                    for (vector<Variant*>::iterator o = v->second.begin(); o != v->second.end(); ++o) {
                        cout << **o << endl;
                        lastOutputPosition = max(lastOutputPosition, (*o)->position);
                    }
                }

                // TODO find the duplicates for the other file
            }

            if (overlapping.empty()) {
                // nothing nearby in the other VCF: the record is unique to the
                // primary file, so it belongs to the union and to the inverted
                // intersection, but not to the plain intersection
                if (unioning || (intersecting && invert)) {
                    cout << var << endl;
                    lastOutputPosition = max(lastOutputPosition, var.position);
                }
            } else {

                // get the min and max of the overlaps

                int haplotypeStart = var.position;
                int haplotypeEnd = var.position + var.ref.size();

                for (vector<Variant*>::iterator v = overlapping.begin(); v != overlapping.end(); ++v) {
                    haplotypeStart = min((*v)->position, (long unsigned int) haplotypeStart);
                    haplotypeEnd = max((*v)->position + (*v)->ref.size(), (long unsigned int) haplotypeEnd);
                }

                // for everything overlapping and the current variant, construct the local haplotype within the bounds
                // if there is an exact match, the allele in the current VCF does intersect

                // `haplotypeStart - 1`: positions are shifted down by one before
                // being handed to getSubSequence
                string referenceHaplotype = reference.getSubSequence(var.sequenceName, haplotypeStart - 1, haplotypeEnd - haplotypeStart);
                // haplotype string -> records of the other VCF that produce it
                map<string, vector<Variant*> > haplotypes;

                for (vector<Variant*>::iterator v = overlapping.begin(); v != overlapping.end(); ++v) {
                    Variant& variant = **v;
                    for (vector<string>::iterator a = variant.alt.begin(); a != variant.alt.end(); ++a) {
                        string haplotype = referenceHaplotype;
                        // get the relative start and end coordinates for the variant alternate allele
                        int relativeStart = variant.position - haplotypeStart;
                        haplotype.replace(relativeStart, variant.ref.size(), *a);
                        haplotypes[haplotype].push_back(*v);
                    }
                }

                // determine the non-intersecting alts
                vector<string> altsToRemove;
                for (vector<string>::iterator a = var.alt.begin(); a != var.alt.end(); ++a) {
                    string haplotype = referenceHaplotype;
                    int relativeStart = var.position - haplotypeStart;
                    haplotype.replace(relativeStart, var.ref.size(), *a);
                    map<string, vector<Variant*> >::iterator h = haplotypes.find(haplotype);
                    // drop the alt when:
                    //   intersect        : its haplotype is absent from the other file
                    //   intersect+invert : its haplotype is present in the other file
                    //   union            : its haplotype is present (the other file's
                    //                      record is emitted instead)
                    if ((intersecting && !invert && h == haplotypes.end())
                        || (intersecting && invert && h != haplotypes.end())
                        || (unioning && h != haplotypes.end())) {
                        altsToRemove.push_back(*a);
                    }
                }

                // remove the non-overlapping (intersecting) or overlapping (unioning) alts

                for (vector<string>::iterator a = altsToRemove.begin(); a != altsToRemove.end(); ++a) {
                    var.removeAlt(*a);
                }

                if (unioning) {

                    // somehow sort the records and combine them?
                    map<long int, vector<Variant*> > variants;
                    for (vector<Variant*>::iterator o = overlapping.begin(); o != overlapping.end(); ++o) {
                        if ((*o)->position <= var.position && // check ensures proper ordering of variants on output
                            outputVariants.find(*o) == outputVariants.end()) {
                            outputVariants.insert(*o);
                            variants[(*o)->position].push_back(*o);
                        }
                    }
                    // add in the current variant, if it has alts left
                    if (!var.alt.empty()) {
                        variants[var.position].push_back(&var);
                    }

                    for (map<long int, vector<Variant*> >::iterator v = variants.begin(); v != variants.end(); ++v) {
                        for (vector<Variant*>::iterator o = v->second.begin(); o != v->second.end(); ++o) {
                            cout << **o << endl;
                            lastOutputPosition = max(lastOutputPosition, (*o)->position);
                        }
                    }
                } else {
                    // if any alts remain, output the variant record
                    if (!var.alt.empty()) {
                        cout << var << endl;
                        lastOutputPosition = max(lastOutputPosition, var.position);
                    }
                }
            }
        }
    }

    // if unioning, and any variants remain, output them
    // NOTE(review): iteration starts at find(lastSequenceName) and runs to the
    // end of the map, so (a) sequences that sort before lastSequenceName and
    // were never seen in the primary VCF are not flushed, and (b) nothing is
    // flushed when lastSequenceName has no entry in otherVariants (including
    // when the primary VCF had no records). Confirm whether that is intended.
    if (unioning) {
        for (map<string, list<Variant> >::iterator chrom = otherVariants.find(lastSequenceName);
             chrom != otherVariants.end();
             ++chrom) {
            for (list<Variant>::iterator v = chrom->second.begin(); v != chrom->second.end(); ++v) {
                Variant* variant = &*v;
                if (outputVariants.find(variant) == outputVariants.end()) {
                    outputVariants.insert(variant);
                    cout << *variant << endl;
                    // TODO guarantee sorting
                }
            }
        }
    }

    // exit() ends the process here without running the destructors of the
    // locals above; the return below is never reached
    exit(0);  // why?
    return 0;

}
// Streams BAM alignments from stdin, realigns selected reads against a variant
// graph (DAG) built from the VCF in params.vcf_file over a sliding window of
// the reference, and writes the alignments to stdout.
//
// Per alignment:
//   1. rebuild the DAG when the read is on a new reference sequence or has
//      moved far enough through the current window,
//   2. if shouldRealign() says so, align the read to the graph with gswalign()
//      and flatten the result back into reference space,
//   3. keep the new alignment only if it passes the acceptance test below,
//      otherwise restore the original,
//   4. push the alignment through a position-keyed queue so that output stays
//      sorted even though realignment can shift positions.
//
// With params.dry_run nothing is written as BAM; the graph alignment of each
// realigned read is printed to stdout as text instead.
//
// Exits with status 1 if stdin/stdout cannot be opened as BAM or if the VCF
// file is missing/unreadable.
void realign_bam(Parameters& params) {

    FastaReference reference;
    reference.open(params.fasta_reference);

    int dag_window_size = params.dag_window_size;

    // open BAM file
    BamReader reader;
    if (!reader.Open("stdin")) {
        cerr << "could not open stdin for reading" << endl;
        exit(1);
    }

    BamWriter writer;
    if (!params.dry_run && !writer.Open("stdout", reader.GetHeaderText(), reader.GetReferenceData())) {
        cerr << "could not open stdout for writing" << endl;
        exit(1);
    }

    // store the names of all the reference sequences in the BAM file
    map<int, string> referenceIDToName;
    vector<RefData> referenceSequences = reader.GetReferenceData();
    int i = 0;
    for (RefVector::iterator r = referenceSequences.begin(); r != referenceSequences.end(); ++r) {
        referenceIDToName[i] = r->RefName;
        ++i;
    }

    vcf::VariantCallFile vcffile;
    if (!params.vcf_file.empty()) {
        if (!vcffile.open(params.vcf_file)) {
            cerr << "could not open VCF file " << params.vcf_file << endl;
            exit(1);
        }
    } else {
        cerr << "realignment requires VCF file" << endl;
        exit(1);
    }

    vcf::Variant var(vcffile);

    BamAlignment alignment;
    // position -> alignments waiting to be written; drained in key order
    map<long int, vector<BamAlignment> > alignmentSortQueue;

    // get alignment
    // assemble DAG in region around alignment
    // loop for each alignment in BAM:
    //     update DAG when current alignment gets close to edge of assembled DAG
    //     attempt to realign if read has a certain number of mismatches + gaps or softclips, weighted by basequal
    //     if alignment to DAG has fewer mismatches and gaps than original alignment, use it
    //         flatten read into reference space (for now just output alleles from VCF un-spanned insertions)
    //     write read to queue for streaming re-sorting (some positional change will occur)

    long int dag_start_position = 0;
    string currentSeqname;
    string ref;
    ReferenceMappings ref_map;
    gssw_graph* graph = gssw_graph_create(0);
    int8_t* nt_table = gssw_create_nt_table();
    int8_t* mat = gssw_create_score_matrix(params.match, params.mism);

    int total_reads = 0;
    int total_realigned = 0;
    int total_improved = 0;
    bool emptyDAG = false; // if the dag is constructed over empty sequence
                           // such as when realigning reads mapped to all-N sequence
    if (params.debug) {
        cerr << "about to start processing alignments" << endl;
    }

    while (reader.GetNextAlignment(alignment)) {

        // operator[] inserts an empty name for a RefID that is not in the
        // header table
        string& seqname = referenceIDToName[alignment.RefID];

        if (params.debug) {
            cerr << "--------------------------------------------" << endl
                 << "processing alignment " << alignment.Name << " at "
                 << seqname << ":" << alignment.Position << endl;
        }

        ++total_reads;

        // kept so a rejected or failed realignment can be rolled back
        BamAlignment originalAlignment = alignment;
        long unsigned int initialAlignmentPosition = alignment.Position;

        // should we construct a new DAG?  do so when 3/4 of the way through the current one
        // center on current position + 1/2 dag window
        // TODO check this scheme using some scribbles on paper
        if ((seqname != currentSeqname
             || ((alignment.Position + (alignment.QueryBases.size()/2)
                  > (3*dag_window_size/4) + dag_start_position)))
            && alignment.Position < reference.sequenceLength(seqname)) {

            if (seqname != currentSeqname) {
                if (params.debug) {
                    cerr << "switched ref seqs" << endl;
                }
                dag_start_position = max((long int) 0,
                                         (long int) (alignment.GetEndPosition() - dag_window_size/2));
            // recenter DAG
            } else if (!ref_map.empty()) {
                dag_start_position = dag_start_position + dag_window_size/2;
                dag_start_position = max(dag_start_position,
                                         (long int) (alignment.GetEndPosition() - dag_window_size/2));
            } else {
                dag_start_position = alignment.Position - dag_window_size/2;
            }

            dag_start_position = max((long int)0, dag_start_position);

            // TODO get sequence length and use to bound noted window size (edge case)

            // get variants for new DAG
            vector<vcf::Variant> variants;
            if (!vcffile.setRegion(seqname,
                                   dag_start_position + 1,
                                   dag_start_position + dag_window_size)) {
                // this is not necessarily an error; there should be a better way to check for VCF file validity
            } else {

                // check first variant: slide the window start back one base at
                // a time until the first variant no longer sits at or before
                // the window start
                if (vcffile.getNextVariant(var)) {
                    while (var.position <= dag_start_position + 1) {
                        dag_start_position -= 1;
                        vcffile.setRegion(seqname,
                                          dag_start_position + 1,
                                          dag_start_position + dag_window_size);
                        if (!vcffile.getNextVariant(var)) { break; }
                    }
                }

                // re-seek, then collect every variant fully inside the window
                vcffile.setRegion(seqname,
                                  dag_start_position + 1,
                                  dag_start_position + dag_window_size);

                while (vcffile.getNextVariant(var)) {
                    if (params.debug) cerr << "getting variant at " << var.sequenceName << ":" << var.position << endl;
                    if (var.position + var.ref.length() <= dag_start_position + dag_window_size
                        && var.position >= dag_start_position) {
                        variants.push_back(var);
                    }
                }

            }

            ref = reference.getSubSequence(seqname,
                                           max((long int) 0, dag_start_position),
                                           dag_window_size); // 0/1 conversion

            // clear graph and metadata
            ref_map.clear();
            gssw_graph_destroy(graph);

            if (params.debug) { cerr << "constructing DAG" << endl; }
            // and build the DAG
            graph = gssw_graph_create(0);
            constructDAGProgressive(graph,
                                    ref_map,
                                    ref,
                                    seqname,
                                    variants,
                                    dag_start_position,
                                    nt_table,
                                    mat,
                                    params.flat_input_vcf);

            if (params.debug) {
                cerr << "graph has " << graph->size << " nodes" << endl;
                cerr << "DAG generated from input variants over "
                     << seqname << ":" << dag_start_position << "-" << dag_start_position + dag_window_size
                     << endl;
            }
            if (params.display_dag) {
                gssw_graph_print(graph);
            }

            // a graph with no nodes, or a single node over all-N reference,
            // gives nothing to realign against
            if ((graph->size == 1 && allN(ref)) || graph->size == 0) {
                if (params.debug) {
                    cerr << "DAG is empty (1 node, all N). Alignment is irrelevant." << endl;
                }
                emptyDAG = true;
            } else {
                emptyDAG = false;
            }

        }

        AlignmentStats stats_before;
        bool was_mapped = alignment.IsMapped();
        bool has_realigned = false;
        if (was_mapped) {
            // extend the cached reference string when the read runs past the window
            if (dag_start_position + dag_window_size < alignment.GetEndPosition()) {
                ref = reference.getSubSequence(seqname,
                                               max((long int) 0, dag_start_position),
                                               alignment.GetEndPosition() - dag_start_position); // 0/1 conversion
            }
        }

        if (params.debug) {
            if (emptyDAG) {
                cerr << "cannot realign against empty (all-N single node) graph" << endl;
            }
        }

        if (!emptyDAG && shouldRealign(alignment, ref, dag_start_position, params, stats_before)) {

            ++total_realigned;

            if (params.debug) {
                cerr << "realigning: " << alignment.Name
                     << " " << alignment.QueryBases << endl
                     << " aligned @ " << alignment.Position
                     << " to variant graph over "
                     << seqname
                     << ":" << dag_start_position
                     << "-" << dag_start_position + dag_window_size << endl;
            }

            // Declared outside the try block so the mapping can be released on
            // every path, including when an exception is thrown below.
            // NOTE(review): assumes the caller owns the mapping returned by
            // gswalign() -- confirm against gswalign's implementation.
            gssw_graph_mapping* gm = NULL;

            try {

                Cigar flat_cigar;
                string read = alignment.QueryBases;
                string qualities = alignment.Qualities;
                int score;
                long int position;
                string strand;
                gm = gswalign(graph, ref_map, read, qualities, params, position, score, flat_cigar, strand, nt_table, mat);

                if (params.dry_run) {

                    if (strand == "-" && !alignment.IsMapped()) {
                        read = reverseComplement(read);
                    }
                    cout << read << endl;
                    cout << graph_mapping_to_string(gm) << endl;
                    cout << score << " " << strand << " " << position << " " << flat_cigar << endl;

                } else {

                    // TODO the qualities are not on the right side of the read
                    if (strand == "-" && alignment.IsMapped()) {
                        // if we're realigning, this is always true unless we swapped strands
                        alignment.SetIsReverseStrand(true);
                    }
                    alignment.QueryBases = read;
                    alignment.Qualities = qualities;

                    alignment.Position = position;
                    alignment.SetIsMapped(true);
                    if (!alignment.MapQuality) {
                        alignment.MapQuality = 20; // horrible hack... at least approximate with alignment mismatches against graph
                    }

                    // check if somehow we've ended up with an indel at the ends
                    // if so, grab the reference sequence right beyond it and add
                    // a single match to the cigar, allowing variant detection methods
                    // to run on the results without internal modification
                    Cigar& cigar = flat_cigar;
                    int flankSize = params.flatten_flank;
                    // NOTE(review): front()/back() assume a non-empty cigar; a
                    // one-element cigar reaching at(1) / at(size()-2) is only
                    // handled by the catch-all below -- confirm gswalign never
                    // yields an empty cigar.
                    if (cigar.front().isIndel() ||
                        (cigar.front().isSoftclip()
                         && cigar.at(1).isIndel())) {
                        alignment.Position -= flankSize;
                        string refBase = reference.getSubSequence(seqname, alignment.Position, flankSize);
                        if (cigar.front().isSoftclip()) {
                            alignment.QueryBases.erase(alignment.QueryBases.begin(),
                                                       alignment.QueryBases.begin()+cigar.front().length);
                            alignment.Qualities.erase(alignment.Qualities.begin(),
                                                      alignment.Qualities.begin()+cigar.front().length);
                            cigar.erase(cigar.begin());
                        }
                        alignment.QueryBases.insert(0, refBase);
                        alignment.Qualities.insert(0, string(flankSize, shortInt2QualityChar(30)));
                        Cigar newCigar; newCigar.push_back(CigarElement(flankSize, 'M'));
                        newCigar.append(flat_cigar);
                        flat_cigar = newCigar;
                    }
                    if (cigar.back().isIndel() ||
                        (cigar.back().isSoftclip()
                         && cigar.at(cigar.size()-2).isIndel())) {
                        string refBase = reference.getSubSequence(seqname,
                                                                  alignment.Position
                                                                  + flat_cigar.refLen(),
                                                                  flankSize);
                        if (cigar.back().isSoftclip()) {
                            alignment.QueryBases.erase(alignment.QueryBases.end()-cigar.back().length,
                                                       alignment.QueryBases.end());
                            alignment.Qualities.erase(alignment.Qualities.end()-cigar.back().length,
                                                      alignment.Qualities.end());
                            cigar.pop_back();
                        }
                        Cigar newCigar; newCigar.push_back(CigarElement(flankSize, 'M'));
                        flat_cigar.append(newCigar);
                        alignment.QueryBases.append(refBase);
                        alignment.Qualities.append(string(flankSize, shortInt2QualityChar(30)));
                    }

                    flat_cigar.toCigarData(alignment.CigarData);

                    if (dag_start_position + dag_window_size < alignment.GetEndPosition()) {
                        ref = reference.getSubSequence(seqname,
                                                       max((long int) 0, dag_start_position),
                                                       alignment.GetEndPosition() - dag_start_position); // 0/1 conversion
                    }

                    AlignmentStats stats_after;
                    countMismatchesAndGaps(alignment, flat_cigar, ref, dag_start_position, stats_after, params.debug);

                    // we accept the new alignment if...
                    //
                    // The whole "was unmapped OR got better" alternative is
                    // parenthesised so that acceptRealignment() applies to
                    // previously-unmapped reads as well. Without the outer
                    // parentheses `&&` binds tighter than `||` and unmapped
                    // reads skipped that check.
                    if ((!was_mapped  // it wasn't mapped previously
                         // or if we have removed soft clips or mismatches (per quality) from the alignment
                         || ((stats_before.softclip_qsum + stats_before.mismatch_qsum
                              >= stats_after.softclip_qsum + stats_after.mismatch_qsum)
                             // and if we have added gaps, we have added them to remove mismatches or softclips
                             && (stats_before.gaps >= stats_after.gaps // accept any time we reduce gaps while not increasing softclips/mismatches
                                 || (stats_before.gaps < stats_after.gaps // and allow gap increases when they improve the alignment
                                     && (stats_before.softclip_qsum + stats_before.mismatch_qsum
                                         >
                                         stats_after.softclip_qsum + stats_after.mismatch_qsum)))))
                        // and the alignment must not have more than the acceptable number of gaps, softclips, or mismatches
                        // as provided in input parameters
                        && acceptRealignment(alignment, ref, dag_start_position, params, stats_after)) {

                        // keep the alignment
                        // TODO require threshold of softclips to keep alignment (or count of gaps, mismatches,...)
                        if (params.debug) {
                            cerr << "realigned " << alignment.Name << " to graph, which it maps to with "
                                 << stats_after.mismatch_qsum << "q in mismatches and "
                                 << stats_after.softclip_qsum << "q in soft clips" << endl;
                        }
                        ++total_improved;
                        has_realigned = true;
                    } else {
                        // reset to old version of alignment
                        if (params.debug) {
                            cerr << "failed realignment of " << alignment.Name << " to graph, which it maps to with: "
                                 << stats_after.mismatch_qsum << "q in mismatches " << "(vs " << stats_before.mismatch_qsum << "q before), and "
                                 << stats_after.softclip_qsum << "q in soft clips " << "(vs " << stats_before.softclip_qsum << "q before) " << endl;
                        }
                        has_realigned = false;
                        alignment = originalAlignment;
                    }
                }
            } catch (...) {
                cerr << "exception when realigning " << alignment.Name
                     << " at position " << referenceIDToName[alignment.RefID]
                     << ":" << alignment.Position
                     << " " << alignment.QueryBases << endl;
                // reset to original alignment
                has_realigned = false;
                alignment = originalAlignment;
            }

            // release the graph mapping now that nothing refers to it any more
            if (gm != NULL) {
                gssw_graph_mapping_destroy(gm);
            }
        }

        // ensure correct order if alignments move
        // (everything at or below this position can no longer be overtaken)
        long int maxOutputPos = initialAlignmentPosition - dag_window_size;
        // if we switched sequences we need to flush out all the reads from the previous one
        if (seqname != currentSeqname) {
            // so the max output position is set past the end of the last chromosome
            if (!currentSeqname.empty()) {
                maxOutputPos = reference.sequenceLength(currentSeqname) + dag_window_size;
            }
            currentSeqname = seqname;
        }

        if (!params.dry_run) {
            map<long int, vector<BamAlignment> >::iterator p = alignmentSortQueue.begin();
            for ( ; p != alignmentSortQueue.end(); ++p) {
                // except if we are running in unsorted mode, stop when we are at the window size
                if (!params.unsorted_output && p->first > maxOutputPos) {
                    break; // no more to do
                } else {
                    for (vector<BamAlignment>::iterator a = p->second.begin(); a != p->second.end(); ++a) {
                        writer.SaveAlignment(*a);
                    }
                }
            }
            // drop everything that was just written
            if (p != alignmentSortQueue.begin()) {
                alignmentSortQueue.erase(alignmentSortQueue.begin(), p);
            }
            if (!params.only_realigned || has_realigned) {
                alignmentSortQueue[alignment.Position].push_back(alignment);
            }
        }
    } // end GetNextAlignment loop

    // write whatever is still queued
    if (!params.dry_run) {
        map<long int, vector<BamAlignment> >::iterator p = alignmentSortQueue.begin();
        for ( ; p != alignmentSortQueue.end(); ++p) {
            for (vector<BamAlignment>::iterator a = p->second.begin(); a != p->second.end(); ++a)
                writer.SaveAlignment(*a);
        }
    }

    gssw_graph_destroy(graph);
    free(nt_table);
    free(mat);

    reader.Close();
    writer.Close();

    if (params.debug) {
        cerr << "total reads:\t" << total_reads << endl;
        cerr << "realigned:\t" << total_realigned << endl;
        cerr << "improved:\t" << total_improved << endl;
    }

}