void PairToPair::IntersectPairs() { // load the "B" bed file into a map so // that we can easily compare "A" to it for overlaps _bedB->loadBedPEFileIntoMap(); int lineNum = 0; vector<BEDCOV> hitsA1B1, hitsA1B2, hitsA2B1, hitsA2B2; // reserve some space hitsA1B1.reserve(100); hitsA1B2.reserve(100); hitsA2B1.reserve(100); hitsA2B2.reserve(100); BedLineStatus bedStatus; BEDPE a, nullBedPE; _bedA->Open(); while ((bedStatus = _bedA->GetNextBedPE(a, lineNum)) != BED_INVALID) { if (bedStatus == BED_VALID) { // identify overlaps b/w the pairs FindOverlaps(a, hitsA1B1, hitsA1B2, hitsA2B1, hitsA2B2); // reset space for next BEDPE hitsA1B1.clear(); hitsA1B2.clear(); hitsA2B1.clear(); hitsA2B2.clear(); a = nullBedPE; } } _bedA->Close(); }
/*
    Constructor

    Remembers the three input paths, creates the file objects that wrap
    them, and then runs FindOverlaps() right away.
*/
ProcessPeaks::ProcessPeaks(string peakFileA,
                           string peakFileB,
                           string genomeFile)
{
    // peak set A: path + file object
    _peakFileA = peakFileA;
    _peakA     = new BedFile(peakFileA);

    // peak set B: path + file object
    _peakFileB = peakFileB;
    _peakB     = new BedFile(peakFileB);

    // genome table: path + file object
    _genomeTable = genomeFile;
    _genome      = new GenomeFile(genomeFile);

    // all inputs are in place; do the work
    FindOverlaps();
}
void BedIntersect::IntersectBed() { // load the "B" file into a map in order to // compare each entry in A to it in search of overlaps. _bedB->loadBedFileIntoMap(); int lineNum = 0; vector<BED> hits; hits.reserve(100); BED a, nullBed; BedLineStatus bedStatus; // open the "A" file, process each BED entry and searh for overlaps. _bedA->Open(); while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) { if (bedStatus == BED_VALID) { // treat the BED as a single "block" if (_obeySplits == false) { FindOverlaps(a, hits); hits.clear(); a = nullBed; } // split the BED12 into blocks and look for overlaps in each discrete block else { bedVector bedBlocks; // vec to store the discrete BED "blocks" splitBedIntoBlocks(a, lineNum, bedBlocks); vector<BED>::const_iterator bedItr = bedBlocks.begin(); vector<BED>::const_iterator bedEnd = bedBlocks.end(); for (; bedItr != bedEnd; ++bedItr) { FindOverlaps(*bedItr, hits); hits.clear(); } a = nullBed; } } } _bedA->Close(); }
void PairToPair::IntersectPairs() { // load the "B" bed file into a map so // that we can easily compare "A" to it for overlaps _bedB->loadBedPEFileIntoMap(); int lineNum = 0; BedLineStatus bedStatus; BEDPE a, nullBedPE; _bedA->Open(); while ((bedStatus = _bedA->GetNextBedPE(a, lineNum)) != BED_INVALID) { if (bedStatus == BED_VALID) { // identify overlaps b/w the pairs FindOverlaps(a); a = nullBedPE; } } _bedA->Close(); }
void BedIntersectPE::IntersectBedPE() { // load the "B" bed file into a map so // that we can easily compare "A" to it for overlaps _bedB->loadBedFileIntoMap(); int lineNum = 0; // current input line number vector<BED> hits, hits1, hits2; // vector of potential hits // reserve some space hits.reserve(100); hits1.reserve(100); hits2.reserve(100); BEDPE a, nullBedPE; BedLineStatus bedStatus; _bedA->Open(); while ((bedStatus = _bedA->GetNextBedPE(a, lineNum)) != BED_INVALID) { if (bedStatus == BED_VALID) { if ( (_searchType == "ispan") || (_searchType == "ospan") || (_searchType == "notispan") || (_searchType == "notospan") ) { if (a.chrom1 == a.chrom2) { FindSpanningOverlaps(a, hits, _searchType); hits.clear(); } } else { FindOverlaps(a, hits1, hits2, _searchType); hits1.clear(); hits2.clear(); } a = nullBedPE; } } _bedA->Close(); }
void BedIntersectPE::ProcessBamBlock (const BamAlignment &bam1, const BamAlignment &bam2, const RefVector &refs, BamWriter &writer) { vector<BED> hits, hits1, hits2; // vector of potential hits hits.reserve(1000); // reserve some space hits1.reserve(1000); hits2.reserve(1000); bool overlapsFound; // flag to indicate if overlaps were found if ( (_searchType == "either") || (_searchType == "xor") || (_searchType == "both") || (_searchType == "notboth") || (_searchType == "neither") ) { // create a new BEDPE feature from the BAM alignments. BEDPE a; ConvertBamToBedPE(bam1, bam2, refs, a); if (_bamOutput == true) { // BAM output // write to BAM if correct hits found overlapsFound = FindOneOrMoreOverlaps(a, _searchType); if (overlapsFound == true) { writer.SaveAlignment(bam1); writer.SaveAlignment(bam2); } } else { // BEDPE output FindOverlaps(a, hits1, hits2, _searchType); hits1.clear(); hits2.clear(); } } else if ( (_searchType == "ispan") || (_searchType == "ospan") ) { // only look for ispan and ospan when both ends are mapped. if (bam1.IsMapped() && bam2.IsMapped()) { // only do an inspan or outspan check if the alignment is intrachromosomal if (bam1.RefID == bam2.RefID) { // create a new BEDPE feature from the BAM alignments. BEDPE a; ConvertBamToBedPE(bam1, bam2, refs, a); if (_bamOutput == true) { // BAM output // look for overlaps, and write to BAM if >=1 were found overlapsFound = FindOneOrMoreSpanningOverlaps(a, _searchType); if (overlapsFound == true) { writer.SaveAlignment(bam1); writer.SaveAlignment(bam2); } } else { // BEDPE output FindSpanningOverlaps(a, hits, _searchType); hits.clear(); } } } } else if ( (_searchType == "notispan") || (_searchType == "notospan") ) { // only look for notispan and notospan when both ends are mapped. if (bam1.IsMapped() && bam2.IsMapped()) { // only do an inspan or outspan check if the alignment is intrachromosomal if (bam1.RefID == bam2.RefID) { // create a new BEDPE feature from the BAM alignments. 
BEDPE a; ConvertBamToBedPE(bam1, bam2, refs, a); if (_bamOutput == true) { // BAM output // write to BAM if there were no overlaps overlapsFound = FindOneOrMoreSpanningOverlaps(a, _searchType); if (overlapsFound == false) { writer.SaveAlignment(bam1); writer.SaveAlignment(bam2); } } else { // BEDPE output FindSpanningOverlaps(a, hits, _searchType); hits.clear(); } } // if inter-chromosomal or orphaned, we know it's not ispan and not ospan else if (_bamOutput == true) { writer.SaveAlignment(bam1); writer.SaveAlignment(bam2); } } // if both ends aren't mapped, we know that it's notispan and not ospan else if (_bamOutput == true) { writer.SaveAlignment(bam1); writer.SaveAlignment(bam2); } } }
/*
    Intersect the alignments of a BAM file with the BED file "B".

    bamFile : path of the BAM file to read

    With _bamOutput set, alignments that meet the overlap requirement
    (or that fail it, when _noHit is set) are written as BAM to stdout.
    Otherwise each mapped alignment is converted to a BED record and passed
    to FindOverlaps() / FindBlockedOverlaps().
*/
void BedIntersect::IntersectBam(string bamFile) {

    // load the "B" bed file into a map so
    // that we can easily compare "A" to it for overlaps
    _bedB = new BedFile(_bedBFile);
    _bedB->loadBedFileIntoMap();

    // create a dummy BED A file for printing purposes if not
    // using BAM output.
    if (_bamOutput == false) {
        _bedA = new BedFile(_bedAFile);
        _bedA->bedType = 12;
    }

    // open the BAM file
    BamReader reader;
    BamWriter writer;
    reader.Open(bamFile);

    // get header & reference information
    string bamHeader = reader.GetHeaderText();
    RefVector refs   = reader.GetReferenceData();

    // open a BAM output to stdout if we are writing BAM
    if (_bamOutput == true) {
        // set compression mode
        BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
        if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
        writer.SetCompressionMode(compressionMode);
        // open our BAM writer
        writer.Open("stdout", bamHeader, refs);
    }

    vector<BED> hits;
    // reserve some space
    hits.reserve(100);

    BamAlignment bam;
    // process each alignment in turn
    while (reader.GetNextAlignment(bam)) {

        // An unaligned read can never overlap anything. Keep it when
        // reporting non-hits (-v), but only in BAM-output mode: the writer
        // is opened solely under _bamOutput, so it must not be written to
        // otherwise.
        if (!bam.IsMapped()) {
            if ((_noHit == true) && (_bamOutput == true))
                writer.SaveAlignment(bam);
            continue;
        }

        // break alignment into discrete blocks
        bedVector bed_blocks;
        string chrom = refs.at(bam.RefID).RefName;
        GetBamBlocks(bam, chrom, bed_blocks, false, true);

        // create a basic BED entry from the BAM alignment
        BED bed;
        MakeBedFromBam(bam, chrom, bed_blocks, bed);

        bool overlapsFound = false;
        if (_obeySplits == true) {
            // find the hits that overlap with the full span of the blocked BED
            _bedB->allHits(bed.chrom, bed.start, bed.end, bed.strand,
                           hits, _sameStrand, _diffStrand,
                           _overlapFraction, _reciprocal);
            // find the overlaps between the blocks in A and B
            overlapsFound = FindBlockedOverlaps(bed, bed_blocks, hits, _bamOutput);
        }
        else if (_bamOutput == true) {
            // BAM output only needs a yes/no answer for the full span
            overlapsFound = _bedB->anyHits(bed.chrom, bed.start, bed.end, bed.strand,
                                           _sameStrand, _diffStrand,
                                           _overlapFraction, _reciprocal);
        }
        else {
            // BED output, no splitting
            FindOverlaps(bed, hits);
        }

        // save the BAM alignment if overlap reqs. were met
        if (_bamOutput == true) {
            if ((overlapsFound == true) && (_noHit == false))
                writer.SaveAlignment(bam);
            else if ((overlapsFound == false) && (_noHit == true))
                writer.SaveAlignment(bam);
        }
        hits.clear();
    }

    // close the relevant BAM files.
    reader.Close();
    if (_bamOutput == true) {
        writer.Close();
    }
}
void BedIntersect::IntersectBed() { // create new BED file objects for A and B _bedA = new BedFile(_bedAFile); _bedB = new BedFile(_bedBFile); if (_sortedInput == false) { // load the "B" file into a map in order to // compare each entry in A to it in search of overlaps. _bedB->loadBedFileIntoMap(); vector<BED> hits; hits.reserve(100); BED a; // open the "A" file, process each BED entry and searh for overlaps. _bedA->Open(); // report A's header first if asked. if (_printHeader == true) { _bedA->PrintHeader(); } while (_bedA->GetNextBed(a)) { if (_bedA->_status == BED_VALID) { // treat the BED as a single "block" if (_obeySplits == false) FindOverlaps(a, hits); // split the BED12 into blocks and look for overlaps in each discrete block else { // find the hits that overlap with the full span of the blocked BED _bedB->allHits(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand, _overlapFraction, _reciprocal); // break a into discrete blocks, as we need to // measure overlap with the individual blocks, not the full span. bedVector a_blocks; GetBedBlocks(a, a_blocks); // find the overlaps between the block in A and B // last parm is false as a is not a BAM entry FindBlockedOverlaps(a, a_blocks, hits, false); } hits.clear(); } } _bedA->Close(); } else { // use the chromsweep algorithm to detect overlaps on the fly. ChromSweep sweep = ChromSweep(_bedA, _bedB, _sameStrand, _diffStrand, _overlapFraction, _reciprocal, _printHeader); pair<BED, vector<BED> > hit_set; hit_set.second.reserve(10000); while (sweep.Next(hit_set)) { if (_obeySplits == false) processHits(hit_set.first, hit_set.second); else { bedVector a_blocks; GetBedBlocks(hit_set.first, a_blocks); FindBlockedOverlaps(hit_set.first, a_blocks, hit_set.second, false); } } } }
/*
    Intersect the alignments of a BAM file with the BED file "B".

    bamFile : path of the BAM file to read

    Every mapped alignment is turned into a BED record. With _bamOutput set,
    the alignment is written as BAM to stdout when it has at least one
    overlap (or when it has none, if _noHit is set). Otherwise the record is
    passed to FindOverlaps(). With _obeySplits set, overlaps are searched
    per alignment block rather than over the full alignment span.
    Unmapped alignments are skipped.
*/
void BedIntersect::IntersectBam(string bamFile) {

    // load the "B" bed file into a map so
    // that we can easily compare "A" to it for overlaps
    _bedB->loadBedFileIntoMap();

    // open the BAM file
    BamReader reader;
    BamWriter writer;
    reader.Open(bamFile);

    // get header & reference information
    string header  = reader.GetHeaderText();
    RefVector refs = reader.GetReferenceData();

    // open a BAM output to stdout if we are writing BAM
    if (_bamOutput == true) {
        writer.Open("stdout", header, refs, _isUncompressedBam);
    }

    vector<BED> hits;
    // reserve some space
    hits.reserve(100);

    _bedA->bedType = 6;
    BamAlignment bam;
    // process each alignment in turn
    while (reader.GetNextAlignment(bam)) {

        if (!bam.IsMapped())
            continue;

        // build a BED record from the alignment
        BED a;
        a.chrom = refs.at(bam.RefID).RefName;
        a.start = bam.Position;
        a.end   = bam.GetEndPosition(false);

        // build the name field from the BAM alignment.
        a.name = bam.Name;
        if (bam.IsFirstMate())  a.name += "/1";
        if (bam.IsSecondMate()) a.name += "/2";

        a.score  = ToString(bam.MapQuality);
        a.strand = "+";
        if (bam.IsReverseStrand()) a.strand = "-";

        if (_bamOutput == true) {
            bool overlapsFound = false;

            if (_obeySplits == false) {
                // treat the BAM alignment as a single "block"
                overlapsFound = FindOneOrMoreOverlap(a);
            }
            else {
                // split the BAM alignment into discrete blocks and
                // look for overlaps only within each block.
                bedVector bedBlocks;
                // we don't want to split on "D" ops, hence the "false"
                getBamBlocks(bam, refs, bedBlocks, false);

                for (bedVector::iterator block = bedBlocks.begin();
                     block != bedBlocks.end();
                     ++block) {
                    // test the block itself; testing the full span "a" here
                    // would count hits that fall between the blocks
                    if (FindOneOrMoreOverlap(*block) == true)
                        overlapsFound = true;
                }
            }

            // keep hits normally, or non-hits when _noHit is set
            if (overlapsFound == true) {
                if (_noHit == false)
                    writer.SaveAlignment(bam);
            }
            else {
                if (_noHit == true)
                    writer.SaveAlignment(bam);
            }
        }
        else {
            if (_obeySplits == false) {
                // treat the BAM alignment as a single BED "block"
                FindOverlaps(a, hits);
                hits.clear();
            }
            else {
                // split the BAM alignment into discrete BED blocks and
                // look for overlaps only within each block.
                bedVector bedBlocks;
                getBamBlocks(bam, refs, bedBlocks, false);

                vector<BED>::const_iterator bedItr = bedBlocks.begin();
                vector<BED>::const_iterator bedEnd = bedBlocks.end();
                for (; bedItr != bedEnd; ++bedItr) {
                    FindOverlaps(*bedItr, hits);
                    hits.clear();
                }
            }
        }
    }

    // close the relevant BAM files.
    reader.Close();
    if (_bamOutput == true) {
        writer.Close();
    }
}