Example #1
0
void PairToPair::IntersectPairs() {
	
	// load the "B" bed file into a map so
	// that we can easily compare "A" to it for overlaps
	_bedB->loadBedPEFileIntoMap();
	
	int lineNum = 0;	
	vector<BEDCOV> hitsA1B1, hitsA1B2, hitsA2B1, hitsA2B2;
	// reserve some space
	hitsA1B1.reserve(100); hitsA1B2.reserve(100); hitsA2B1.reserve(100); hitsA2B2.reserve(100);
	
	BedLineStatus bedStatus;
	BEDPE a, nullBedPE;
	
	_bedA->Open();	
	while ((bedStatus = _bedA->GetNextBedPE(a, lineNum)) != BED_INVALID) {
		if (bedStatus == BED_VALID) {
            // identify overlaps b/w the pairs
			FindOverlaps(a, hitsA1B1, hitsA1B2, hitsA2B1, hitsA2B2);
		
			// reset space for next BEDPE
			hitsA1B1.clear(); hitsA1B2.clear(); hitsA2B1.clear(); hitsA2B2.clear();		
			a = nullBedPE;
		}
	}
	_bedA->Close();
}
Example #2
0
/*
Constructor.

Records the three input file names, builds a BedFile for each peak file and
a GenomeFile for the genome file, and then immediately calls FindOverlaps().
*/
ProcessPeaks::ProcessPeaks(string peakFileA, string peakFileB, string genomeFile) {
    // peak set A
    _peakFileA = peakFileA;
    _peakA     = new BedFile(peakFileA);

    // peak set B
    _peakFileB = peakFileB;
    _peakB     = new BedFile(peakFileB);

    // genome table
    _genomeTable = genomeFile;
    _genome      = new GenomeFile(genomeFile);

    // all of the work is triggered from the constructor
    FindOverlaps();
}
Example #3
0
void BedIntersect::IntersectBed() {

	// load the "B" file into a map in order to 
	// compare each entry in A to it in search of overlaps.
	_bedB->loadBedFileIntoMap();                                                                                                                 
	
	int lineNum = 0;
	vector<BED> hits;
	hits.reserve(100);
	BED a, nullBed;	
	BedLineStatus bedStatus;
	
	// open the "A" file, process each BED entry and searh for overlaps.
	_bedA->Open();
	while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {
		if (bedStatus == BED_VALID) {
		    // treat the BED as a single "block"
		    if (_obeySplits == false) {
    			FindOverlaps(a, hits);
    			hits.clear();
    			a = nullBed;
			}
			// split the BED12 into blocks and look for overlaps in each discrete block
            else {
                bedVector bedBlocks;  // vec to store the discrete BED "blocks"
                splitBedIntoBlocks(a, lineNum, bedBlocks);
                
                vector<BED>::const_iterator bedItr  = bedBlocks.begin();
            	vector<BED>::const_iterator bedEnd  = bedBlocks.end();
            	for (; bedItr != bedEnd; ++bedItr) {
        	        FindOverlaps(*bedItr, hits);
                    hits.clear();
        	    }
        	    a = nullBed;  
            }
		}
	}
	_bedA->Close();
}
Example #4
0
void PairToPair::IntersectPairs() {

    // load the "B" bed file into a map so
    // that we can easily compare "A" to it for overlaps
    _bedB->loadBedPEFileIntoMap();

    int lineNum = 0;
    BedLineStatus bedStatus;
    BEDPE a, nullBedPE;

    _bedA->Open();
    while ((bedStatus = _bedA->GetNextBedPE(a, lineNum)) != BED_INVALID) {
        if (bedStatus == BED_VALID) {
            // identify overlaps b/w the pairs
            FindOverlaps(a);
            a = nullBedPE;
        }
    }
    _bedA->Close();
}
Example #5
0
void BedIntersectPE::IntersectBedPE() {

    // load the "B" bed file into a map so
    // that we can easily compare "A" to it for overlaps
    _bedB->loadBedFileIntoMap();

    int lineNum = 0;                    // current input line number
    vector<BED> hits, hits1, hits2;     // vector of potential hits

    // reserve some space
    hits.reserve(100);
    hits1.reserve(100);
    hits2.reserve(100);

    BEDPE a, nullBedPE;
    BedLineStatus bedStatus;

    _bedA->Open();
    while ((bedStatus = _bedA->GetNextBedPE(a, lineNum)) != BED_INVALID) {
        if (bedStatus == BED_VALID) {
            if ( (_searchType == "ispan") || (_searchType == "ospan") ||
                 (_searchType == "notispan") || (_searchType == "notospan") ) {
                if (a.chrom1 == a.chrom2) {
                    FindSpanningOverlaps(a, hits, _searchType);
                    hits.clear();
                }
            }
            else {
                FindOverlaps(a, hits1, hits2, _searchType);
                hits1.clear();
                hits2.clear();
            }
            a = nullBedPE;
        }
    }
    _bedA->Close();
}
Example #6
0
void BedIntersectPE::ProcessBamBlock (const BamAlignment &bam1, const BamAlignment &bam2,
                                      const RefVector &refs, BamWriter &writer) {

    vector<BED> hits, hits1, hits2;         // vector of potential hits
    hits.reserve(1000);                     // reserve some space
    hits1.reserve(1000);
    hits2.reserve(1000);

    bool overlapsFound;                     // flag to indicate if overlaps were found

    if ( (_searchType == "either") || (_searchType == "xor") ||
              (_searchType == "both") || (_searchType == "notboth") ||
              (_searchType == "neither") ) {

        // create a new BEDPE feature from the BAM alignments.
        BEDPE a;
        ConvertBamToBedPE(bam1, bam2, refs, a);
        if (_bamOutput == true) {   // BAM output
            // write to BAM if correct hits found
            overlapsFound = FindOneOrMoreOverlaps(a, _searchType);
            if (overlapsFound == true) {
                writer.SaveAlignment(bam1);
                writer.SaveAlignment(bam2);
            }
        }
        else {  // BEDPE output
            FindOverlaps(a, hits1, hits2, _searchType);
            hits1.clear();
            hits2.clear();
        }
    }
    else if ( (_searchType == "ispan") || (_searchType == "ospan") ) {
        // only look for ispan and ospan when both ends are mapped.
        if (bam1.IsMapped() && bam2.IsMapped()) {
            // only do an inspan or outspan check if the alignment is intrachromosomal
            if (bam1.RefID == bam2.RefID) {
                // create a new BEDPE feature from the BAM alignments.
                BEDPE a;
                ConvertBamToBedPE(bam1, bam2, refs, a);
                if (_bamOutput == true) {   // BAM output
                    // look for overlaps, and write to BAM if >=1 were found
                    overlapsFound = FindOneOrMoreSpanningOverlaps(a, _searchType);
                    if (overlapsFound == true) {
                        writer.SaveAlignment(bam1);
                        writer.SaveAlignment(bam2);
                    }
                }
                else {  // BEDPE output
                    FindSpanningOverlaps(a, hits, _searchType);
                    hits.clear();
                }
            }
        }
    }
    else if ( (_searchType == "notispan") || (_searchType == "notospan") ) {
        // only look for notispan and notospan when both ends are mapped.
        if (bam1.IsMapped() && bam2.IsMapped()) {
            // only do an inspan or outspan check if the alignment is intrachromosomal
            if (bam1.RefID == bam2.RefID) {
                // create a new BEDPE feature from the BAM alignments.
                BEDPE a;
                ConvertBamToBedPE(bam1, bam2, refs, a);
                if (_bamOutput == true) {   // BAM output
                    // write to BAM if there were no overlaps
                    overlapsFound = FindOneOrMoreSpanningOverlaps(a, _searchType);
                    if (overlapsFound == false) {
                        writer.SaveAlignment(bam1);
                        writer.SaveAlignment(bam2);
                    }
                }
                else {  // BEDPE output
                    FindSpanningOverlaps(a, hits, _searchType);
                    hits.clear();
                }
            }
            // if inter-chromosomal or orphaned, we know it's not ispan and not ospan
            else if (_bamOutput == true) {
                writer.SaveAlignment(bam1);
                writer.SaveAlignment(bam2);
            }
        }
        // if both ends aren't mapped, we know that it's notispan and not ospan
        else if (_bamOutput == true) {
            writer.SaveAlignment(bam1);
            writer.SaveAlignment(bam2);
        }
    }
}
Example #7
0
// Intersects the alignments of a BAM file with the "B" BED file.
//
//   bamFile - name of the BAM file to read
//
// When _bamOutput is set, alignments that satisfy the overlap requirement
// (inverted when _noHit is set) are written as BAM to "stdout". Otherwise
// each mapped alignment is converted to a BED record and handed to
// FindOverlaps() / FindBlockedOverlaps().
void BedIntersect::IntersectBam(string bamFile) {

    // load the "B" bed file into a map so
    // that we can easily compare "A" to it for overlaps
    _bedB = new BedFile(_bedBFile);
    _bedB->loadBedFileIntoMap();

    // create a dummy BED A file for printing purposes if not
    // using BAM output.
    if (_bamOutput == false) {
        _bedA = new BedFile(_bedAFile);
        _bedA->bedType = 12;
    }

    // open the BAM file
    BamReader reader;
    BamWriter writer;
    reader.Open(bamFile);

    // get header & reference information
    string bamHeader  = reader.GetHeaderText();
    RefVector refs    = reader.GetReferenceData();

    // open a BAM output to stdout if we are writing BAM
    if (_bamOutput == true) {
        // set compression mode
        BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
        if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
        writer.SetCompressionMode(compressionMode);
        // open our BAM writer
        writer.Open("stdout", bamHeader, refs);
    }

    vector<BED> hits;
    // reserve some space
    hits.reserve(100);

    BamAlignment bam;
    // process each alignment in turn.
    while (reader.GetNextAlignment(bam)) {

        // An unaligned read can never overlap anything, so it is kept when
        // non-overlapping reads were requested (_noHit). The writer is only
        // opened when _bamOutput is set, so it must not be written to
        // otherwise.
        if (!bam.IsMapped()) {
            if ((_bamOutput == true) && (_noHit == true))
                writer.SaveAlignment(bam);
            continue;
        }

        // break alignment into discrete blocks,
        bedVector bed_blocks;
        string chrom = refs.at(bam.RefID).RefName;
        GetBamBlocks(bam, chrom, bed_blocks, false, true);

        // create a basic BED entry from the BAM alignment
        BED bed;
        MakeBedFromBam(bam, chrom, bed_blocks, bed);

        bool overlapsFound = false;
        if (_obeySplits == true) {
            // find the hits that overlap with the full span of the blocked BED
            _bedB->allHits(bed.chrom, bed.start, bed.end, bed.strand,
                           hits, _sameStrand, _diffStrand,
                           _overlapFraction, _reciprocal);
            // find the overlaps between the block in A and B
            overlapsFound = FindBlockedOverlaps(bed, bed_blocks, hits, _bamOutput);
        }
        else if (_bamOutput == true) {
            // BAM output only needs to know whether any hit exists
            overlapsFound = _bedB->anyHits(bed.chrom, bed.start, bed.end,
                                           bed.strand, _sameStrand, _diffStrand,
                                           _overlapFraction, _reciprocal);
        }
        else {
            // BED output of the unsplit alignment
            FindOverlaps(bed, hits);
        }

        // save the BAM alignment if overlap reqs. were met
        if (_bamOutput == true) {
            const bool keepHit   = (overlapsFound == true)  && (_noHit == false);
            const bool keepNoHit = (overlapsFound == false) && (_noHit == true);
            if (keepHit || keepNoHit)
                writer.SaveAlignment(bam);
        }
        hits.clear();
    }

    // close the relevant BAM files.
    reader.Close();
    if (_bamOutput == true) {
        writer.Close();
    }
}
Example #8
0
void BedIntersect::IntersectBed() {

    // create new BED file objects for A and B
    _bedA = new BedFile(_bedAFile);
    _bedB = new BedFile(_bedBFile);

    if (_sortedInput == false) {
        // load the "B" file into a map in order to
        // compare each entry in A to it in search of overlaps.
        _bedB->loadBedFileIntoMap();

        vector<BED> hits;
        hits.reserve(100);
        BED a;

        // open the "A" file, process each BED entry and searh for overlaps.
        _bedA->Open();
        // report A's header first if asked.
        if (_printHeader == true) {
            _bedA->PrintHeader();
        }
        while (_bedA->GetNextBed(a)) {
            if (_bedA->_status == BED_VALID) {
                // treat the BED as a single "block"
                if (_obeySplits == false)
                    FindOverlaps(a, hits);
                // split the BED12 into blocks and look for overlaps in each discrete block
                else {
                    // find the hits that overlap with the full span of the blocked BED
                    _bedB->allHits(a.chrom, a.start, a.end, a.strand,
                                   hits, _sameStrand, _diffStrand,
                                   _overlapFraction, _reciprocal);
                    // break a into discrete blocks, as we need to 
                    // measure overlap with the individual blocks, not the full span.
                    bedVector a_blocks; 
                    GetBedBlocks(a, a_blocks);
                    // find the overlaps between the block in A and B 
                    // last parm is false as a is not a BAM entry
                    FindBlockedOverlaps(a, a_blocks, hits, false);
                }
                hits.clear();
            }
        }
        _bedA->Close();
    }
    else {
        // use the chromsweep algorithm to detect overlaps on the fly.
        ChromSweep sweep = ChromSweep(_bedA, _bedB, 
                                      _sameStrand, _diffStrand, 
                                      _overlapFraction, _reciprocal,
                                      _printHeader);

        pair<BED, vector<BED> > hit_set;
        hit_set.second.reserve(10000);
        while (sweep.Next(hit_set)) {
            if (_obeySplits == false)
                processHits(hit_set.first, hit_set.second);
            else {
                bedVector a_blocks; 
                GetBedBlocks(hit_set.first, a_blocks);
                FindBlockedOverlaps(hit_set.first, a_blocks, hit_set.second, false);
            }
        }
    }
}
Example #9
0
// Intersects the mapped alignments of a BAM file with the "B" BED file.
//
//   bamFile - name of the BAM file to read
//
// Each mapped alignment is converted to a BED record. With _bamOutput set,
// the alignment is written as BAM to "stdout" when it has an overlap (or,
// when _noHit is set, when it has none). Otherwise the record is handed to
// FindOverlaps(). When _obeySplits is set, overlaps are looked up per block
// of the alignment rather than for the whole alignment.
// Unmapped alignments are skipped.
void BedIntersect::IntersectBam(string bamFile) {

    // load the "B" bed file into a map so
    // that we can easily compare "A" to it for overlaps
    _bedB->loadBedFileIntoMap();

    // open the BAM file
    BamReader reader;
    BamWriter writer;
    reader.Open(bamFile);

    // get header & reference information
    string header  = reader.GetHeaderText();
    RefVector refs = reader.GetReferenceData();

    // open a BAM output to stdout if we are writing BAM
    if (_bamOutput == true) {
        writer.Open("stdout", header, refs, _isUncompressedBam);
    }

    vector<BED> hits;
    // reserve some space
    hits.reserve(100);

    _bedA->bedType = 6;
    BamAlignment bam;
    // process each alignment in turn.
    while (reader.GetNextAlignment(bam)) {

        if (!bam.IsMapped())
            continue;

        // build a BED record from the BAM alignment.
        BED a;
        a.chrom = refs.at(bam.RefID).RefName;
        a.start = bam.Position;
        a.end   = bam.GetEndPosition(false);

        // build the name field from the BAM alignment.
        a.name = bam.Name;
        if (bam.IsFirstMate()) a.name += "/1";
        if (bam.IsSecondMate()) a.name += "/2";

        a.score  = ToString(bam.MapQuality);

        a.strand = "+";
        if (bam.IsReverseStrand()) a.strand = "-";

        if (_bamOutput == true) {
            bool overlapsFound = false;
            // treat the BAM alignment as a single "block"
            if (_obeySplits == false) {
                overlapsFound = FindOneOrMoreOverlap(a);
            }
            // split the BAM alignment into discrete blocks and
            // look for overlaps only within each block.
            else {
                bedVector bedBlocks;  // vec to store the discrete BED "blocks" from a
                // we don't want to split on "D" ops, hence the "false"
                getBamBlocks(bam, refs, bedBlocks, false);

                vector<BED>::const_iterator bedItr  = bedBlocks.begin();
                vector<BED>::const_iterator bedEnd  = bedBlocks.end();
                for (; bedItr != bedEnd; ++bedItr) {
                    // test the current block, not the full alignment span:
                    // testing `a` here would ignore the split entirely.
                    if (FindOneOrMoreOverlap(*bedItr) == true)
                        overlapsFound = true;
                }
            }

            // keep hits by default, or non-hits when _noHit is set
            if (overlapsFound == true) {
                if (_noHit == false)
                    writer.SaveAlignment(bam);
            }
            else {
                if (_noHit == true)
                    writer.SaveAlignment(bam);
            }
        }
        else {
            // treat the BAM alignment as a single BED "block"
            if (_obeySplits == false) {
                FindOverlaps(a, hits);
                hits.clear();
            }
            // split the BAM alignment into discrete BED blocks and
            // look for overlaps only within each block.
            else {
                bedVector bedBlocks;  // vec to store the discrete BED "blocks" from a
                getBamBlocks(bam, refs, bedBlocks, false);

                vector<BED>::const_iterator bedItr  = bedBlocks.begin();
                vector<BED>::const_iterator bedEnd  = bedBlocks.end();
                for (; bedItr != bedEnd; ++bedItr) {
                    FindOverlaps(*bedItr, hits);
                    hits.clear();
                }
            }
        }
    }

    // close the relevant BAM files.
    reader.Close();
    if (_bamOutput == true) {
        writer.Close();
    }
}