Esempio n. 1
0
// =========================================================
// Convenience method for reporting merged blocks by strand
// =========================================================
// Writes one tab-delimited line for a merged block via printf:
//
//     chrom  start  end  [optional columns]  strand
//
// Parameters:
//   chrom, start, end - coordinates of the merged block; start is bumped by
//                       one when _bed->isZeroBased is false.
//   names             - passed to ReportMergedNames() when _reportNames is set.
//   scores            - passed to ReportMergedScores() when _reportScores is set.
//   mergeCount        - printed when _numEntries is set.
//   strand            - always printed as the last column.
//
// Every combination of _numEntries / _reportNames / _reportScores ends the
// line with the strand and a newline.  When the count is combined with names
// and/or scores, the columns follow the same order the unstranded Report()
// uses: count before scores when there are no names, count last otherwise.
//
// NOTE(review): ReportMergedNames()/ReportMergedScores() are defined
// elsewhere and are presumably responsible for their own leading tab, since
// they are called directly after the end coordinate -- confirm.
void BedMerge::ReportStranded(string chrom, int start, int end, 
                              const vector<string> &names, const vector<string> &scores,
                              int mergeCount, string strand) 
{
    if (_bed->isZeroBased == false) {start++;}
    
    printf("%s\t%d\t%d", chrom.c_str(), start, end);
    // just the merged intervals
    if (_numEntries == false && _reportNames == false && _reportScores == false) {
        printf("\t%s\n", strand.c_str());
    }
    // merged intervals and counts    
    else if (_numEntries == true && _reportNames == false && _reportScores == false) {
        printf("\t%d\t%s\n", mergeCount, strand.c_str());
    }
    // merged intervals, counts, and scores
    else if (_numEntries == true && _reportNames == false && _reportScores == true) {
        printf("\t%d", mergeCount);
        ReportMergedScores(scores);
        printf("\t%s\n", strand.c_str());
    }
    // merged intervals, counts, and names
    else if (_numEntries == true && _reportNames == true && _reportScores == false) {
        ReportMergedNames(names);
        printf("\t%d\t%s\n", mergeCount, strand.c_str());
    }
    // merged intervals, counts, names, and scores
    else if (_numEntries == true && _reportNames == true && _reportScores == true) {
        ReportMergedNames(names);
        ReportMergedScores(scores);
        printf("\t%d\t%s\n", mergeCount, strand.c_str());
    }
    // merged intervals and names        
    else if (_numEntries == false && _reportNames == true && _reportScores == false) {
        ReportMergedNames(names);
        printf("\t%s\n", strand.c_str());
    }
    // merged intervals and scores        
    else if (_numEntries == false && _reportNames == false && _reportScores == true) {
        ReportMergedScores(scores);
        printf("\t%s\n", strand.c_str());
    }
    // merged intervals, names, and scores        
    else if (_numEntries == false && _reportNames == true && _reportScores == true) {
        ReportMergedNames(names);
        ReportMergedScores(scores);
        printf("\t%s\n", strand.c_str());
    }
}
Esempio n. 2
0
// ===============================================
// Convenience method for reporting merged blocks
// ===============================================
// Writes one tab-delimited line for a merged block via printf.  The line
// always starts with chrom, start and end; the optional columns depend on
// the _numEntries, _reportNames and _reportScores flags:
//
//     names   - ReportMergedNames(names),   when _reportNames is set
//     scores  - ReportMergedScores(scores), when _reportScores is set
//     count   - mergeCount,                 when _numEntries is set
//
// Column order: names come first and scores follow them.  The count goes
// last when names are reported, and ahead of the scores when they are not.
void BedMerge::Report(string chrom, int start, 
                      int end, const vector<string> &names, 
                      const vector<string> &scores, int mergeCount) 
{
    // ARQ: removed to force all output to be zero-based, BED format, regardless of input type
    //if (_bed->isZeroBased == false) {start++;}

    // the three mandatory columns
    printf("%s\t%d\t%d", chrom.c_str(), start, end);

    // names, when requested, lead the optional columns
    if (_reportNames) {
        ReportMergedNames(names);
    }

    // without names, the count is printed ahead of any scores
    if (_numEntries && !_reportNames) {
        printf("\t%d", mergeCount);
    }

    if (_reportScores) {
        ReportMergedScores(scores);
    }

    // with names, the count closes the optional columns
    if (_numEntries && _reportNames) {
        printf("\t%d", mergeCount);
    }

    printf("\n");
}
Esempio n. 3
0
// =====================================================
// = Merge overlapping BED entries into a single entry =
// =====================================================
void BedMerge::MergeBed() {

	// load the "B" bed file into a map so
	// that we can easily compare "A" to it for overlaps
	_bed->loadBedFileIntoMapNoBin();

	// loop through each chromosome and merge their BED entries
	for (masterBedMapNoBin::iterator m = _bed->bedMapNoBin.begin(); m != _bed->bedMapNoBin.end(); ++m) {

		// bedList is already sorted by start position.
		vector<BED> bedList = m->second; 

		CHRPOS minStart = INT_MAX;
		CHRPOS maxEnd = 0;
		bool OIP = false;       // OIP = Overlap In Progress.  Lame, I realize.
		int prev = -1;
		unsigned int curr = 0;
		int mergeCount = 1;
		map<string, bool> names;

		// loop through the BED entries for this chromosome
		// and look for overlaps
		for (curr = 0; curr < bedList.size(); ++curr) {
			
			// make sure prev points to an actual element
			if (prev < 0) {
				prev = curr;
				continue;
			}

			// Is there an overlap between the current and previous entries?		
			if ( overlaps(bedList[prev].start, bedList[prev].end, 
			 			bedList[curr].start, bedList[curr].end) >= _maxDistance) {
				OIP = true;
				mergeCount++;
				minStart = min(bedList[prev].start, min(minStart, bedList[curr].start));
				maxEnd = max(bedList[prev].end, max(maxEnd, bedList[curr].end));

				names[bedList[prev].name] = true;
				names[bedList[curr].name] = true;
			}
			else if ( overlaps(minStart, maxEnd, 
							bedList[curr].start, bedList[curr].end) >= _maxDistance) {
				mergeCount++;
				minStart = min(minStart, bedList[curr].start);
				maxEnd = max(maxEnd, bedList[curr].end);
				names[bedList[curr].name] = true;
			}
			else {
				// was there an overlap befor the current entry broke it?
				if (OIP) {
					if (_numEntries) {
						cout << bedList[prev].chrom << "\t" << minStart << "\t" << maxEnd << "\t" << mergeCount << endl;
					}
					else if (_reportNames) {
						cout << bedList[prev].chrom << "\t" << minStart << "\t" << maxEnd << "\t";
						ReportMergedNames(names);
						cout << endl;
					}
					else {
						cout << bedList[prev].chrom << "\t" << minStart << "\t" << maxEnd << endl;
					}
				}
				else {
					if (_numEntries) {
						cout << bedList[prev].chrom << "\t" << bedList[prev].start << "\t" << bedList[prev].end << "\t" << mergeCount << endl;
					}
					else if (_reportNames) {
						cout << bedList[prev].chrom << "\t" << bedList[prev].start << "\t" << bedList[prev].end << "\t" << bedList[prev].name << endl;
					}
					else {
						cout << bedList[prev].chrom << "\t" << bedList[prev].start << "\t" << bedList[prev].end << endl;
					}
				}

				// reset things for the next overlapping "block"
				OIP = false;
				mergeCount = 1;			
				minStart = INT_MAX;
				maxEnd = 0;
				
				names.clear();
				names[bedList[curr].name] = true;
			}
			prev = curr;
		}

		// clean up based on the last entry for the current chromosome
		if (OIP) {
			if (_numEntries) {
				cout << bedList[prev].chrom << "\t" << minStart << "\t" << maxEnd << "\t" << mergeCount << endl;
			}
			else if (_reportNames) {
				cout << bedList[prev].chrom << "\t" << minStart << "\t" << maxEnd << "\t";
				ReportMergedNames(names);
				cout << endl;
			}
			else {
				cout << bedList[prev].chrom << "\t" << minStart << "\t" << maxEnd << endl;
			}
		}
		else {
			if (_numEntries) {
				cout << bedList[prev].chrom << "\t" << bedList[prev].start << "\t" << bedList[prev].end  << "\t" << mergeCount << endl;
			}
			else if (_reportNames) {
				cout << bedList[prev].chrom << "\t" << bedList[prev].start << "\t" << bedList[prev].end << "\t" << bedList[prev].name << endl;
			}
			else {
				cout << bedList[prev].chrom << "\t" << bedList[prev].start << "\t" << bedList[prev].end << endl;	
			}
		}
	}
}
Esempio n. 4
0
// ==================================================================================
// = Merge overlapping BED entries into a single entry, accounting for strandedness =
// ==================================================================================
void BedMerge::MergeBedStranded() {

	// load the "B" bed file into a map so
	// that we can easily compare "A" to it for overlaps
	_bed->loadBedFileIntoMapNoBin();

	// loop through each chromosome and merge their BED entries
	masterBedMapNoBin::const_iterator m    = _bed->bedMapNoBin.begin(); 
	masterBedMapNoBin::const_iterator mEnd = _bed->bedMapNoBin.end(); 
    for (; m != mEnd; ++m) {
		// bedList is already sorted by start position.
		vector<BED> bedList = m->second; 

		// make a list of the two strands to merge separately.
		vector<string> strands(2);
		strands[0] = "+";
		strands[1] = "-";

		// do two passes, one for each strand.
		for (unsigned int s = 0; s < strands.size(); s++) {

			CHRPOS minStart = INT_MAX;
			CHRPOS maxEnd = 0;
			bool OIP = false;       // OIP = Overlap In Progress.  Lame, I realize.
			int prev = -1;
			unsigned int curr = 0;
			int mergeCount = 1;
			int numOnStrand = 0;
			map<string, bool> names;	
			
			// loop through the BED entries for this chromosome
			// and look for overlaps
			for (curr = 0; curr < bedList.size(); ++curr) {

				// if forcing strandedness, move on if the hit
				// is not on the current strand.
				
				if (bedList[curr].strand != strands[s]) {
					continue;		// continue force the next iteration of the for loop.
				}
				else {
					numOnStrand++;
				}

				// make sure prev points to an actual element on the
				// current strand
				if (prev < 0) {
					if (bedList[curr].strand == strands[s]) {
						prev = curr;
					}
					continue;
				}
	
				if ( overlaps(bedList[prev].start, bedList[prev].end, 
				 			bedList[curr].start, bedList[curr].end) >= _maxDistance) {					
					OIP = true;
					mergeCount++;
					minStart = min(bedList[prev].start, min(minStart, bedList[curr].start));
					maxEnd = max(bedList[prev].end, max(maxEnd, bedList[curr].end));

					names[bedList[prev].name] = true;
					names[bedList[curr].name] = true;
				}
				else if ( overlaps(minStart, maxEnd, 
								bedList[curr].start, bedList[curr].end) >= _maxDistance) {
					mergeCount++;
					minStart = min(minStart, bedList[curr].start);
					maxEnd = max(maxEnd, bedList[curr].end);
					names[bedList[curr].name] = true;
				}
				else {

					// was there an overlap before the current entry broke it?
					if (OIP) {
						if (_numEntries) {
							cout << bedList[prev].chrom << "\t" << minStart << "\t" << maxEnd << "\t" << mergeCount << "\t" << strands[s] << endl;
						}
						else if (_reportNames) {
							cout << bedList[prev].chrom << "\t" << minStart << "\t" << maxEnd << "\t";
							ReportMergedNames(names);
							cout << "\t" << strands[s] << endl;
						}
						else {
							cout << bedList[prev].chrom << "\t" << minStart << "\t" << maxEnd << "\t" << strands[s] << endl;
						}
					}
					else {
						if ((_numEntries) && (numOnStrand > 0)) {
							cout << bedList[prev].chrom << "\t" << bedList[prev].start << "\t" << bedList[prev].end << "\t" << mergeCount << "\t" << strands[s] << endl;
						}
						else if (_reportNames) {
							cout << bedList[prev].chrom << "\t" << bedList[prev].start << "\t" << bedList[prev].end << "\t" << bedList[prev].name << "\t" << strands[s] << endl;
						}
						else if (numOnStrand > 0) {
							cout << bedList[prev].chrom << "\t" << bedList[prev].start << "\t" << bedList[prev].end << "\t" << strands[s] << endl;
						}
					}

					// reset things for the next overlapping "block"
					OIP = false;
					mergeCount = 1;			
					minStart = INT_MAX;
					maxEnd = 0;
					names.clear();
					
					// add the name of the current element in prep for the next block
					names[bedList[curr].name] = true;
				}
				prev = curr;
			}

			// clean up based on the last entry for the current chromosome
			if (OIP) {
				if (_numEntries) {
					cout << bedList[prev].chrom << "\t" << minStart << "\t" << maxEnd << "\t" << mergeCount << "\t" << strands[s] << endl;
				}
				else if (_reportNames) {
					cout << bedList[prev].chrom << "\t" << minStart << "\t" << maxEnd << "\t";
					ReportMergedNames(names);
					cout << "\t" << strands[s] << endl;
				}
				else {
					cout << bedList[prev].chrom << "\t" << minStart << "\t" << maxEnd << "\t" << strands[s] << endl;
				}
			}
			else {
				if ((_numEntries) && (numOnStrand > 0)) {
					cout << bedList[prev].chrom << "\t" << bedList[prev].start << "\t" << bedList[prev].end << "\t" << mergeCount << "\t" << strands[s] << endl;
				}
				else if ((_reportNames) && (numOnStrand > 0)) {
					cout << bedList[prev].chrom << "\t" << bedList[prev].start << "\t" << bedList[prev].end << "\t" << bedList[prev].name << "\t" << strands[s] << endl;
				}
				else if (numOnStrand > 0) {
					cout << bedList[prev].chrom << "\t" << bedList[prev].start << "\t" << bedList[prev].end << "\t" << strands[s] << endl;
				}
			}
		}
	}
}