bool HeaderTool::HeaderToolPrivate::Run(void) { // set to default input if none provided if ( !m_settings->HasInput && !m_settings->HasInputFilelist ) m_settings->InputFiles.push_back(Options::StandardIn()); // add files in the filelist to the input file list if ( m_settings->HasInputFilelist ) { ifstream filelist(m_settings->InputFilelist.c_str(), ios::in); if ( !filelist.is_open() ) { cerr << "bamtools header ERROR: could not open input BAM file list... Aborting." << endl; return false; } string line; while ( getline(filelist, line) ) m_settings->InputFiles.push_back(line); } // attemp to open BAM files BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { cerr << "bamtools header ERROR: could not open BAM file(s) for reading... Aborting." << endl; return false; } // dump (merged) header contents to stdout cout << reader.GetHeaderText() << endl; // clean up & exit reader.Close(); return true; }
// merges sorted temp BAM files into single sorted output BAM file bool SortTool::SortToolPrivate::MergeSortedRuns(void) { // open up multi reader for all of our temp files // this might get broken up if we do a multi-pass system later ?? BamMultiReader multiReader; if ( !multiReader.Open(m_tempFilenames) ) { cerr << "bamtools sort ERROR: could not open BamMultiReader for merging temp files... Aborting." << endl; return false; } // set sort order for merge if ( m_settings->IsSortingByName ) multiReader.SetSortOrder(BamMultiReader::SortedByReadName); else multiReader.SetSortOrder(BamMultiReader::SortedByPosition); // open writer for our completely sorted output BAM file BamWriter mergedWriter; if ( !mergedWriter.Open(m_settings->OutputBamFilename, m_headerText, m_references) ) { cerr << "bamtools sort ERROR: could not open " << m_settings->OutputBamFilename << " for writing... Aborting." << endl; multiReader.Close(); return false; } // while data available in temp files BamAlignment al; while ( multiReader.GetNextAlignmentCore(al) ) mergedWriter.SaveAlignment(al); // close readers multiReader.Close(); mergedWriter.Close(); // delete all temp files vector<string>::const_iterator tempIter = m_tempFilenames.begin(); vector<string>::const_iterator tempEnd = m_tempFilenames.end(); for ( ; tempIter != tempEnd; ++tempIter ) { const string& tempFilename = (*tempIter); remove(tempFilename.c_str()); } return true; }
bool StatsTool::StatsToolPrivate::Run() { // opens the BAM files without checking for indexes BamMultiReader reader; if ( !reader.Open(settings->InputFiles, false, true) ) { cerr << "Could not open input BAM file(s)... quitting." << endl; reader.Close(); return false; } // plow through file, keeping track of stats BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) ProcessAlignment(al); // print stats PrintStats(); // clean and exit reader.Close(); return true; }
bool ConvertTool::ConvertToolPrivate::Run(void) { // ------------------------------------ // initialize conversion input/output // set to default input if none provided if ( !m_settings->HasInput ) m_settings->InputFiles.push_back(Options::StandardIn()); // open input files BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { cerr << "bamtools convert ERROR: could not open input BAM file(s)... Aborting." << endl; return false; } // if input is not stdin & a region is provided, look for index files if ( m_settings->HasInput && m_settings->HasRegion ) { if ( !reader.LocateIndexes() ) { cerr << "bamtools convert ERROR: could not locate index file(s)... Aborting." << endl; return false; } } // retrieve reference data m_references = reader.GetReferenceData(); // set region if specified BamRegion region; if ( m_settings->HasRegion ) { if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { if ( reader.HasIndexes() ) { if ( !reader.SetRegion(region) ) { cerr << "bamtools convert ERROR: set region failed. Check that REGION describes a valid range" << endl; reader.Close(); return false; } } } else { cerr << "bamtools convert ERROR: could not parse REGION: " << m_settings->Region << endl; cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid" << endl; reader.Close(); return false; } } // if output file given ofstream outFile; if ( m_settings->HasOutput ) { // open output file stream outFile.open(m_settings->OutputFilename.c_str()); if ( !outFile ) { cerr << "bamtools convert ERROR: could not open " << m_settings->OutputFilename << " for output" << endl; return false; } // set m_out to file's streambuf m_out.rdbuf(outFile.rdbuf()); } // ------------------------------------- // do conversion based on format bool convertedOk = true; // pileup is special case // conversion not done per alignment, like the other formats if ( m_settings->Format == FORMAT_PILEUP ) convertedOk = RunPileupConversion(&reader); // all other formats else { bool formatError = false; // set function pointer to proper conversion method void (BamTools::ConvertTool::ConvertToolPrivate::*pFunction)(const BamAlignment&) = 0; if ( m_settings->Format == FORMAT_BED ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintBed; else if ( m_settings->Format == FORMAT_FASTA ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFasta; else if ( m_settings->Format == FORMAT_FASTQ ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFastq; else if ( m_settings->Format == FORMAT_JSON ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintJson; else if ( m_settings->Format == FORMAT_SAM ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintSam; else if ( m_settings->Format == FORMAT_YAML ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintYaml; else { cerr << "bamtools convert ERROR: unrecognized format: " << m_settings->Format << endl; cerr << "Please see documentation for list of supported formats " << endl; formatError = true; convertedOk = false; } // if format selected ok if ( !formatError ) { // if SAM format & not omitting header, print SAM header first if ( (m_settings->Format == FORMAT_SAM) && !m_settings->IsOmittingSamHeader ) m_out << reader.GetHeaderText(); // iterate through file, doing conversion BamAlignment a; while ( reader.GetNextAlignment(a) ) (this->*pFunction)(a); // set flag for successful conversion convertedOk = true; } } // ------------------------ // clean up & exit reader.Close(); if ( m_settings->HasOutput ) outFile.close(); return convertedOk; }
bool RandomTool::RandomToolPrivate::Run(void) { // set to default stdin if no input files provided if ( !m_settings->HasInput && !m_settings->HasInputFilelist ) m_settings->InputFiles.push_back(Options::StandardIn()); // add files in the filelist to the input file list if ( m_settings->HasInputFilelist ) { ifstream filelist(m_settings->InputFilelist.c_str(), ios::in); if ( !filelist.is_open() ) { cerr << "bamtools random ERROR: could not open input BAM file list... Aborting." << endl; return false; } string line; while ( getline(filelist, line) ) m_settings->InputFiles.push_back(line); } // open our reader BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { cerr << "bamtools random ERROR: could not open input BAM file(s)... Aborting." << endl; return false; } // look up index files for all BAM files reader.LocateIndexes(); // make sure index data is available if ( !reader.HasIndexes() ) { cerr << "bamtools random ERROR: could not load index data for all input BAM file(s)... Aborting." << endl; reader.Close(); return false; } // get BamReader metadata const string headerText = reader.GetHeaderText(); const RefVector references = reader.GetReferenceData(); if ( references.empty() ) { cerr << "bamtools random ERROR: no reference data available... Aborting." << endl; reader.Close(); return false; } // determine compression mode for BamWriter bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression ); BamWriter::CompressionMode compressionMode = BamWriter::Compressed; if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed; // open BamWriter BamWriter writer; writer.SetCompressionMode(compressionMode); if ( !writer.Open(m_settings->OutputFilename, headerText, references) ) { cerr << "bamtools random ERROR: could not open " << m_settings->OutputFilename << " for writing... Aborting." << endl; reader.Close(); return false; } // if user specified a REGION constraint, attempt to parse REGION string BamRegion region; if ( m_settings->HasRegion && !Utilities::ParseRegionString(m_settings->Region, reader, region) ) { cerr << "bamtools random ERROR: could not parse REGION: " << m_settings->Region << endl; cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid" << endl; reader.Close(); writer.Close(); return false; } // seed our random number generator srand( time(NULL) ); // grab random alignments BamAlignment al; unsigned int i = 0; while ( i < m_settings->AlignmentCount ) { int randomRefId = 0; int randomPosition = 0; // use REGION constraints to select random refId & position if ( m_settings->HasRegion ) { // select a random refId randomRefId = getRandomInt(region.LeftRefID, region.RightRefID); // select a random position based on randomRefId const int lowerBoundPosition = ( (randomRefId == region.LeftRefID) ? region.LeftPosition : 0 ); const int upperBoundPosition = ( (randomRefId == region.RightRefID) ? region.RightPosition : (references.at(randomRefId).RefLength - 1) ); randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition); } // otherwise select from all possible random refId & position else { // select random refId randomRefId = getRandomInt(0, (int)references.size() - 1); // select random position based on randomRefId const int lowerBoundPosition = 0; const int upperBoundPosition = references.at(randomRefId).RefLength - 1; randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition); } // if jump & read successful, save first alignment that overlaps random refId & position if ( reader.Jump(randomRefId, randomPosition) ) { while ( reader.GetNextAlignmentCore(al) ) { if ( al.RefID == randomRefId && al.Position >= randomPosition ) { writer.SaveAlignment(al); ++i; break; } } } } // cleanup & exit reader.Close(); writer.Close(); return true; }
int CountTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); // if no '-in' args supplied, default to stdin if ( !m_settings->HasInput ) m_settings->InputFiles.push_back(Options::StandardIn()); // open reader without index BamMultiReader reader; if (!reader.Open(m_settings->InputFiles, false, true)) { cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl; return 1; } // alignment counter BamAlignment al; int alignmentCount(0); // if no region specified, count entire file if ( !m_settings->HasRegion ) { while ( reader.GetNextAlignmentCore(al) ) ++alignmentCount; } // otherwise attempt to use region as constraint else { // if region string parses OK BamRegion region; if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { // attempt to re-open reader with index files reader.Close(); bool openedOK = reader.Open(m_settings->InputFiles, true, true ); // if error if ( !openedOK ) { cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl; return 1; } // if index data available, we can use SetRegion if ( reader.IsIndexLoaded() ) { // attempt to use SetRegion(), if failed report error if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." << endl; reader.Close(); return 1; } // everything checks out, just iterate through specified region, counting alignments while ( reader.GetNextAlignmentCore(al) ) ++alignmentCount; } // no index data available, we have to iterate through until we // find overlapping alignments else { while( reader.GetNextAlignmentCore(al) ) { if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) && (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) { ++alignmentCount; } } } } // error parsing REGION string else { cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl; cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl; reader.Close(); return 1; } } // print results cout << alignmentCount << endl; // clean & exit reader.Close(); return 0; }
bool ConvertTool::ConvertToolPrivate::Run(void) { // ------------------------------------ // initialize conversion input/output // set to default input if none provided if ( !m_settings->HasInput ) m_settings->InputFiles.push_back(Options::StandardIn()); // open input files BamMultiReader reader; if ( !m_settings->HasInput ) { // don't attempt to open index for stdin if ( !reader.Open(m_settings->InputFiles, false) ) { cerr << "Could not open input files" << endl; return false; } } else { if ( !reader.Open(m_settings->InputFiles, true) ) { if ( !reader.Open(m_settings->InputFiles, false) ) { cerr << "Could not open input files" << endl; return false; } else { cerr << "Opened reader without index file, jumping is disabled." << endl; } } } m_references = reader.GetReferenceData(); // set region if specified BamRegion region; if ( m_settings->HasRegion ) { if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { if ( !reader.SetRegion(region) ) { cerr << "Could not set BamReader region to REGION: " << m_settings->Region << endl; return false; } } else { cerr << "Could not parse REGION: " << m_settings->Region << endl; return false; } } // if output file given ofstream outFile; if ( m_settings->HasOutput ) { // open output file stream outFile.open(m_settings->OutputFilename.c_str()); if ( !outFile ) { cerr << "Could not open " << m_settings->OutputFilename << " for output" << endl; return false; } // set m_out to file's streambuf m_out.rdbuf(outFile.rdbuf()); } // ------------------------------------- // do conversion based on format bool convertedOk = true; // pileup is special case // conversion not done per alignment, like the other formats if ( m_settings->Format == FORMAT_PILEUP ) convertedOk = RunPileupConversion(&reader); // all other formats else { bool formatError = false; // set function pointer to proper conversion method void (BamTools::ConvertTool::ConvertToolPrivate::*pFunction)(const BamAlignment&) = 0; if ( m_settings->Format == FORMAT_BED ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintBed; else if ( m_settings->Format == FORMAT_BEDGRAPH ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintBedGraph; else if ( m_settings->Format == FORMAT_FASTA ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFasta; else if ( m_settings->Format == FORMAT_FASTQ ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFastq; else if ( m_settings->Format == FORMAT_JSON ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintJson; else if ( m_settings->Format == FORMAT_SAM ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintSam; else if ( m_settings->Format == FORMAT_WIGGLE ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintWiggle; else if ( m_settings->Format == FORMAT_YAML ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintYaml; else { cerr << "Unrecognized format: " << m_settings->Format << endl; cerr << "Please see help|README (?) for details on supported formats " << endl; formatError = true; convertedOk = false; } // if format selected ok if ( !formatError ) { // if SAM format & not omitting header, print SAM header first if ( (m_settings->Format == FORMAT_SAM) && !m_settings->IsOmittingSamHeader ) m_out << reader.GetHeaderText(); // iterate through file, doing conversion BamAlignment a; while ( reader.GetNextAlignment(a) ) (this->*pFunction)(a); // set flag for successful conversion convertedOk = true; } } // ------------------------ // clean up & exit reader.Close(); if ( m_settings->HasOutput ) outFile.close(); return convertedOk; }
bool FilterTool::FilterToolPrivate::Run(void) { // set to default input if none provided if ( !m_settings->HasInput && !m_settings->HasInputFilelist ) m_settings->InputFiles.push_back(Options::StandardIn()); // add files in the filelist to the input file list if ( m_settings->HasInputFilelist ) { ifstream filelist(m_settings->InputFilelist.c_str(), ios::in); if ( !filelist.is_open() ) { cerr << "bamtools filter ERROR: could not open input BAM file list... Aborting." << endl; return false; } string line; while ( getline(filelist, line) ) m_settings->InputFiles.push_back(line); } // initialize defined properties & user-specified filters // quit if failed if ( !SetupFilters() ) return false; // open reader without index BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { cerr << "bamtools filter ERROR: could not open input files for reading." << endl; return false; } // retrieve reader header & reference data const string headerText = reader.GetHeaderText(); filterToolReferences = reader.GetReferenceData(); // determine compression mode for BamWriter bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression ); BamWriter::CompressionMode compressionMode = BamWriter::Compressed; if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed; // open BamWriter BamWriter writer; writer.SetCompressionMode(compressionMode); if ( !writer.Open(m_settings->OutputFilename, headerText, filterToolReferences) ) { cerr << "bamtools filter ERROR: could not open " << m_settings->OutputFilename << " for writing." << endl; reader.Close(); return false; } // if no region specified, filter entire file BamAlignment al; if ( !m_settings->HasRegion ) { while ( reader.GetNextAlignment(al) ) { if ( CheckAlignment(al) ) writer.SaveAlignment(al); } } // otherwise attempt to use region as constraint else { // if region string parses OK BamRegion region; if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { // attempt to find index files reader.LocateIndexes(); // if index data available for all BAM files, we can use SetRegion if ( reader.HasIndexes() ) { // attempt to use SetRegion(), if failed report error if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { cerr << "bamtools filter ERROR: set region failed. Check that REGION describes a valid range" << endl; reader.Close(); return false; } // everything checks out, just iterate through specified region, filtering alignments while ( reader.GetNextAlignment(al) ) if ( CheckAlignment(al) ) writer.SaveAlignment(al); } // no index data available, we have to iterate through until we // find overlapping alignments else { while ( reader.GetNextAlignment(al) ) { if ( (al.RefID >= region.LeftRefID) && ((al.Position + al.Length) >= region.LeftPosition) && (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) { if ( CheckAlignment(al) ) writer.SaveAlignment(al); } } } } // error parsing REGION string else { cerr << "bamtools filter ERROR: could not parse REGION: " << m_settings->Region << endl; cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid" << endl; reader.Close(); return false; } } // clean up & exit reader.Close(); writer.Close(); return true; }
bool MergeTool::MergeToolPrivate::Run(void) { // set to default input if none provided if ( !m_settings->HasInputBamFilename ) m_settings->InputFiles.push_back(Options::StandardIn()); // opens the BAM files (by default without checking for indexes) BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { cerr << "bamtools merge ERROR: could not open input BAM file(s)... Aborting." << endl; return false; } // retrieve header & reference dictionary info std::string mergedHeader = reader.GetHeaderText(); RefVector references = reader.GetReferenceData(); // determine compression mode for BamWriter bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression ); BamWriter::CompressionMode compressionMode = BamWriter::Compressed; if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed; // open BamWriter BamWriter writer; writer.SetCompressionMode(compressionMode); if ( !writer.Open(m_settings->OutputFilename, mergedHeader, references) ) { cerr << "bamtools merge ERROR: could not open " << m_settings->OutputFilename << " for writing." << endl; reader.Close(); return false; } // if no region specified, store entire contents of file(s) if ( !m_settings->HasRegion ) { BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) writer.SaveAlignment(al); } // otherwise attempt to use region as constraint else { // if region string parses OK BamRegion region; if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { // attempt to find index files reader.LocateIndexes(); // if index data available for all BAM files, we can use SetRegion if ( reader.HasIndexes() ) { // attempt to use SetRegion(), if failed report error if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { cerr << "bamtools merge ERROR: set region failed. Check that REGION describes a valid range" << endl; reader.Close(); return false; } // everything checks out, just iterate through specified region, storing alignments BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) writer.SaveAlignment(al); } // no index data available, we have to iterate through until we // find overlapping alignments else { BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) { if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) && (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) { writer.SaveAlignment(al); } } } } // error parsing REGION string else { cerr << "bamtools merge ERROR: could not parse REGION - " << m_settings->Region << endl; cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid" << endl; reader.Close(); writer.Close(); return false; } } // clean & exit reader.Close(); writer.Close(); return true; }
int main ( int argc, char *argv[] ) { struct parameters *param = 0; param = interface(param, argc, argv); //region file input (the region file should be sorted as the same way as the bam file) ifstream region_f; region_f.open(param->region_f, ios_base::in); // the region file is opened //bam input and generate index if not yet //-------------------------------------------------------------------------------------------------------+ // BAM input (file or filenames?) | //-------------------------------------------------------------------------------------------------------+ char *fof = param->mapping_f; FILE *IN=NULL; char linefof[5000]; int filecount=0; vector <string> fnames; if (strchr(fof,' ')!=NULL) { char *ptr; ptr=strtok(fof," "); while (ptr!=NULL) { fnames.push_back(ptr); filecount++; ptr=strtok(NULL," "); } } else { IN=fopen(fof,"rt"); if (IN!=NULL) { long linecount=0; while (fgets(linefof,5000-1,IN)!=NULL) { linecount++; if (linefof[0]!='#' && linefof[0]!='\n') { char *ptr=strchr(linefof,'\n'); if (ptr!=NULL && ptr[0]=='\n') { ptr[0]='\0'; } FILE *dummy=NULL; dummy=fopen(linefof,"rt"); if (dummy!=NULL) { // seems to be a file of filenames... fclose(dummy); fnames.push_back(linefof); filecount++; } else if (filecount==0 || linecount>=1000-1) { // seems to be a single file fnames.push_back(fof); filecount++; break; } } } fclose(IN); } } //file or file name decided and stored in vector "fnames" cerr << "the input mapping files are:" << endl; vector <string>::iterator fit = fnames.begin(); for(; fit != fnames.end(); fit++) { cerr << *fit << endl; } //-------------------------------------------------------------------------------------------------------+ // end of file or filenames | //-------------------------------------------------------------------------------------------------------+ // open the BAM file(s) BamMultiReader reader; reader.Open(fnames); // get header & reference information string header = reader.GetHeaderText(); RefVector refs = reader.GetReferenceData(); if ( ! reader.LocateIndexes() ) // opens any existing index files that match our BAM files reader.CreateIndexes(); // creates index files for BAM files that still lack one // locus bias struct lb empty_profile = {0,0,0,0}; vector <struct lb> locus_b(1000, empty_profile); // output locus bias file string locus_bias_set = param->lbias; ofstream locus_bias; if ( locus_bias_set != "" ) { locus_bias.open(param->lbias); if ( !locus_bias ) { cerr << "can not open locus_bias file.\n"; exit(0); } } //should decide which chromosome string line; string old_chr = "SRP"; string type = param->type; //whether do some position-level pile-up stuff bool posc = false; ofstream posc_f; ofstream chrmap_f; string poscset = param->posc; if ( poscset != "" ) { posc = true; posc_f.open(param->posc); chrmap_f.open(param->chrmap); } bool noChr; if ( param->nochr == 1 ){ noChr = true; } else { noChr = false; } //regions for the input of region file deque <struct region> regions; getline(region_f, line); //get the first line eatline(line,regions,noChr); deque <struct region>::iterator it = regions.begin(); while ( it->chr != old_chr ) { old_chr = it->chr; // set the current chr as old chr int chr_id = reader.GetReferenceID(it->chr); if ( chr_id == -1 ) { //reference not found for (; it != regions.end() && it->chr == old_chr; ) { gene_processing(*it,locus_b); // print the old region info it = regions.erase(it); // erase the current region } while ( regions.empty() ) { getline(region_f, line); if ( region_f.eof() ){ cerr << "finished: end of region file, zone 0" << endl; break; } eatline(line, regions,noChr); it = regions.begin(); if (it->chr == old_chr){ gene_processing(*it,locus_b); regions.clear(); continue; } } continue; } int chr_len = refs.at(chr_id).RefLength; if ( !reader.SetRegion(chr_id, 1, chr_id, chr_len) ) // here set region { cerr << "bamtools count ERROR: Jump region failed " << it->chr << endl; reader.Close(); exit(1); } //pile-up pos stats set <string> fragment; map <string, unsigned int> pileup; bool isposPileup = false; unsigned int old_start = 0; unsigned int total_tags = 0; unsigned int total_pos = 0; unsigned int pileup_pos = 0; BamAlignment bam; while (reader.GetNextAlignment(bam)) { if ( bam.IsMapped() == false ) continue; // skip unaligned reads unsigned int unique; bam.GetTag("NH", unique); if (param->unique == 1) { if (unique != 1) { // skipe uniquelly mapped reads continue; } } if (read_length == 0){ read_length = bam.Length; } //cout << bam.Name << endl; string chrom = refs.at(bam.RefID).RefName; string strand = "+"; if (bam.IsReverseStrand()) strand = "-"; unsigned int alignmentStart = bam.Position+1; unsigned int mateStart; if (type == "p") mateStart = bam.MatePosition+1; unsigned int alignmentEnd = bam.GetEndPosition(); unsigned int cigarEnd; vector <int> blockLengths; vector <int> blockStarts; blockStarts.push_back(0); ParseCigar(bam.CigarData, blockStarts, blockLengths, cigarEnd); // position check for unique mapped reads (because is paired-end reads, shoule base on fragment level for paired end reads) if (posc == true && unique == 1) { if (type == "p" && fragment.count(bam.Name) > 0) fragment.erase(bam.Name); else { total_tags++; if (type == "p"){ fragment.insert(bam.Name); } string alignSum; if (type == "p") { alignSum = int2str(alignmentStart) + "\t" + int2str(mateStart) + "\t.\t" + strand; } else { alignSum = int2str(alignmentStart) + "\t" + int2str(alignmentEnd) + "\t.\t" + strand; } if ( alignmentStart != old_start ) { isposPileup = false; map <string, unsigned int>::iterator pit = pileup.begin(); for (; pit != pileup.end(); pit++) { posc_f << chrom << "\truping\tpileup\t" << pit->first << "\t.\t" << "Pileup=" << pit->second << endl; //print pileup } pileup.clear(); //clear pileup set pileup.insert( pair <string, unsigned int> (alignSum, 1) ); //insert the new read total_pos++; } else if ( alignmentStart == old_start ) { // same starts if ( pileup.count(alignSum) > 0 ) { // pileup if ( pileup[alignSum] == 1 && isposPileup == false ) { pileup_pos++; isposPileup = true; } pileup[alignSum]++; } else { pileup.insert( pair <string, unsigned int> (alignSum, 1) ); } } //same starts } //new fragment old_start = alignmentStart; } // do pos check float incre = 1.; if (blockStarts.size() > 1) incre = 0.5; // incre half for junction reads incre /= static_cast < float >(unique); // for multi aligned reads deque <struct region>::iterator iter = regions.begin(); if ( iter->start > alignmentEnd ) continue; // skip reads not overlapping with the first region while ( iter->chr == old_chr && iter->start <= alignmentEnd && iter != regions.end() ) { if (iter->end < alignmentStart) { // the region end is beyond the alignmentStart gene_processing(*iter,locus_b); // processing iter = regions.erase(iter); // this region should be removed if ( regions.empty() ) { getline(region_f, line); // get a line of region file if ( ! region_f.eof() ) { eatline(line, regions, noChr); // eat a line and put it into the duque iter = regions.begin(); } else { // it's reaching the end of the region file cerr << "finished: end of region file, zone 3" << endl; break; } } continue; } if (iter->end >= alignmentStart && iter->start <= alignmentEnd) { //overlapping, should take action vector <int>::iterator cigit = blockStarts.begin(); for (; cigit != blockStarts.end(); cigit++) { unsigned int current_start = *cigit + alignmentStart; int current_pos = current_start - (iter->start); //cout << iter->chr << "\t" << iter->start << "\t" << iter->end << "\t" << current_start << endl; if ( (iter->tags).count(current_pos) > 0 ) { (iter->tags)[current_pos] += incre; } else (iter->tags).insert( pair<int, float>(current_pos, incre) ); } } // overlapping take action! if ( (iter+1) != regions.end() ) iter++; // if this region is not the last element in the deque else { // the last element getline(region_f, line); // get a line of region file if ( ! region_f.eof() ){ eatline(line, regions, noChr); // eat a line and put it into the duque iter = regions.end(); iter--; } else { //it's reaching the end of the region file cerr << "finished: end of region file, zone 4" << endl; break; } } } //while } // read a bam // print chr map if (posc == true) { chrmap_f << old_chr << "\t" << total_tags << "\t" << total_pos << "\t" << pileup_pos << endl; } //somehow to loop back it = regions.begin(); //reset to begin for (; it != regions.end() && it->chr == old_chr; ) { gene_processing(*it,locus_b); // print the old region info it = regions.erase(it); // erase the current region } while ( regions.empty() ) { getline(region_f, line); if ( region_f.eof() ){ cerr << "finished: end of region file, zone 5" << endl; //print locus bias for (unsigned int l = 0; l < 1000; l++){ locus_bias << l << "\t" << locus_b[l].ps << "\t" << locus_b[l].hs << "\t" << locus_b[l].pe << "\t" << locus_b[l].he << endl; } exit(0); } eatline(line, regions, noChr); it = regions.begin(); if (it->chr == old_chr){ gene_processing(*it, locus_b); regions.clear(); continue; } } } // region chr != old chr regions.clear(); reader.Close(); region_f.close(); return 0; } //main
int FileReader::runInternal() { ogeNameThread("am_FileReader"); if(!format_specified) format = deduceFileFormat(); if(format == FORMAT_BAM) { BamMultiReader reader; if(!reader.Open(filenames)) { cerr << "Error opening BAM files." << endl; reader.Close(); return -1; } header = reader.GetHeader(); references = reader.GetReferenceData(); open = true; BamAlignment * al; while(true) { if(load_string_data) al = reader.GetNextAlignment(); else al = reader.GetNextAlignmentCore(); if(!al) break; putOutputAlignment(al); } reader.Close(); } else if(format == FORMAT_SAM) { vector<SamReader> readers; SamHeader first_header; // before doing any reading, open the files to // verify they are the right format, etc. for(int i = 0; i < filenames.size(); i++) { SamReader reader; if(!reader.Open(filenames[i])) { cerr << "Error opening SAM file: " << filenames[i] << endl; return -1; } if(filenames.size() > 1 && i == 0) first_header = header; // TODO: We can probably find a better way to deal with multiple SAM file headers, // but for now we should disallow different headers to avoid issues. if(i > 0 && header.ToString() != first_header.ToString()) cerr << "Warning! SAM input files have different headers." << endl; reader.Close(); } for(int i = 0; i < filenames.size(); i++) { SamReader reader; if(!reader.Open(filenames[i])) { cerr << "Error opening SAM file: " << filenames[i] << endl; return -1; } header = reader.GetHeader(); references = reader.GetReferenceData(); open = true; if(filenames.size() > 1 && i == 0) first_header = header; BamAlignment * al = NULL; while(true) { al = reader.GetNextAlignment(); if(NULL == al) break; putOutputAlignment(al); } reader.Close(); } } else { cerr << "FileReader couldn't detect file format. Aborting." << endl; exit(-1); return -1; } return 0; }
int MergeTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); // set to default input if none provided if ( !m_settings->HasInputBamFilename ) m_settings->InputFiles.push_back(Options::StandardIn()); // opens the BAM files (by default without checking for indexes) BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles, false, true) ) { cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl; return 1; } // retrieve header & reference dictionary info std::string mergedHeader = reader.GetHeaderText(); RefVector references = reader.GetReferenceData(); // open writer BamWriter writer; bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression ); if ( !writer.Open(m_settings->OutputFilename, mergedHeader, references, writeUncompressed) ) { cerr << "ERROR: Could not open BAM file " << m_settings->OutputFilename << " for writing... Aborting." << endl; reader.Close(); return 1; } // if no region specified, store entire contents of file(s) if ( !m_settings->HasRegion ) { BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) writer.SaveAlignment(al); } // otherwise attempt to use region as constraint else { // if region string parses OK BamRegion region; if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { // attempt to re-open reader with index files reader.Close(); bool openedOK = reader.Open(m_settings->InputFiles, true, true ); // if error if ( !openedOK ) { cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl; return 1; } // if index data available, we can use SetRegion if ( reader.IsIndexLoaded() ) { // attempt to use SetRegion(), if failed report error if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." << endl; reader.Close(); return 1; } // everything checks out, just iterate through specified region, storing alignments BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) writer.SaveAlignment(al); } // no index data available, we have to iterate through until we // find overlapping alignments else { BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) { if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) && (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) { writer.SaveAlignment(al); } } } } // error parsing REGION string else { cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl; cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl; reader.Close(); writer.Close(); return 1; } } // clean & exit reader.Close(); writer.Close(); return 0; }