// Advances `reader` to the next usable alignment on reference `refID`.
//
// Core alignment records are read into `al` one at a time. Records that are
// unmapped, flagged as duplicates, or flagged as failing QC are skipped.
//
// Returns true when `al` holds an accepted record. Returns false as soon as the
// reader yields no further record, or yields a record whose RefID differs from
// `refID` (in which case `al` holds whatever the reader last stored in it).
//
// Written as a loop instead of calling itself once per skipped record, so a
// long run of filtered records cannot grow the call stack.
bool GetNextAlignment(BamAlignment& al, BamMultiReader& reader, int32_t refID) {
    for (;;) {
        const bool good = reader.GetNextAlignmentCore(al);
        if (!good || al.RefID != refID)
            return false;

        // accept only mapped, non-duplicate, QC-passing records
        if (al.IsMapped() && !al.IsDuplicate() && !al.IsFailedQC())
            return true;
    }
}
// merges sorted temp BAM files into single sorted output BAM file bool SortTool::SortToolPrivate::MergeSortedRuns(void) { // open up multi reader for all of our temp files // this might get broken up if we do a multi-pass system later ?? BamMultiReader multiReader; if ( !multiReader.Open(m_tempFilenames) ) { cerr << "bamtools sort ERROR: could not open BamMultiReader for merging temp files... Aborting." << endl; return false; } // set sort order for merge if ( m_settings->IsSortingByName ) multiReader.SetSortOrder(BamMultiReader::SortedByReadName); else multiReader.SetSortOrder(BamMultiReader::SortedByPosition); // open writer for our completely sorted output BAM file BamWriter mergedWriter; if ( !mergedWriter.Open(m_settings->OutputBamFilename, m_headerText, m_references) ) { cerr << "bamtools sort ERROR: could not open " << m_settings->OutputBamFilename << " for writing... Aborting." << endl; multiReader.Close(); return false; } // while data available in temp files BamAlignment al; while ( multiReader.GetNextAlignmentCore(al) ) mergedWriter.SaveAlignment(al); // close readers multiReader.Close(); mergedWriter.Close(); // delete all temp files vector<string>::const_iterator tempIter = m_tempFilenames.begin(); vector<string>::const_iterator tempEnd = m_tempFilenames.end(); for ( ; tempIter != tempEnd; ++tempIter ) { const string& tempFilename = (*tempIter); remove(tempFilename.c_str()); } return true; }
bool StatsTool::StatsToolPrivate::Run() { // opens the BAM files without checking for indexes BamMultiReader reader; if ( !reader.Open(settings->InputFiles, false, true) ) { cerr << "Could not open input BAM file(s)... quitting." << endl; reader.Close(); return false; } // plow through file, keeping track of stats BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) ProcessAlignment(al); // print stats PrintStats(); // clean and exit reader.Close(); return true; }
bool RandomTool::RandomToolPrivate::Run(void) { // set to default stdin if no input files provided if ( !m_settings->HasInput && !m_settings->HasInputFilelist ) m_settings->InputFiles.push_back(Options::StandardIn()); // add files in the filelist to the input file list if ( m_settings->HasInputFilelist ) { ifstream filelist(m_settings->InputFilelist.c_str(), ios::in); if ( !filelist.is_open() ) { cerr << "bamtools random ERROR: could not open input BAM file list... Aborting." << endl; return false; } string line; while ( getline(filelist, line) ) m_settings->InputFiles.push_back(line); } // open our reader BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { cerr << "bamtools random ERROR: could not open input BAM file(s)... Aborting." << endl; return false; } // look up index files for all BAM files reader.LocateIndexes(); // make sure index data is available if ( !reader.HasIndexes() ) { cerr << "bamtools random ERROR: could not load index data for all input BAM file(s)... Aborting." << endl; reader.Close(); return false; } // get BamReader metadata const string headerText = reader.GetHeaderText(); const RefVector references = reader.GetReferenceData(); if ( references.empty() ) { cerr << "bamtools random ERROR: no reference data available... Aborting." << endl; reader.Close(); return false; } // determine compression mode for BamWriter bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression ); BamWriter::CompressionMode compressionMode = BamWriter::Compressed; if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed; // open BamWriter BamWriter writer; writer.SetCompressionMode(compressionMode); if ( !writer.Open(m_settings->OutputFilename, headerText, references) ) { cerr << "bamtools random ERROR: could not open " << m_settings->OutputFilename << " for writing... Aborting." 
<< endl; reader.Close(); return false; } // if user specified a REGION constraint, attempt to parse REGION string BamRegion region; if ( m_settings->HasRegion && !Utilities::ParseRegionString(m_settings->Region, reader, region) ) { cerr << "bamtools random ERROR: could not parse REGION: " << m_settings->Region << endl; cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid" << endl; reader.Close(); writer.Close(); return false; } // seed our random number generator srand( time(NULL) ); // grab random alignments BamAlignment al; unsigned int i = 0; while ( i < m_settings->AlignmentCount ) { int randomRefId = 0; int randomPosition = 0; // use REGION constraints to select random refId & position if ( m_settings->HasRegion ) { // select a random refId randomRefId = getRandomInt(region.LeftRefID, region.RightRefID); // select a random position based on randomRefId const int lowerBoundPosition = ( (randomRefId == region.LeftRefID) ? region.LeftPosition : 0 ); const int upperBoundPosition = ( (randomRefId == region.RightRefID) ? region.RightPosition : (references.at(randomRefId).RefLength - 1) ); randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition); } // otherwise select from all possible random refId & position else { // select random refId randomRefId = getRandomInt(0, (int)references.size() - 1); // select random position based on randomRefId const int lowerBoundPosition = 0; const int upperBoundPosition = references.at(randomRefId).RefLength - 1; randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition); } // if jump & read successful, save first alignment that overlaps random refId & position if ( reader.Jump(randomRefId, randomPosition) ) { while ( reader.GetNextAlignmentCore(al) ) { if ( al.RefID == randomRefId && al.Position >= randomPosition ) { writer.SaveAlignment(al); ++i; break; } } } } // cleanup & exit reader.Close(); writer.Close(); return true; }
int CountTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); // if no '-in' args supplied, default to stdin if ( !m_settings->HasInput ) m_settings->InputFiles.push_back(Options::StandardIn()); // open reader without index BamMultiReader reader; if (!reader.Open(m_settings->InputFiles, false, true)) { cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl; return 1; } // alignment counter BamAlignment al; int alignmentCount(0); // if no region specified, count entire file if ( !m_settings->HasRegion ) { while ( reader.GetNextAlignmentCore(al) ) ++alignmentCount; } // otherwise attempt to use region as constraint else { // if region string parses OK BamRegion region; if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { // attempt to re-open reader with index files reader.Close(); bool openedOK = reader.Open(m_settings->InputFiles, true, true ); // if error if ( !openedOK ) { cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl; return 1; } // if index data available, we can use SetRegion if ( reader.IsIndexLoaded() ) { // attempt to use SetRegion(), if failed report error if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." 
<< endl; reader.Close(); return 1; } // everything checks out, just iterate through specified region, counting alignments while ( reader.GetNextAlignmentCore(al) ) ++alignmentCount; } // no index data available, we have to iterate through until we // find overlapping alignments else { while( reader.GetNextAlignmentCore(al) ) { if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) && (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) { ++alignmentCount; } } } } // error parsing REGION string else { cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl; cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl; reader.Close(); return 1; } } // print results cout << alignmentCount << endl; // clean & exit reader.Close(); return 0; }
// Implements the "bbctools create" command.
//
// Reads either a single BBC file or one or more BAM files (the positional
// arguments of optParser) and, depending on the options, produces a BBC file
// (--bbc), BCI index (--index), CBC coarse coverage file (--coarse), region
// coverage statistics (--covStats), summary statistics (--sumStats) and/or a
// read origin file (--readOrigin). With no output option the BBC text view is
// streamed (bbcfileRoot is set to "-").
//
// Returns 0 on success, -1 for invalid option values/combinations and 1 for
// input, output or data errors. A message is written to stderr in nearly all
// error cases (a failure to open the new BBC file for writing returns 1
// without a message here).
//
// NOTE(review): several early error returns happen after `regions` and/or
// `bbcCreate` have been allocated and do not free them.
int bbctools_create( BbcUtils::OptParser &optParser ) {
    const vector<string> cmdArgs = optParser.getArgs();

    // remove .bbc extension from bbc file root, if present
    string bbcfileRoot = optParser.getOptValue( "bbc" );
    int i = bbcfileRoot.size() - 4;
    if( i > 0 && bbcfileRoot.substr(i,4) == ".bbc" ) {
        bbcfileRoot = bbcfileRoot.substr(0,i);
    }
    bool f_bci = optParser.getOptBoolean("index");
    bool f_cbc = optParser.getOptBoolean("coarse");

    string targetRegions = optParser.getOptValue("regions");
    string annotationFields = optParser.getOptValue( "annotationFields");
    vector<string> auxRegionSplit = BbcUtils::mapToPairList(annotationFields);

    string sumstatsFile = optParser.getOptValue("sumStats");
    string covstatsFile = optParser.getOptValue("covStats");
    string readOrigFile = optParser.getOptValue("readOrigin");
    string readType = optParser.getOptValue("readType");
    string covDepths = optParser.getOptValue("covDepths","-");
    double minPcCov = optParser.getOptNumber("minPcCov");
    // primer length and end-to-end gap defaults are non-zero only for AmpliSeq reads
    int32_t primerLength = optParser.getOptInteger( "primerLength", (readType == "AmpliSeq" ? 30 : 0) );
    int32_t maxE2eEndGap = optParser.getOptInteger( "e2eGap", (readType == "AmpliSeq" ? 2 : 0) );
    bool autoCreateBamIndex = optParser.getOptBoolean("autoCreateBamIndex");

    bool samdepth = optParser.getOptBoolean("samdepth");
    int32_t filterQuality = optParser.getOptInteger("minMAPQ");
    int32_t minAlignLength = optParser.getOptInteger("minAlignLength");
    bool filterDuplicates = optParser.getOptBoolean("noDups");
    bool filterUnique = optParser.getOptBoolean("unique");
    // reads with any of these flag bits set are skipped; --noDups adds bit 0x400
    uint32_t skipFlag = filterDuplicates ? 0x704 : 0x304;
    // --unique overrides --minMAPQ with a minimum mapping quality of 1
    uint16_t minMapQuality = filterUnique ? 1 : filterQuality;

    bool onlyOnTargetReads = optParser.getOptBoolean("onTargetReads");
    bool onlyOnTargetBases = optParser.getOptBoolean("onTargetBases");

    // possible future options
    bool invertOnTarget = false;

    // check basic valid argument values and combinations
    int numOuts = !bbcfileRoot.empty() + !covstatsFile.empty() + !sumstatsFile.empty() + !readOrigFile.empty();
    int numPipes = (bbcfileRoot == "-") + (covstatsFile == "-") + (sumstatsFile == "-") + (readOrigFile == "-");
    if( numOuts == 0 && !f_bci && !f_cbc ) {
        bbcfileRoot = "-";  // default if no other output specified
    } else if( numPipes > 1 ) {
        cerr << "Error: bbctools create: Only one file output (--covStats, --sumStats, --readOrigin or --bbc) may be piped to STDOUT." << endl;
        return -1;
    } else if( samdepth && numOuts ) {
        cerr << "Error: bbctools create: --samdepth (-s) option may only be used without other output options." << endl;
        return -1;
    }

    // check if single argument is a BBC file and leave open for reading if so
    BbcView bbcView;
    bool haveBbcFile = cmdArgs.size() == 1 && bbcView.Open( cmdArgs[0], true );
    bbcView.SelectPrintStream( samdepth ? "SAMDEPTH" : "BBCVIEW" );

    // check distinction between default and explicit no target regions - only for BBC input
    bool explicitNoTargetRegions = false;
    if( targetRegions == "-" ) {
        explicitNoTargetRegions = haveBbcFile;
        targetRegions = "";
    }
    if( targetRegions.empty() ) {
        if( onlyOnTargetBases && explicitNoTargetRegions && !invertOnTarget ) {
            cerr << "Warning: bbctools create --onTargetBases (-b) option with --regions '-' produces no coverage." << endl;
        } else if( onlyOnTargetReads ) {
            cerr << "Error: bbctools create --onTargetReads (-r) option requires a --regions file." << endl;
            return -1;
        }
    }

    // check for legal BBC create options
    if( f_bci || f_cbc ) {
        if( (bbcfileRoot.empty() || bbcfileRoot == "-") && !haveBbcFile ) {
            string opt = f_bci ? "--index (-i)" : "--coarse (-c)";
            cerr << "Error: bbctools create "+opt+" option requires the --bbc (-B) option or a BBC source file." << endl;
            return -1;
        }
    }

    BamMultiReader bamReader;
    if( haveBbcFile ) {
        // warn for options that do not work with BBC input
        if( filterQuality > 0 || filterDuplicates || filterUnique || minAlignLength ) {
            cerr << "Warning: SAM flag, alignment length and MAPQ filters ignored for BBC source file." << endl;
        }
        if( samdepth ) {
            cerr << "Error: --samdepth option is not supported for BBC source files." << endl;
            return -1;
        }
        if( !readOrigFile.empty() ) {
            cerr << "Error: --readOrigin option is not supported for BBC source files." << endl;
            return -1;
        }
    } else {
        // check / open for multiple BAM file inputs
        if ( !bamReader.Open(cmdArgs) ) {
            if( cmdArgs.size() == 1 ) cerr << "ERROR: Could not read input BAM file:";
            else cerr << "ERROR: Could not read all input BAM files:";
            // get and clean up bamtools error msg
            string errMsg = bamReader.GetErrorString();
            size_t i = errMsg.find_first_of('\n');
            if( i != string::npos ) errMsg = errMsg.substr(i+1);
            i = errMsg.find("::");
            if( i != string::npos ) {
                i = errMsg.find(": ");
                if( i != string::npos ) errMsg = errMsg.substr(i+1);
            }
            errMsg = BbcUtils::stringTrim(errMsg);
            // the trimmed message may be empty, so only capitalize when there is a first character
            if( !errMsg.empty() ) errMsg[0] = toupper(errMsg[0]);
            cerr << endl << errMsg << "." << endl;
            return 1;
        }
    }

    // grab reference list from either input source
    const RefVector &references = haveBbcFile ? bbcView.GetReferenceData() : bamReader.GetReferenceData();
    if( !references.size() ) {
        // Issue would already been detected if input was BBC file
        cerr << "ERROR: " << (cmdArgs.size() > 1 ? "One or more " : "");
        cerr << "BAM file contains unaligned reads (no references).\n";
        return 1;
    }

    // check/set up target regions input regions/region statistics output
    RegionCoverage *regions = NULL;
    string covstatsStaticFields;
    bool trackRegionBaseCov = !covDepths.empty();
    if( covstatsFile.empty() ) {
        trackRegionBaseCov = false;
        if( !annotationFields.empty() ) {
            cerr << "Warning: --annotationFields (A) option ignored without --covStats (-C) option." << endl;
        }
        if( !covDepths.empty() && covDepths != "-" ) {
            cerr << "Warning: --covDepths (-D) option ignored without --covStats (-C) option." << endl;
        }
        if( !readType.empty() ) {
            cerr << "Warning: --readType (-T) option ignored without --covStats (-C) option." << endl;
        }
        // read regions for input only and/or creating sumStats
        if( !targetRegions.empty() || explicitNoTargetRegions || !sumstatsFile.empty() ) {
            regions = new RegionCoverage(references);
        }
    } else if( readType == "trgreads" || readType == "amplicon" || readType == "AmpliSeq" ) {
        if( haveBbcFile ) {
            cerr << "Creation of read coverage requires BAM file input." << endl;
            return -1;
        }
        AmpliconRegionStatistics *ampRegionStats = new AmpliconRegionStatistics(references);
        ampRegionStats->SetGenericReads( readType == "trgreads" );
        ampRegionStats->SetSigFacCoverage( minPcCov/100 );
        ampRegionStats->SetMaxUpstreamPrimerStart( primerLength );
        ampRegionStats->SetMaxE2eEndDistance( maxE2eEndGap );
        covstatsStaticFields = "overlaps,";
        covstatsStaticFields += (minPcCov > 0) ? "fwd_cov,rev_cov" : "fwd_e2e,rev_e2e";
        covstatsStaticFields += ",total_reads,fwd_reads,rev_reads";
        regions = ampRegionStats;
    } else if( readType == "trgbases" ) {
        if( haveBbcFile && targetRegions.empty() && !explicitNoTargetRegions ) {
            cerr << "Warning: Assuming reference contigs for base coverage targets (=> option --regions -)" << endl;
        }
        RegionStatistics *regionStats = new RegionStatistics(references);
        covstatsStaticFields = "covered,uncov_5p,uncov_3p,ave_basereads,fwd_basereads,rev_basereads";
        trackRegionBaseCov = true;
        regions = regionStats;
    } else if( readType == "covdepth" || readType.empty() ) {
        // output (sorted) targets file with only covDepth stats (if any)
        regions = new RegionCoverage(references);
    } else {
        cerr << "Unknown read type '" << readType << "'" << endl;
        return -1;
    }

    // Load the input regions or default to whole reference contig targets
    if( regions ) {
        regions->SetCovAtDepths( covDepths == "-" ? "20,100,500" : covDepths );
        if( targetRegions.empty() ) {
            regions->SetWholeContigTargets();
            // set contigs as explicit regions means all reads will seen as on-target
            // for consistency these are inverted (for input from BBC)
            invertOnTarget = true;
        } else {
            string auxFieldIdx = auxRegionSplit.size() ? auxRegionSplit[0] : "";
            string errMsg = regions->Load( targetRegions, "BED", auxFieldIdx );
            if( !errMsg.empty() ) {
                cerr << "ERROR: " + errMsg + "\n";
                return 1;
            }
        }
        if( onlyOnTargetReads && haveBbcFile ) {
            cerr << "Error: bbctools create --onTargetReads option is not supported for BBC source file." << endl;
            return -1;
        }
    }

    //
    // Perform all bbctools create utilities
    //
    BbcCreate *bbcCreate = NULL;
    if( !bbcfileRoot.empty() && (bbcfileRoot != "-" || !haveBbcFile) ) {
        bbcCreate = new BbcCreate(references);
        if( bbcfileRoot != "-" && !bbcCreate->Open(bbcfileRoot+".bbc") ) {
            return 1;
        }
        bbcCreate->SetNoOffTargetPositions(onlyOnTargetBases);
    }
    bbcView.SetNoOffTargetPositions(onlyOnTargetBases);

    // Stream input to output creators
    if( haveBbcFile ) {
        // BBC reader and driver via BbcView object
        if( bbcfileRoot != "-" || !covstatsFile.empty() ) {
            // disable BbcView text stream if using for file creation
            bbcView.SelectPrintStream("NONE");
        }
        // process input BBC for just new BBC and target coverage (defer BCI/CBC)
        bbcView.SetBbcCreate(bbcCreate);
        bbcView.SetRegionCoverage(regions);
        // explicitNoTargetRegions intended for explicitly removing on-target coverage
        bbcView.SetInvertOnTarget(explicitNoTargetRegions ^ invertOnTarget);
        if( bbcCreate || regions || bbcfileRoot == "-" ) {
            bbcView.ReadAll();
        }
    } else {
        // Test read tracking option for file write
        TrackReads *readTracker = NULL;
        try {
            if( !readOrigFile.empty() )
                readTracker = new TrackReads( readOrigFile, regions );
        } catch( std::runtime_error & ) {
            cerr << "ERROR: Unable to write to read tracking file " << readOrigFile << endl;
            return 1;
        }
        // BAM reader, BaseCoverage driver, dispatching to BbcCreate and BbcView objects
        BaseCoverage baseCov(references);
        baseCov.SetRegionCoverage(regions);
        baseCov.SetBbcCreate(bbcCreate);
        baseCov.SetInvertOnTarget(invertOnTarget);
        if( bbcfileRoot == "-" ) {
            baseCov.SetBbcView(&bbcView);
        }
        // Certain options require that all reads are processed, invalidating other performance options
        bool trackAllReads = !sumstatsFile.empty() || readTracker;
        // Implicit set of onlyOnTargetReads for performance when only these reads are required
        bool useBaseCov = (bbcfileRoot == "-" || bbcCreate);
        if( !targetRegions.empty() && !trackAllReads ) {
            onlyOnTargetReads |= onlyOnTargetBases;
            if( samdepth || !useBaseCov ) onlyOnTargetReads = true;
        }
        useBaseCov |= trackRegionBaseCov;
        // do not allow jumping if sumStats option is used - need to count all reads
        bool bamReaderSetRegions = (s_useBamReaderJump && !trackAllReads);
        int trgContig = 0, trgSrtPos = 0, trgEndPos = 0;
        int minJumpLen = s_initialMinJumpLen;
        int maxReadLen = s_initialMaxReadLen;
        if( onlyOnTargetReads ) {
            // load/create BAM index files for targeted reading
            // Note: BamIndex::BAMTOOLS format performed very badly and cannot use mixed with BTI/BAI files
            if( bamReaderSetRegions && !bamReader.LocateIndexes() ) {
                string plural( cmdArgs.size() > 1 ? "s" : "" );
                if( autoCreateBamIndex ) {
                    cerr << "Warning: Did not locate BAM index (BAI) file" << plural << ", creating bamtools version..." << endl;
                    // to avoid bug use new instance of BamMultiReader
                    BamMultiReader bamReader2;
                    if( !bamReader2.Open(cmdArgs) || !bamReader2.CreateIndexes() ) {
                        cerr << "WARNING: Failed to create BAM index file" << plural << "." << endl;
                        bamReaderSetRegions = false;
                    } else {
                        if( cmdArgs.size() == 1 ) {
                            cerr << "Successfully created BAM index file: " << BbcUtils::fileName(cmdArgs[0]) << ".bai" << endl;
                        } else {
                            cerr << "Successfully created BAM index files." << endl;
                        }
                        // re-locate indexes with first reader - could not seem to locate BTI files created!
                        if( !bamReader.LocateIndexes() ) {
                            cerr << "WARNING: Failed to locate BAM index file" << plural << " just created!" << endl;
                            bamReaderSetRegions = false;
                        }
                    }
                } else {
                    cerr << "Warning: BAM index file" << plural << " not located for targeted BAM access." << endl;
                    bamReaderSetRegions = false;
                }
            }
            // cancel region filtering if there are no regions to iterate (unexpected)
            if( !regions->GetNextRegion( trgContig, trgSrtPos, trgEndPos ) ) {
                onlyOnTargetReads = bamReaderSetRegions = false;
            }
            if( bamReaderSetRegions ) {
                bamReader.Jump( trgContig, trgSrtPos-maxReadLen );
            }
        }
        BamAlignment aln;
        while( bamReader.GetNextAlignmentCore(aln) ) {
            // appears to be an undocumented behavior here
            if( aln.RefID < 0 ) continue;
            // skip filtered reads by flag, length or mapping quality
            if( aln.AlignmentFlag & skipFlag ) continue;
            if( aln.MapQuality < minMapQuality ) continue;
            int32_t endPos = aln.GetEndPosition();
            if( minAlignLength > 0 ) {
                if( endPos - aln.Position < minAlignLength ) continue;
            }
            // screen for on-target reads
            if( onlyOnTargetReads ) {
                // find next region overlapping or beyond of current read
                bool moreRegions = true;
                bool setRegion = false;
                while( aln.RefID > trgContig || (aln.RefID == trgContig && aln.Position > trgEndPos) ) {
                    if( !regions->GetNextRegion( trgContig, trgSrtPos, trgEndPos ) ) {
                        moreRegions = false;
                        break;
                    }
                    setRegion = bamReaderSetRegions;
                }
                if( !moreRegions ) {
                    // prevent further on-target checks and exit early if not using sumStats
                    onlyOnTargetReads = false;
                    if( trackAllReads ) {
                        // force tracking of off-target reads
                        regions->TrackReadsOnRegion(aln,endPos);
                        if( readTracker ) readTracker->Write(aln,endPos);
                        continue;
                    }
                    break;
                }
                if( setRegion ) {
                    // track max read length for future index jumps - just in case long reads ever used
                    if( endPos - aln.Position > maxReadLen ) {
                        maxReadLen = endPos - aln.Position;
                        if( maxReadLen > minJumpLen ) minJumpLen = maxReadLen;
                    }
                    if( aln.RefID != trgContig || trgSrtPos - aln.Position > minJumpLen ) {
                        bamReader.Jump( trgContig, trgSrtPos-maxReadLen );
                    }
                }
                if( aln.RefID < trgContig || endPos < trgSrtPos ) {
                    // force tracking of off-target reads
                    if( trackAllReads ) {
                        regions->TrackReadsOnRegion(aln,endPos);
                        if( readTracker ) readTracker->Write(aln,endPos);
                    }
                    continue;   // current is before next target region - fetch the next within bounds
                }
            }
            // record base coverage and region coverage statistics
            if( useBaseCov ) {
                endPos = baseCov.AddAlignment(aln,endPos);
                if( endPos <= 0 ) {
                    if( endPos == 0 ) continue; // read was silently ignored
                    cerr << "ERROR: BAM file is not correctly sorted vs. reference." << endl;
                    return 1;
                }
            }
            // record read coverage and region coverage statistics
            if( regions ) {
                regions->TrackReadsOnRegion(aln,endPos);
            }
            if( readTracker ) {
                readTracker->Write(aln,endPos);
            }
        }
        // flush and close objects associated with output
        baseCov.Flush();
        // release the read tracker allocated above; it is not used past this point
        delete readTracker;
    }

    // Output in-memory region stats file and ensure BBC file is closed
    if( regions ) {
        // build output fields title string
        string outFields = "contig_id,contig_srt,contig_end";
        // element [1] is only read when it exists
        if( auxRegionSplit.size() > 1 ) outFields += "," + auxRegionSplit[1];
        if( !covstatsStaticFields.empty() ) outFields += "," + covstatsStaticFields;
        regions->Write( covstatsFile, outFields );
        if( !sumstatsFile.empty() ) {
            regions->WriteSummary( sumstatsFile, invertOnTarget );
        }
        delete regions;
    }
    delete bbcCreate;

    // Complete remaining file creation options using a BBC file input
    // NOTE: Using BbcCreate for this would require code duplication and concurrent file output streaming
    if( f_bci || f_cbc ) {
        // Check BBC file source
        if( haveBbcFile ) {
            bbcfileRoot = cmdArgs[0];
            int i = bbcfileRoot.size() - 4;
            if( i > 0 && bbcfileRoot.substr(i,4) == ".bbc" ) {
                bbcfileRoot = bbcfileRoot.substr(0,i);
            }
        } else if( !bbcView.Open( bbcfileRoot+".bbc", true ) ) {
            // report the file that was actually opened (<root>.bbc)
            cerr << "ERROR: Unexpected failure to read new BBC file '"+bbcfileRoot+".bbc'" << endl;
            return 1;
        }
        if( f_bci ) {
            BbcIndex indexer( bbcfileRoot+".bci" );
            if( !bbcView.CreateIndex(indexer) ) {
                cerr << "ERROR: Failed to create index file '" << bbcfileRoot << ".bci'" << endl;
                return 1;
            }
        }
        if( f_cbc ) {
            // CBC generation can use BCI file but is no faster since whole BBC file is read
            BbcCoarse cbcWriter( bbcfileRoot+".cbc" );
            if( !bbcView.CreateCbc(cbcWriter) ) {
                cerr << "ERROR: Failed to create coarse base coverage file '" << bbcfileRoot << ".cbc'" << endl;
                return 1;
            }
        }
    }
    return 0;
}
bool MergeTool::MergeToolPrivate::Run(void) { // set to default input if none provided if ( !m_settings->HasInputBamFilename ) m_settings->InputFiles.push_back(Options::StandardIn()); // opens the BAM files (by default without checking for indexes) BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { cerr << "bamtools merge ERROR: could not open input BAM file(s)... Aborting." << endl; return false; } // retrieve header & reference dictionary info std::string mergedHeader = reader.GetHeaderText(); RefVector references = reader.GetReferenceData(); // determine compression mode for BamWriter bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression ); BamWriter::CompressionMode compressionMode = BamWriter::Compressed; if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed; // open BamWriter BamWriter writer; writer.SetCompressionMode(compressionMode); if ( !writer.Open(m_settings->OutputFilename, mergedHeader, references) ) { cerr << "bamtools merge ERROR: could not open " << m_settings->OutputFilename << " for writing." << endl; reader.Close(); return false; } // if no region specified, store entire contents of file(s) if ( !m_settings->HasRegion ) { BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) writer.SaveAlignment(al); } // otherwise attempt to use region as constraint else { // if region string parses OK BamRegion region; if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { // attempt to find index files reader.LocateIndexes(); // if index data available for all BAM files, we can use SetRegion if ( reader.HasIndexes() ) { // attempt to use SetRegion(), if failed report error if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { cerr << "bamtools merge ERROR: set region failed. 
Check that REGION describes a valid range" << endl; reader.Close(); return false; } // everything checks out, just iterate through specified region, storing alignments BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) writer.SaveAlignment(al); } // no index data available, we have to iterate through until we // find overlapping alignments else { BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) { if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) && (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) { writer.SaveAlignment(al); } } } } // error parsing REGION string else { cerr << "bamtools merge ERROR: could not parse REGION - " << m_settings->Region << endl; cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid" << endl; reader.Close(); writer.Close(); return false; } } // clean & exit reader.Close(); writer.Close(); return true; }
int FileReader::runInternal() { ogeNameThread("am_FileReader"); if(!format_specified) format = deduceFileFormat(); if(format == FORMAT_BAM) { BamMultiReader reader; if(!reader.Open(filenames)) { cerr << "Error opening BAM files." << endl; reader.Close(); return -1; } header = reader.GetHeader(); references = reader.GetReferenceData(); open = true; BamAlignment * al; while(true) { if(load_string_data) al = reader.GetNextAlignment(); else al = reader.GetNextAlignmentCore(); if(!al) break; putOutputAlignment(al); } reader.Close(); } else if(format == FORMAT_SAM) { vector<SamReader> readers; SamHeader first_header; // before doing any reading, open the files to // verify they are the right format, etc. for(int i = 0; i < filenames.size(); i++) { SamReader reader; if(!reader.Open(filenames[i])) { cerr << "Error opening SAM file: " << filenames[i] << endl; return -1; } if(filenames.size() > 1 && i == 0) first_header = header; // TODO: We can probably find a better way to deal with multiple SAM file headers, // but for now we should disallow different headers to avoid issues. if(i > 0 && header.ToString() != first_header.ToString()) cerr << "Warning! SAM input files have different headers." << endl; reader.Close(); } for(int i = 0; i < filenames.size(); i++) { SamReader reader; if(!reader.Open(filenames[i])) { cerr << "Error opening SAM file: " << filenames[i] << endl; return -1; } header = reader.GetHeader(); references = reader.GetReferenceData(); open = true; if(filenames.size() > 1 && i == 0) first_header = header; BamAlignment * al = NULL; while(true) { al = reader.GetNextAlignment(); if(NULL == al) break; putOutputAlignment(al); } reader.Close(); } } else { cerr << "FileReader couldn't detect file format. Aborting." << endl; exit(-1); return -1; } return 0; }
int MergeTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); // set to default input if none provided if ( !m_settings->HasInputBamFilename ) m_settings->InputFiles.push_back(Options::StandardIn()); // opens the BAM files (by default without checking for indexes) BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles, false, true) ) { cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl; return 1; } // retrieve header & reference dictionary info std::string mergedHeader = reader.GetHeaderText(); RefVector references = reader.GetReferenceData(); // open writer BamWriter writer; bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression ); if ( !writer.Open(m_settings->OutputFilename, mergedHeader, references, writeUncompressed) ) { cerr << "ERROR: Could not open BAM file " << m_settings->OutputFilename << " for writing... Aborting." << endl; reader.Close(); return 1; } // if no region specified, store entire contents of file(s) if ( !m_settings->HasRegion ) { BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) writer.SaveAlignment(al); } // otherwise attempt to use region as constraint else { // if region string parses OK BamRegion region; if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { // attempt to re-open reader with index files reader.Close(); bool openedOK = reader.Open(m_settings->InputFiles, true, true ); // if error if ( !openedOK ) { cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl; return 1; } // if index data available, we can use SetRegion if ( reader.IsIndexLoaded() ) { // attempt to use SetRegion(), if failed report error if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." 
<< endl; reader.Close(); return 1; } // everything checks out, just iterate through specified region, storing alignments BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) writer.SaveAlignment(al); } // no index data available, we have to iterate through until we // find overlapping alignments else { BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) { if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) && (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) { writer.SaveAlignment(al); } } } } // error parsing REGION string else { cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl; cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl; reader.Close(); writer.Close(); return 1; } } // clean & exit reader.Close(); writer.Close(); return 0; }