bool ConvertTool::ConvertToolPrivate::Run(void) { // ------------------------------------ // initialize conversion input/output // set to default input if none provided if ( !m_settings->HasInput ) m_settings->InputFiles.push_back(Options::StandardIn()); // open input files BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { cerr << "bamtools convert ERROR: could not open input BAM file(s)... Aborting." << endl; return false; } // if input is not stdin & a region is provided, look for index files if ( m_settings->HasInput && m_settings->HasRegion ) { if ( !reader.LocateIndexes() ) { cerr << "bamtools convert ERROR: could not locate index file(s)... Aborting." << endl; return false; } } // retrieve reference data m_references = reader.GetReferenceData(); // set region if specified BamRegion region; if ( m_settings->HasRegion ) { if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { if ( reader.HasIndexes() ) { if ( !reader.SetRegion(region) ) { cerr << "bamtools convert ERROR: set region failed. 
Check that REGION describes a valid range" << endl; reader.Close(); return false; } } } else { cerr << "bamtools convert ERROR: could not parse REGION: " << m_settings->Region << endl; cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid" << endl; reader.Close(); return false; } } // if output file given ofstream outFile; if ( m_settings->HasOutput ) { // open output file stream outFile.open(m_settings->OutputFilename.c_str()); if ( !outFile ) { cerr << "bamtools convert ERROR: could not open " << m_settings->OutputFilename << " for output" << endl; return false; } // set m_out to file's streambuf m_out.rdbuf(outFile.rdbuf()); } // ------------------------------------- // do conversion based on format bool convertedOk = true; // pileup is special case // conversion not done per alignment, like the other formats if ( m_settings->Format == FORMAT_PILEUP ) convertedOk = RunPileupConversion(&reader); // all other formats else { bool formatError = false; // set function pointer to proper conversion method void (BamTools::ConvertTool::ConvertToolPrivate::*pFunction)(const BamAlignment&) = 0; if ( m_settings->Format == FORMAT_BED ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintBed; else if ( m_settings->Format == FORMAT_FASTA ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFasta; else if ( m_settings->Format == FORMAT_FASTQ ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFastq; else if ( m_settings->Format == FORMAT_JSON ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintJson; else if ( m_settings->Format == FORMAT_SAM ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintSam; else if ( m_settings->Format == FORMAT_YAML ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintYaml; else { cerr << "bamtools convert ERROR: unrecognized format: " << m_settings->Format << endl; cerr << "Please see documentation for list of supported formats " << endl; 
formatError = true; convertedOk = false; } // if format selected ok if ( !formatError ) { // if SAM format & not omitting header, print SAM header first if ( (m_settings->Format == FORMAT_SAM) && !m_settings->IsOmittingSamHeader ) m_out << reader.GetHeaderText(); // iterate through file, doing conversion BamAlignment a; while ( reader.GetNextAlignment(a) ) (this->*pFunction)(a); // set flag for successful conversion convertedOk = true; } } // ------------------------ // clean up & exit reader.Close(); if ( m_settings->HasOutput ) outFile.close(); return convertedOk; }
int CountTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); // if no '-in' args supplied, default to stdin if ( !m_settings->HasInput ) m_settings->InputFiles.push_back(Options::StandardIn()); // open reader without index BamMultiReader reader; if (!reader.Open(m_settings->InputFiles, false, true)) { cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl; return 1; } // alignment counter BamAlignment al; int alignmentCount(0); // if no region specified, count entire file if ( !m_settings->HasRegion ) { while ( reader.GetNextAlignmentCore(al) ) ++alignmentCount; } // otherwise attempt to use region as constraint else { // if region string parses OK BamRegion region; if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { // attempt to re-open reader with index files reader.Close(); bool openedOK = reader.Open(m_settings->InputFiles, true, true ); // if error if ( !openedOK ) { cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl; return 1; } // if index data available, we can use SetRegion if ( reader.IsIndexLoaded() ) { // attempt to use SetRegion(), if failed report error if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." 
<< endl; reader.Close(); return 1; } // everything checks out, just iterate through specified region, counting alignments while ( reader.GetNextAlignmentCore(al) ) ++alignmentCount; } // no index data available, we have to iterate through until we // find overlapping alignments else { while( reader.GetNextAlignmentCore(al) ) { if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) && (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) { ++alignmentCount; } } } } // error parsing REGION string else { cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl; cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl; reader.Close(); return 1; } } // print results cout << alignmentCount << endl; // clean & exit reader.Close(); return 0; }
int main (int argc, char * argv[]) { vector<string> inputFilenames; string combinedOutFilename, alignmentsOutFilename; try { TCLAP::CmdLine cmd("Program description", ' ', VERSION); TCLAP::ValueArg<string> combinedOutputArg("o", "out", "Combined output filename (BAM format)", true, "", "combined.bam", cmd); TCLAP::ValueArg<int> minInsertArg("n", "min-insert", "Minimum insert size", false, DEFAULT_MIN_GAP, "min insert size", cmd); TCLAP::ValueArg<int> maxInsertArg("x", "max-insert", "Maximum insert size", false, DEFAULT_MAX_GAP, "max insert size", cmd); TCLAP::MultiArg<string> inputArgs("b", "bam", "Input BAM file", true, "input.bam", cmd); cmd.parse(argc, argv); combinedOutFilename = combinedOutputArg.getValue(); MIN_GAP = minInsertArg.getValue(); MAX_GAP = maxInsertArg.getValue(); inputFilenames = inputArgs.getValue(); } catch (TCLAP::ArgException &e) { cerr << "Error: " << e.error() << " " << e.argId() << endl; } // TODO require that alignments are sorted by name BamMultiReader reader; reader.Open(inputFilenames); if (!ValidOut.Open(combinedOutFilename, reader.GetHeader(), reader.GetReferenceData())) { cerr << ValidOut.GetErrorString() << endl; return 1; } string current, prev; char mateID; Group group; set<string> references; Alignment a; while (reader.GetNextAlignment(a)) { parseID(a.Name, current, mateID); if (current.compare(prev) && prev.size() > 0) { processGroup(group, references); group.clear(); references.clear(); } references.insert(a.RefName); GroupKey key; key.refID = a.RefName; key.mateID = mateID; key.rev = a.IsReverseStrand(); group.insert( std::make_pair( key, a ) ); prev = current; } processGroup(group, references); }
bool ConvertTool::ConvertToolPrivate::Run(void) { // ------------------------------------ // initialize conversion input/output // set to default input if none provided if ( !m_settings->HasInput ) m_settings->InputFiles.push_back(Options::StandardIn()); // open input files BamMultiReader reader; if ( !m_settings->HasInput ) { // don't attempt to open index for stdin if ( !reader.Open(m_settings->InputFiles, false) ) { cerr << "Could not open input files" << endl; return false; } } else { if ( !reader.Open(m_settings->InputFiles, true) ) { if ( !reader.Open(m_settings->InputFiles, false) ) { cerr << "Could not open input files" << endl; return false; } else { cerr << "Opened reader without index file, jumping is disabled." << endl; } } } m_references = reader.GetReferenceData(); // set region if specified BamRegion region; if ( m_settings->HasRegion ) { if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { if ( !reader.SetRegion(region) ) { cerr << "Could not set BamReader region to REGION: " << m_settings->Region << endl; return false; } } else { cerr << "Could not parse REGION: " << m_settings->Region << endl; return false; } } // if output file given ofstream outFile; if ( m_settings->HasOutput ) { // open output file stream outFile.open(m_settings->OutputFilename.c_str()); if ( !outFile ) { cerr << "Could not open " << m_settings->OutputFilename << " for output" << endl; return false; } // set m_out to file's streambuf m_out.rdbuf(outFile.rdbuf()); } // ------------------------------------- // do conversion based on format bool convertedOk = true; // pileup is special case // conversion not done per alignment, like the other formats if ( m_settings->Format == FORMAT_PILEUP ) convertedOk = RunPileupConversion(&reader); // all other formats else { bool formatError = false; // set function pointer to proper conversion method void (BamTools::ConvertTool::ConvertToolPrivate::*pFunction)(const BamAlignment&) = 0; if ( m_settings->Format == 
FORMAT_BED ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintBed; else if ( m_settings->Format == FORMAT_BEDGRAPH ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintBedGraph; else if ( m_settings->Format == FORMAT_FASTA ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFasta; else if ( m_settings->Format == FORMAT_FASTQ ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFastq; else if ( m_settings->Format == FORMAT_JSON ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintJson; else if ( m_settings->Format == FORMAT_SAM ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintSam; else if ( m_settings->Format == FORMAT_WIGGLE ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintWiggle; else if ( m_settings->Format == FORMAT_YAML ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintYaml; else { cerr << "Unrecognized format: " << m_settings->Format << endl; cerr << "Please see help|README (?) for details on supported formats " << endl; formatError = true; convertedOk = false; } // if format selected ok if ( !formatError ) { // if SAM format & not omitting header, print SAM header first if ( (m_settings->Format == FORMAT_SAM) && !m_settings->IsOmittingSamHeader ) m_out << reader.GetHeaderText(); // iterate through file, doing conversion BamAlignment a; while ( reader.GetNextAlignment(a) ) (this->*pFunction)(a); // set flag for successful conversion convertedOk = true; } } // ------------------------ // clean up & exit reader.Close(); if ( m_settings->HasOutput ) outFile.close(); return convertedOk; }
int main ( int argc, char *argv[] ) { struct parameters *param = 0; param = interface(param, argc, argv); //bam input and generate index if not yet //-------------------------------------------------------------------------------------------------------+ // BAM input (file or filenames?) | //-------------------------------------------------------------------------------------------------------+ char *fof = param->mapping_f; FILE *IN=NULL; char linefof[5000]; int filecount=0; vector <string> fnames; if (strchr(fof,' ')!=NULL) { char *ptr; ptr=strtok(fof," "); while (ptr!=NULL) { fnames.push_back(ptr); filecount++; ptr=strtok(NULL," "); } } else { IN=fopen(fof,"rt"); if (IN!=NULL) { long linecount=0; while (fgets(linefof,5000-1,IN)!=NULL) { linecount++; if (linefof[0]!='#' && linefof[0]!='\n') { char *ptr=strchr(linefof,'\n'); if (ptr!=NULL && ptr[0]=='\n') { ptr[0]='\0'; } FILE *dummy=NULL; dummy=fopen(linefof,"rt"); if (dummy!=NULL) { // seems to be a file of filenames... fclose(dummy); fnames.push_back(linefof); filecount++; } else if (filecount==0 || linecount>=1000-1) { // seems to be a single file fnames.push_back(fof); filecount++; break; } } } fclose(IN); } } //file or file name decided and stored in vector "fnames" cerr << "the input mapping files are:" << endl; vector <string>::iterator fit = fnames.begin(); for(; fit != fnames.end(); fit++) { cerr << *fit << endl; } //-------------------------------------------------------------------------------------------------------+ // end of file or filenames | //-------------------------------------------------------------------------------------------------------+ // open the BAM file(s) BamMultiReader reader; reader.Open(fnames); // get header & reference information string header = reader.GetHeaderText(); RefVector refs = reader.GetReferenceData(); // attempt to open BamWriter BamWriter writer; string outputBam = param->writer; if ( outputBam != "" ) { if ( !writer.Open(param->writer, header, refs) ) { cerr << 
"Could not open output BAM file" << endl; exit(0); } } BamAlignment bam; while (reader.GetNextAlignment(bam)) { //change RG string rg = "RG"; string rgType = "Z"; string rgValue = "1"; bam.EditTag(rg,rgType,rgValue); writer.SaveAlignment(bam); } // read a bam return 0; } //main
bool FilterTool::FilterToolPrivate::Run(void) { // set to default input if none provided if ( !m_settings->HasInput && !m_settings->HasInputFilelist ) m_settings->InputFiles.push_back(Options::StandardIn()); // add files in the filelist to the input file list if ( m_settings->HasInputFilelist ) { ifstream filelist(m_settings->InputFilelist.c_str(), ios::in); if ( !filelist.is_open() ) { cerr << "bamtools filter ERROR: could not open input BAM file list... Aborting." << endl; return false; } string line; while ( getline(filelist, line) ) m_settings->InputFiles.push_back(line); } // initialize defined properties & user-specified filters // quit if failed if ( !SetupFilters() ) return false; // open reader without index BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { cerr << "bamtools filter ERROR: could not open input files for reading." << endl; return false; } // retrieve reader header & reference data const string headerText = reader.GetHeaderText(); filterToolReferences = reader.GetReferenceData(); // determine compression mode for BamWriter bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression ); BamWriter::CompressionMode compressionMode = BamWriter::Compressed; if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed; // open BamWriter BamWriter writer; writer.SetCompressionMode(compressionMode); if ( !writer.Open(m_settings->OutputFilename, headerText, filterToolReferences) ) { cerr << "bamtools filter ERROR: could not open " << m_settings->OutputFilename << " for writing." 
<< endl; reader.Close(); return false; } // if no region specified, filter entire file BamAlignment al; if ( !m_settings->HasRegion ) { while ( reader.GetNextAlignment(al) ) { if ( CheckAlignment(al) ) writer.SaveAlignment(al); } } // otherwise attempt to use region as constraint else { // if region string parses OK BamRegion region; if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { // attempt to find index files reader.LocateIndexes(); // if index data available for all BAM files, we can use SetRegion if ( reader.HasIndexes() ) { // attempt to use SetRegion(), if failed report error if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { cerr << "bamtools filter ERROR: set region failed. Check that REGION describes a valid range" << endl; reader.Close(); return false; } // everything checks out, just iterate through specified region, filtering alignments while ( reader.GetNextAlignment(al) ) if ( CheckAlignment(al) ) writer.SaveAlignment(al); } // no index data available, we have to iterate through until we // find overlapping alignments else { while ( reader.GetNextAlignment(al) ) { if ( (al.RefID >= region.LeftRefID) && ((al.Position + al.Length) >= region.LeftPosition) && (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) { if ( CheckAlignment(al) ) writer.SaveAlignment(al); } } } } // error parsing REGION string else { cerr << "bamtools filter ERROR: could not parse REGION: " << m_settings->Region << endl; cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid" << endl; reader.Close(); return false; } } // clean up & exit reader.Close(); writer.Close(); return true; }
// Extracts, for every scan window, the portion of each accepted read that
// falls inside the window and prints it to stdout in FASTA or FASTQ form
// (selected by outFormat). Optionally prepends/appends clipped bases
// (keepClip) and, when useUnique is set, prints one representative per
// distinct sequence together with its frequency written to outFreq.
// Always returns 0.
int CropBamTool::CropBam() {
    // open bam files
    BamMultiReader bamReader;
    bamReader.Open(bamFiles);

    // the dictionary of chromosomes
    RefVector genome = bamReader.GetReferenceData();

    // get the scanning window
    // each tuple is read below as (reference id, left position, right position)
    vector<tuple<int,int,int>> windows;
    int numWindows = GenericRegionTools::toScanWindow(genome, regionStrings, windows);

    // keys "<read name>-<ref id>-<left>-<right>" already handled, used to skip repeats
    unordered_set<string> readpool;

    // temporary struct for sequence object
    typedef struct {
        string name;
        int head_soft_clip;
        int tail_soft_clip;
        string seq;
        string qual;
    }cropbam_seq_t;

    // temporary struct for unique seqs
    // maps a cropped sequence to every record that produced it
    map<string,list<cropbam_seq_t>> uniqueSeqPool;

    // lambda expression for output
    // prints one record to stdout; nothing is printed for any other outFormat value
    auto Output = [this](cropbam_seq_t &a){
        if (this->outFormat=="fasta"){
            cout << ">" << a.name << "\t"
                 << "head_soft_clip=" << a.head_soft_clip << "\t"
                 << "tail_soft_clip=" << a.tail_soft_clip << "\t"
                 << endl
                 << a.seq << endl;
        }
        if (this->outFormat=="fastq"){
            cout << "@" << a.name << "\t"
                 << "head_soft_clip=" << a.head_soft_clip << "\t"
                 << "tail_soft_clip=" << a.tail_soft_clip << "\t"
                 << endl
                 << a.seq << endl;
            cout << "+" << endl
                 << a.qual << endl;
        }
    };

    // loop over windows
    omp_set_dynamic(0);
    omp_set_num_threads(numThreads);
    // NOTE(review): bamReader, readpool, uniqueSeqPool and cout are declared
    // outside this loop and are used by every iteration with no visible
    // synchronization — confirm this is safe when numThreads > 1.
    #pragma omp parallel for shared(genome)
    for (int i=0; i<numWindows; i++)
    {
        clock_t tStart = clock();

        // the reader opened above is opened again on every iteration
        bamReader.Open(bamFiles);

        int wId = get<0>(windows[i]);
        int wLp = get<1>(windows[i]);
        int wRp = get<2>(windows[i]);

        if (verbose>=1) Verbose("process the window " + genome[wId].RefName + ":" + to_string(wLp+1) + "-" + to_string(wRp));

        // rewind the bam reader
        bamReader.Rewind();
        // set the region
        bamReader.SetRegion(wId, wLp, wId, wRp);

        int numReads = 0;
        // retrieve the alignment
        BamAlignment aln;
        while (bamReader.GetNextAlignment(aln))
        {
            // skip the alignment if it doesn't overlap the window
            if (aln.Position>=wRp || aln.GetEndPosition()<=wLp)
                continue;

            // skip the invalid alignment
            if (!isValidAlignment(aln, readLenThres, mapQualThres, alnFlagMarker))
                continue;

            // skip the alignment harboring too many mismatches
            if (!GenericBamAlignmentTools::validReadIdentity(aln, 1-alnIdenThres))
                continue;

            // skip a read that was already handled for this exact window
            stringstream keyss;
            keyss << GenericBamAlignmentTools::getBamAlignmentName(aln) << "-"
                  << wId << "-" << wLp << "-" << wRp;
            string key = keyss.str();
            auto ptr = readpool.find(key);
            if (ptr!=readpool.end())
                continue;
            readpool.emplace(key);

            // get the partial read
            string readSegment, readQualSegment, genomeSegment;
            GenericBamAlignmentTools::getLocalAlignment(aln, wLp, wRp-wLp, readSegment, readQualSegment, genomeSegment);

            // add soft clip
            // leading clip: counted when the alignment starts inside the window
            // and its first CIGAR operation is 'S' or 'H'
            // NOTE(review): ptr0 is dereferenced without checking that CigarData
            // is non-empty — confirm upstream filters guarantee that.
            int hsc=0;
            auto ptr0 = aln.CigarData.begin();
            if (aln.Position>=wLp && (ptr0->Type=='S' || ptr0->Type=='H'))
            {
                stringstream headClipSeq, headClipQual;
                // this index shadows the window index of the enclosing loop
                for (int i=0; i<ptr0->Length; i++)
                {
                    headClipSeq << aln.QueryBases[i];
                    headClipQual << aln.Qualities[i];
                }

                if (keepClip)
                {
                    readSegment=headClipSeq.str()+readSegment;
                    readQualSegment=headClipQual.str()+readQualSegment;
                }

                hsc += ptr0->Length;
            }

            // trailing clip: counted when the alignment ends before the window's
            // right edge and its last CIGAR operation is 'S' or 'H'
            int tsc=0;
            auto ptr1 = aln.CigarData.rbegin();
            if (aln.GetEndPosition()<wRp && (ptr1->Type=='S' || ptr1->Type=='H'))
            {
                string ss="", qs="";
                auto str=aln.QueryBases.rbegin();
                auto qtr=aln.Qualities.rbegin();
                // walk backwards from the read end, prepending to keep read order
                for (int i=0; i<ptr1->Length; i++,str++,qtr++)
                {
                    ss=(*str)+ss;
                    qs=(*qtr)+qs;
                }

                if (keepClip)
                {
                    readSegment=readSegment+ss;
                    readQualSegment=readQualSegment+qs;
                }

                tsc += ptr1->Length;
            }

            // keep only segments that are long enough
            if (readSegment.length()>=segmentLenThres)
            {
                cropbam_seq_t a;
                a.name = GenericBamAlignmentTools::getBamAlignmentName(aln);
                a.head_soft_clip = hsc;
                a.tail_soft_clip = tsc;
                a.seq = readSegment;
                a.qual = readQualSegment;
                if (uniqueSeqPool.count(a.seq)==0)
                    uniqueSeqPool[a.seq] = list<cropbam_seq_t>(1,a);
                else
                    uniqueSeqPool[a.seq].emplace_back(a);

                // (records used to be printed directly here; output now happens
                //  after the window has been scanned, through Output)

                numReads++;
            }
        }

        // the counter is restarted and now counts printed records instead
        numReads = 0;
        // uniqueSeqPool is not cleared anywhere in this loop, so this pass
        // iterates over everything accumulated so far, not just this window
        if (useUnique){
            // the frequency file is opened again on every window iteration
            ofstream of;
            of.open(outFreq);
            for (auto a : uniqueSeqPool){
                // print the first record of each sequence seen at least thresFreq times
                if (a.second.size()>=thresFreq){
                    Output(*a.second.begin());
                    of << a.second.begin()->name << "\t" << a.second.size() << endl;
                    numReads ++;
                }
            }
            of.close();
        }else{
            for (auto a : uniqueSeqPool){
                for (auto b : a.second){
                    Output(b);
                    numReads ++;
                }
            }
        }

        clock_t tEnd = clock();

        if (verbose>=1) Verbose("retrieve " + to_string(numReads) + " reads");
        if (verbose>=1) Verbose("time elapsed " + to_string((double)(tEnd-tStart)/CLOCKS_PER_SEC) + " seconds");
    }

    return 0;
}
int bbctools_create( BbcUtils::OptParser &optParser ) { const vector<string> cmdArgs = optParser.getArgs(); // remove .bbc extension from bbc file root, if present string bbcfileRoot = optParser.getOptValue( "bbc" ); int i = bbcfileRoot.size() - 4; if( i > 0 && bbcfileRoot.substr(i,4) == ".bbc" ) { bbcfileRoot = bbcfileRoot.substr(0,i); } bool f_bci = optParser.getOptBoolean("index"); bool f_cbc = optParser.getOptBoolean("coarse"); string targetRegions = optParser.getOptValue("regions"); string annotationFields = optParser.getOptValue( "annotationFields"); vector<string> auxRegionSplit = BbcUtils::mapToPairList(annotationFields); string sumstatsFile = optParser.getOptValue("sumStats"); string covstatsFile = optParser.getOptValue("covStats"); string readOrigFile = optParser.getOptValue("readOrigin"); string readType = optParser.getOptValue("readType"); string covDepths = optParser.getOptValue("covDepths","-"); double minPcCov = optParser.getOptNumber("minPcCov"); int32_t primerLength = optParser.getOptInteger( "primerLength", (readType == "AmpliSeq" ? 30 : 0) ); int32_t maxE2eEndGap = optParser.getOptInteger( "e2eGap", (readType == "AmpliSeq" ? 2 : 0) ); bool autoCreateBamIndex = optParser.getOptBoolean("autoCreateBamIndex"); bool samdepth = optParser.getOptBoolean("samdepth"); int32_t filterQuality = optParser.getOptInteger("minMAPQ"); int32_t minAlignLength = optParser.getOptInteger("minAlignLength"); bool filterDuplicates = optParser.getOptBoolean("noDups"); bool filterUnique = optParser.getOptBoolean("unique"); uint32_t skipFlag = filterDuplicates ? 0x704 : 0x304; uint16_t minMapQuality = filterUnique ? 
1 : filterQuality; bool onlyOnTargetReads = optParser.getOptBoolean("onTargetReads"); bool onlyOnTargetBases = optParser.getOptBoolean("onTargetBases"); // possible future options bool invertOnTarget = false; // check basic valid argument values and combinations int numOuts = !bbcfileRoot.empty() + !covstatsFile.empty() + !sumstatsFile.empty() + !readOrigFile.empty(); int numPipes = (bbcfileRoot == "-") + (covstatsFile == "-") + (sumstatsFile == "-") + (readOrigFile == "-"); if( numOuts == 0 && !f_bci && !f_cbc ) { bbcfileRoot = "-"; // default if no other output specified } else if( numPipes > 1 ) { cerr << "Error: bbctools create: Only one file output (--covStats, --sumStats, --readOrigin or --bbc) may be piped to STDOUT." << endl; return -1; } else if( samdepth && numOuts ) { cerr << "Error: bbctools create: --samdepth (-s) option may only be used without other output options." << endl; return -1; } // check if single argument is a BBC file and leave open for reading if so BbcView bbcView; bool haveBbcFile = cmdArgs.size() == 1 && bbcView.Open( cmdArgs[0], true ); bbcView.SelectPrintStream( samdepth ? "SAMDEPTH" : "BBCVIEW" ); // check distinction between default and explicit no target regions - only for BBC input bool explicitNoTargetRegions = false; if( targetRegions == "-" ) { explicitNoTargetRegions = haveBbcFile; targetRegions = ""; } if( targetRegions.empty() ) { if( onlyOnTargetBases && explicitNoTargetRegions && !invertOnTarget ) { cerr << "Warning: bbctools create --onTargetBases (-b) option with --regions '-' produces no coverage." << endl; } else if( onlyOnTargetReads ) { cerr << "Error: bbctools create --onTargetReads (-r) option requires a --regions file." << endl; return -1; } } // check for legal BBC create options if( f_bci || f_cbc ) { if( (bbcfileRoot.empty() || bbcfileRoot == "-") && !haveBbcFile ) { string opt = f_bci ? 
"--index (-i)" : "--coarse (-c)"; cerr << "Error: bbctools create "+opt+" option requires the --bbc (-B) option or a BBC source file." << endl; return -1; } } BamMultiReader bamReader; if( haveBbcFile ) { // warn for options that do not work with BBC input if( filterQuality > 0 || filterDuplicates || filterUnique || minAlignLength ) { cerr << "Warning: SAM flag, alignment length and MAPQ filters ignored for BBC source file." << endl; } if( samdepth ) { cerr << "Error: --samdepth option is not supported for BBC source files." << endl; return -1; } if( !readOrigFile.empty() ) { cerr << "Error: --readOrigin option is not supported for BBC source files." << endl; return -1; } } else { // check / open for multiple BAM file inputs if ( !bamReader.Open(cmdArgs) ) { if( cmdArgs.size() == 1 ) cerr << "ERROR: Could not read input BAM file:"; else cerr << "ERROR: Could not read all input BAM files:"; // get and clean up bamtools error msg string errMsg = bamReader.GetErrorString(); size_t i = errMsg.find_first_of('\n'); if( i != string::npos ) errMsg = errMsg.substr(i+1); i = errMsg.find("::"); if( i != string::npos ) { i = errMsg.find(": "); if( i != string::npos ) errMsg = errMsg.substr(i+1); } errMsg = BbcUtils::stringTrim(errMsg); errMsg[0] = toupper(errMsg[0]); cerr << endl << errMsg << "." << endl; return 1; } } // grab reference list from either input source const RefVector &references = haveBbcFile ? bbcView.GetReferenceData() : bamReader.GetReferenceData(); if( !references.size() ) { // Issue would already been detected if input was BBC file cerr << "ERROR: " << (cmdArgs.size() > 1 ? 
"One or more " : ""); cerr << "BAM file contains unaligned reads (no references).\n"; return 1; } // check/set up target regions input regions/region statistics output RegionCoverage *regions = NULL; string covstatsStaticFields; bool trackRegionBaseCov = !covDepths.empty(); if( covstatsFile.empty() ) { trackRegionBaseCov = false; if( !annotationFields.empty() ) { cerr << "Warning: --annotationFields (A) option ignored without --covStats (-C) option." << endl; } if( !covDepths.empty() && covDepths != "-" ) { cerr << "Warning: --covDepths (-D) option ignored without --covStats (-C) option." << endl; } if( !readType.empty() ) { cerr << "Warning: --readType (-T) option ignored without --covStats (-C) option." << endl; } // read regions for input only and/or creating sumStats if( !targetRegions.empty() || explicitNoTargetRegions || !sumstatsFile.empty() ) { regions = new RegionCoverage(references); } } else if( readType == "trgreads" || readType == "amplicon" || readType == "AmpliSeq" ) { if( haveBbcFile ) { cerr << "Creation of read coverage requires BAM file input." << endl; return -1; } AmpliconRegionStatistics *ampRegionStats = new AmpliconRegionStatistics(references); ampRegionStats->SetGenericReads( readType == "trgreads" ); ampRegionStats->SetSigFacCoverage( minPcCov/100 ); ampRegionStats->SetMaxUpstreamPrimerStart( primerLength ); ampRegionStats->SetMaxE2eEndDistance( maxE2eEndGap ); covstatsStaticFields = "overlaps,"; covstatsStaticFields += (minPcCov > 0) ? 
"fwd_cov,rev_cov" : "fwd_e2e,rev_e2e"; covstatsStaticFields += ",total_reads,fwd_reads,rev_reads"; regions = ampRegionStats; } else if( readType == "trgbases" ) { if( haveBbcFile && targetRegions.empty() && !explicitNoTargetRegions ) { cerr << "Warning: Assuming reference contigs for base coverage targets (=> option --regions -)" << endl; } RegionStatistics *regionStats = new RegionStatistics(references); covstatsStaticFields = "covered,uncov_5p,uncov_3p,ave_basereads,fwd_basereads,rev_basereads"; trackRegionBaseCov = true; regions = regionStats; } else if( readType == "covdepth" || readType.empty() ) { // output (sorted) targets file with only covDepth stats (if any) regions = new RegionCoverage(references); } else { cerr << "Unknown read type '" << readType << "'" << endl; return -1; } // Load the input regions or default to whole reference contig targets if( regions ) { regions->SetCovAtDepths( covDepths == "-" ? "20,100,500" : covDepths ); if( targetRegions.empty() ) { regions->SetWholeContigTargets(); // set contigs as explicit regions means all reads will seen as on-target // for consistency these are inverted (for input from BBC) invertOnTarget = true; } else { string auxFieldIdx = auxRegionSplit.size() ? auxRegionSplit[0] : ""; string errMsg = regions->Load( targetRegions, "BED", auxFieldIdx ); if( !errMsg.empty() ) { cerr << "ERROR: " + errMsg + "\n"; return 1; } } if( onlyOnTargetReads && haveBbcFile ) { cerr << "Error: bbctools create --onTargetReads option is not supported for BBC source file." 
<< endl; return -1; } } // // Perform all bbctools create utilities // BbcCreate *bbcCreate = NULL; if( !bbcfileRoot.empty() && (bbcfileRoot != "-" || !haveBbcFile) ) { bbcCreate = new BbcCreate(references); if( bbcfileRoot != "-" && !bbcCreate->Open(bbcfileRoot+".bbc") ) { return 1; } bbcCreate->SetNoOffTargetPositions(onlyOnTargetBases); } bbcView.SetNoOffTargetPositions(onlyOnTargetBases); // Stream input to output creators if( haveBbcFile ) { // BBC reader and driver via BbcView object if( bbcfileRoot != "-" || !covstatsFile.empty() ) { // disable BbcView text stream if using for file creation bbcView.SelectPrintStream("NONE"); } // process input BBC for just new BBC and target coverage (defer BCI/CBC) bbcView.SetBbcCreate(bbcCreate); bbcView.SetRegionCoverage(regions); // explicitNoTargetRegions intended for explicitly removing on-target coverage bbcView.SetInvertOnTarget(explicitNoTargetRegions ^ invertOnTarget); if( bbcCreate || regions || bbcfileRoot == "-" ) { bbcView.ReadAll(); } } else { // Test read tracking option for file write TrackReads *readTracker = NULL; try { if( !readOrigFile.empty() ) readTracker = new TrackReads( readOrigFile, regions ); } catch( std::runtime_error & ) { cerr << "ERROR: Unable to write to read tracking file " << readOrigFile << endl; return 1; } // BAM reader, BaseCoverage driver, dispatching to BbcCreate and BbcView objects BaseCoverage baseCov(references); baseCov.SetRegionCoverage(regions); baseCov.SetBbcCreate(bbcCreate); baseCov.SetInvertOnTarget(invertOnTarget); if( bbcfileRoot == "-" ) { baseCov.SetBbcView(&bbcView); } // Certain options require that all reads are processed, invalidating other performance options bool trackAllReads = !sumstatsFile.empty() || readTracker; // Implicit set of onlyOnTargetReads for performance when only these reads are required bool useBaseCov = (bbcfileRoot == "-" || bbcCreate); if( !targetRegions.empty() && !trackAllReads ) { onlyOnTargetReads |= onlyOnTargetBases; if( samdepth || 
!useBaseCov ) onlyOnTargetReads = true; } useBaseCov |= trackRegionBaseCov; // do not allow jumping if sumStats option is used - need to count all reads bool bamReaderSetRegions = (s_useBamReaderJump && !trackAllReads); int trgContig = 0, trgSrtPos = 0, trgEndPos = 0; int minJumpLen = s_initialMinJumpLen; int maxReadLen = s_initialMaxReadLen; if( onlyOnTargetReads ) { // load/create BAM index files for targeted reading // Note: BamIndex::BAMTOOLS format performed very badly and cannot use mixed with BTI/BAI files if( bamReaderSetRegions && !bamReader.LocateIndexes() ) { string plural( cmdArgs.size() > 1 ? "s" : "" ); if( autoCreateBamIndex ) { cerr << "Warning: Did not locate BAM index (BAI) file" << plural << ", creating bamtools version..." << endl; // to avoid bug use new instance of BamMultiReader BamMultiReader bamReader2; if( !bamReader2.Open(cmdArgs) || !bamReader2.CreateIndexes() ) { cerr << "WARNING: Failed to create BAM index file" << plural << "." << endl; bamReaderSetRegions = false; } else { if( cmdArgs.size() == 1 ) { cerr << "Successfully created BAM index file: " << BbcUtils::fileName(cmdArgs[0]) << ".bai" << endl; } else { cerr << "Successfully created BAM index files." << endl; } // re-locate indexes with first reader - could not seem to locate BTI files created! if( !bamReader.LocateIndexes() ) { cerr << "WARNING: Failed to locate BAM index file" << plural << " just created!" << endl; bamReaderSetRegions = false; } } } else { cerr << "Warning: BAM index file" << plural << " not located for targeted BAM access." 
<< endl; bamReaderSetRegions = false; } } // cancel region filtering if there are no regions to iterate (unexpected) if( !regions->GetNextRegion( trgContig, trgSrtPos, trgEndPos ) ) { onlyOnTargetReads = bamReaderSetRegions = false; } if( bamReaderSetRegions ) { bamReader.Jump( trgContig, trgSrtPos-maxReadLen ); } } BamAlignment aln; while( bamReader.GetNextAlignmentCore(aln) ) { // appears to be an undocumented behavior here if( aln.RefID < 0 ) continue; // skip filtered reads by flag, length or mapping quality if( aln.AlignmentFlag & skipFlag ) continue; if( aln.MapQuality < minMapQuality ) continue; int32_t endPos = aln.GetEndPosition(); if( minAlignLength > 0 ) { if( endPos - aln.Position < minAlignLength ) continue; } // screen for on-target reads if( onlyOnTargetReads ) { // find next region overlapping or beyond of current read bool moreRegions = true; bool setRegion = false; while( aln.RefID > trgContig || (aln.RefID == trgContig && aln.Position > trgEndPos) ) { if( !regions->GetNextRegion( trgContig, trgSrtPos, trgEndPos ) ) { moreRegions = false; break; } setRegion = bamReaderSetRegions; } if( !moreRegions ) { // prevent further on-target checks and exit early if not using sumStats onlyOnTargetReads = false; if( trackAllReads ) { // force tracking of off-target reads regions->TrackReadsOnRegion(aln,endPos); if( readTracker ) readTracker->Write(aln,endPos); continue; } break; } if( setRegion ) { // track max read length for future index jumps - just in case long reads ever used if( endPos - aln.Position > maxReadLen ) { maxReadLen = endPos - aln.Position; if( maxReadLen > minJumpLen ) minJumpLen = maxReadLen; } if( aln.RefID != trgContig || trgSrtPos - aln.Position > minJumpLen ) { bamReader.Jump( trgContig, trgSrtPos-maxReadLen ); } } if( aln.RefID < trgContig || endPos < trgSrtPos ) { // force tracking of off-target reads if( trackAllReads ) { regions->TrackReadsOnRegion(aln,endPos); if( readTracker ) readTracker->Write(aln,endPos); } continue; // 
current is before next target region - fetch the next within bounds } } // record base coverage and region coverage statistics if( useBaseCov ) { endPos = baseCov.AddAlignment(aln,endPos); if( endPos <= 0 ) { if( endPos == 0 ) continue; // read was silently ignored cerr << "ERROR: BAM file is not correctly sorted vs. reference." << endl; return 1; } } // record read coverage and region coverage statistics if( regions ) { regions->TrackReadsOnRegion(aln,endPos); } if( readTracker ) { readTracker->Write(aln,endPos); } } // flush and close objects associated with output baseCov.Flush(); } // Output in-memory region stats file and ensure BBC file is closed if( regions ) { // build output fields title string string outFields = "contig_id,contig_srt,contig_end"; if( !auxRegionSplit.empty() ) outFields += "," + auxRegionSplit[1]; if( !covstatsStaticFields.empty() ) outFields += "," + covstatsStaticFields; regions->Write( covstatsFile, outFields ); if( !sumstatsFile.empty() ) { regions->WriteSummary( sumstatsFile, invertOnTarget ); } delete regions; } delete bbcCreate; // Complete remaining file creation options using a BBC file input // NOTE: Using BbbCreate for this would require code duplication and concurrent file output streaming if( f_bci || f_cbc ) { // Check BBC file source if( haveBbcFile ) { bbcfileRoot = cmdArgs[0]; int i = bbcfileRoot.size() - 4; if( i > 0 && bbcfileRoot.substr(i,4) == ".bbc" ) { bbcfileRoot = bbcfileRoot.substr(0,i); } } else if( !bbcView.Open( bbcfileRoot+".bbc", true ) ) { cerr << "ERROR: Unexpected failure to read new BBC file '"+bbcfileRoot+".bam'" << endl; return 1; } if( f_bci ) { BbcIndex indexer( bbcfileRoot+".bci" ); if( !bbcView.CreateIndex(indexer) ) { cerr << "ERROR: Failed to create index file '" << bbcfileRoot << ".bci'" << endl; return 1; } } if( f_cbc ) { // CBC generation can use BCI file but is no faster since whole BBC file is read BbcCoarse cbcWriter( bbcfileRoot+".cbc" ); if( !bbcView.CreateCbc(cbcWriter) ) { cerr << 
"ERROR: Failed to create coarse base coverage file '" << bbcfileRoot << ".cbc'" << endl; return 1; } } } return 0; }
bool MergeTool::MergeToolPrivate::Run(void) { // set to default input if none provided if ( !m_settings->HasInputBamFilename ) m_settings->InputFiles.push_back(Options::StandardIn()); // opens the BAM files (by default without checking for indexes) BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { cerr << "bamtools merge ERROR: could not open input BAM file(s)... Aborting." << endl; return false; } // retrieve header & reference dictionary info std::string mergedHeader = reader.GetHeaderText(); RefVector references = reader.GetReferenceData(); // determine compression mode for BamWriter bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression ); BamWriter::CompressionMode compressionMode = BamWriter::Compressed; if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed; // open BamWriter BamWriter writer; writer.SetCompressionMode(compressionMode); if ( !writer.Open(m_settings->OutputFilename, mergedHeader, references) ) { cerr << "bamtools merge ERROR: could not open " << m_settings->OutputFilename << " for writing." << endl; reader.Close(); return false; } // if no region specified, store entire contents of file(s) if ( !m_settings->HasRegion ) { BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) writer.SaveAlignment(al); } // otherwise attempt to use region as constraint else { // if region string parses OK BamRegion region; if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { // attempt to find index files reader.LocateIndexes(); // if index data available for all BAM files, we can use SetRegion if ( reader.HasIndexes() ) { // attempt to use SetRegion(), if failed report error if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { cerr << "bamtools merge ERROR: set region failed. 
Check that REGION describes a valid range" << endl; reader.Close(); return false; } // everything checks out, just iterate through specified region, storing alignments BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) writer.SaveAlignment(al); } // no index data available, we have to iterate through until we // find overlapping alignments else { BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) { if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) && (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) { writer.SaveAlignment(al); } } } } // error parsing REGION string else { cerr << "bamtools merge ERROR: could not parse REGION - " << m_settings->Region << endl; cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid" << endl; reader.Close(); writer.Close(); return false; } } // clean & exit reader.Close(); writer.Close(); return true; }
int main ( int argc, char *argv[] ) { struct parameters *param = 0; param = interface(param, argc, argv); //region file input (the region file should be sorted as the same way as the bam file) ifstream region_f; region_f.open(param->region_f, ios_base::in); // the region file is opened //bam input and generate index if not yet //-------------------------------------------------------------------------------------------------------+ // BAM input (file or filenames?) | //-------------------------------------------------------------------------------------------------------+ char *fof = param->mapping_f; FILE *IN=NULL; char linefof[5000]; int filecount=0; vector <string> fnames; if (strchr(fof,' ')!=NULL) { char *ptr; ptr=strtok(fof," "); while (ptr!=NULL) { fnames.push_back(ptr); filecount++; ptr=strtok(NULL," "); } } else { IN=fopen(fof,"rt"); if (IN!=NULL) { long linecount=0; while (fgets(linefof,5000-1,IN)!=NULL) { linecount++; if (linefof[0]!='#' && linefof[0]!='\n') { char *ptr=strchr(linefof,'\n'); if (ptr!=NULL && ptr[0]=='\n') { ptr[0]='\0'; } FILE *dummy=NULL; dummy=fopen(linefof,"rt"); if (dummy!=NULL) { // seems to be a file of filenames... 
fclose(dummy); fnames.push_back(linefof); filecount++; } else if (filecount==0 || linecount>=1000-1) { // seems to be a single file fnames.push_back(fof); filecount++; break; } } } fclose(IN); } } //file or file name decided and stored in vector "fnames" cerr << "the input mapping files are:" << endl; vector <string>::iterator fit = fnames.begin(); for(; fit != fnames.end(); fit++) { cerr << *fit << endl; } //-------------------------------------------------------------------------------------------------------+ // end of file or filenames | //-------------------------------------------------------------------------------------------------------+ // open the BAM file(s) BamMultiReader reader; reader.Open(fnames); // get header & reference information string header = reader.GetHeaderText(); RefVector refs = reader.GetReferenceData(); if ( ! reader.LocateIndexes() ) // opens any existing index files that match our BAM files reader.CreateIndexes(); // creates index files for BAM files that still lack one // locus bias struct lb empty_profile = {0,0,0,0}; vector <struct lb> locus_b(1000, empty_profile); // output locus bias file string locus_bias_set = param->lbias; ofstream locus_bias; if ( locus_bias_set != "" ) { locus_bias.open(param->lbias); if ( !locus_bias ) { cerr << "can not open locus_bias file.\n"; exit(0); } } //should decide which chromosome string line; string old_chr = "SRP"; string type = param->type; //whether do some position-level pile-up stuff bool posc = false; ofstream posc_f; ofstream chrmap_f; string poscset = param->posc; if ( poscset != "" ) { posc = true; posc_f.open(param->posc); chrmap_f.open(param->chrmap); } bool noChr; if ( param->nochr == 1 ){ noChr = true; } else { noChr = false; } //regions for the input of region file deque <struct region> regions; getline(region_f, line); //get the first line eatline(line,regions,noChr); deque <struct region>::iterator it = regions.begin(); while ( it->chr != old_chr ) { old_chr = it->chr; // set 
the current chr as old chr int chr_id = reader.GetReferenceID(it->chr); if ( chr_id == -1 ) { //reference not found for (; it != regions.end() && it->chr == old_chr; ) { gene_processing(*it,locus_b); // print the old region info it = regions.erase(it); // erase the current region } while ( regions.empty() ) { getline(region_f, line); if ( region_f.eof() ){ cerr << "finished: end of region file, zone 0" << endl; break; } eatline(line, regions,noChr); it = regions.begin(); if (it->chr == old_chr){ gene_processing(*it,locus_b); regions.clear(); continue; } } continue; } int chr_len = refs.at(chr_id).RefLength; if ( !reader.SetRegion(chr_id, 1, chr_id, chr_len) ) // here set region { cerr << "bamtools count ERROR: Jump region failed " << it->chr << endl; reader.Close(); exit(1); } //pile-up pos stats set <string> fragment; map <string, unsigned int> pileup; bool isposPileup = false; unsigned int old_start = 0; unsigned int total_tags = 0; unsigned int total_pos = 0; unsigned int pileup_pos = 0; BamAlignment bam; while (reader.GetNextAlignment(bam)) { if ( bam.IsMapped() == false ) continue; // skip unaligned reads unsigned int unique; bam.GetTag("NH", unique); if (param->unique == 1) { if (unique != 1) { // skipe uniquelly mapped reads continue; } } if (read_length == 0){ read_length = bam.Length; } //cout << bam.Name << endl; string chrom = refs.at(bam.RefID).RefName; string strand = "+"; if (bam.IsReverseStrand()) strand = "-"; unsigned int alignmentStart = bam.Position+1; unsigned int mateStart; if (type == "p") mateStart = bam.MatePosition+1; unsigned int alignmentEnd = bam.GetEndPosition(); unsigned int cigarEnd; vector <int> blockLengths; vector <int> blockStarts; blockStarts.push_back(0); ParseCigar(bam.CigarData, blockStarts, blockLengths, cigarEnd); // position check for unique mapped reads (because is paired-end reads, shoule base on fragment level for paired end reads) if (posc == true && unique == 1) { if (type == "p" && fragment.count(bam.Name) > 0) 
fragment.erase(bam.Name); else { total_tags++; if (type == "p"){ fragment.insert(bam.Name); } string alignSum; if (type == "p") { alignSum = int2str(alignmentStart) + "\t" + int2str(mateStart) + "\t.\t" + strand; } else { alignSum = int2str(alignmentStart) + "\t" + int2str(alignmentEnd) + "\t.\t" + strand; } if ( alignmentStart != old_start ) { isposPileup = false; map <string, unsigned int>::iterator pit = pileup.begin(); for (; pit != pileup.end(); pit++) { posc_f << chrom << "\truping\tpileup\t" << pit->first << "\t.\t" << "Pileup=" << pit->second << endl; //print pileup } pileup.clear(); //clear pileup set pileup.insert( pair <string, unsigned int> (alignSum, 1) ); //insert the new read total_pos++; } else if ( alignmentStart == old_start ) { // same starts if ( pileup.count(alignSum) > 0 ) { // pileup if ( pileup[alignSum] == 1 && isposPileup == false ) { pileup_pos++; isposPileup = true; } pileup[alignSum]++; } else { pileup.insert( pair <string, unsigned int> (alignSum, 1) ); } } //same starts } //new fragment old_start = alignmentStart; } // do pos check float incre = 1.; if (blockStarts.size() > 1) incre = 0.5; // incre half for junction reads incre /= static_cast < float >(unique); // for multi aligned reads deque <struct region>::iterator iter = regions.begin(); if ( iter->start > alignmentEnd ) continue; // skip reads not overlapping with the first region while ( iter->chr == old_chr && iter->start <= alignmentEnd && iter != regions.end() ) { if (iter->end < alignmentStart) { // the region end is beyond the alignmentStart gene_processing(*iter,locus_b); // processing iter = regions.erase(iter); // this region should be removed if ( regions.empty() ) { getline(region_f, line); // get a line of region file if ( ! 
region_f.eof() ) { eatline(line, regions, noChr); // eat a line and put it into the duque iter = regions.begin(); } else { // it's reaching the end of the region file cerr << "finished: end of region file, zone 3" << endl; break; } } continue; } if (iter->end >= alignmentStart && iter->start <= alignmentEnd) { //overlapping, should take action vector <int>::iterator cigit = blockStarts.begin(); for (; cigit != blockStarts.end(); cigit++) { unsigned int current_start = *cigit + alignmentStart; int current_pos = current_start - (iter->start); //cout << iter->chr << "\t" << iter->start << "\t" << iter->end << "\t" << current_start << endl; if ( (iter->tags).count(current_pos) > 0 ) { (iter->tags)[current_pos] += incre; } else (iter->tags).insert( pair<int, float>(current_pos, incre) ); } } // overlapping take action! if ( (iter+1) != regions.end() ) iter++; // if this region is not the last element in the deque else { // the last element getline(region_f, line); // get a line of region file if ( ! 
region_f.eof() ){ eatline(line, regions, noChr); // eat a line and put it into the duque iter = regions.end(); iter--; } else { //it's reaching the end of the region file cerr << "finished: end of region file, zone 4" << endl; break; } } } //while } // read a bam // print chr map if (posc == true) { chrmap_f << old_chr << "\t" << total_tags << "\t" << total_pos << "\t" << pileup_pos << endl; } //somehow to loop back it = regions.begin(); //reset to begin for (; it != regions.end() && it->chr == old_chr; ) { gene_processing(*it,locus_b); // print the old region info it = regions.erase(it); // erase the current region } while ( regions.empty() ) { getline(region_f, line); if ( region_f.eof() ){ cerr << "finished: end of region file, zone 5" << endl; //print locus bias for (unsigned int l = 0; l < 1000; l++){ locus_bias << l << "\t" << locus_b[l].ps << "\t" << locus_b[l].hs << "\t" << locus_b[l].pe << "\t" << locus_b[l].he << endl; } exit(0); } eatline(line, regions, noChr); it = regions.begin(); if (it->chr == old_chr){ gene_processing(*it, locus_b); regions.clear(); continue; } } } // region chr != old chr regions.clear(); reader.Close(); region_f.close(); return 0; } //main
int FileReader::runInternal() { ogeNameThread("am_FileReader"); if(!format_specified) format = deduceFileFormat(); if(format == FORMAT_BAM) { BamMultiReader reader; if(!reader.Open(filenames)) { cerr << "Error opening BAM files." << endl; reader.Close(); return -1; } header = reader.GetHeader(); references = reader.GetReferenceData(); open = true; BamAlignment * al; while(true) { if(load_string_data) al = reader.GetNextAlignment(); else al = reader.GetNextAlignmentCore(); if(!al) break; putOutputAlignment(al); } reader.Close(); } else if(format == FORMAT_SAM) { vector<SamReader> readers; SamHeader first_header; // before doing any reading, open the files to // verify they are the right format, etc. for(int i = 0; i < filenames.size(); i++) { SamReader reader; if(!reader.Open(filenames[i])) { cerr << "Error opening SAM file: " << filenames[i] << endl; return -1; } if(filenames.size() > 1 && i == 0) first_header = header; // TODO: We can probably find a better way to deal with multiple SAM file headers, // but for now we should disallow different headers to avoid issues. if(i > 0 && header.ToString() != first_header.ToString()) cerr << "Warning! SAM input files have different headers." << endl; reader.Close(); } for(int i = 0; i < filenames.size(); i++) { SamReader reader; if(!reader.Open(filenames[i])) { cerr << "Error opening SAM file: " << filenames[i] << endl; return -1; } header = reader.GetHeader(); references = reader.GetReferenceData(); open = true; if(filenames.size() > 1 && i == 0) first_header = header; BamAlignment * al = NULL; while(true) { al = reader.GetNextAlignment(); if(NULL == al) break; putOutputAlignment(al); } reader.Close(); } } else { cerr << "FileReader couldn't detect file format. Aborting." << endl; exit(-1); return -1; } return 0; }
int MergeTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); // set to default input if none provided if ( !m_settings->HasInputBamFilename ) m_settings->InputFiles.push_back(Options::StandardIn()); // opens the BAM files (by default without checking for indexes) BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles, false, true) ) { cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl; return 1; } // retrieve header & reference dictionary info std::string mergedHeader = reader.GetHeaderText(); RefVector references = reader.GetReferenceData(); // open writer BamWriter writer; bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression ); if ( !writer.Open(m_settings->OutputFilename, mergedHeader, references, writeUncompressed) ) { cerr << "ERROR: Could not open BAM file " << m_settings->OutputFilename << " for writing... Aborting." << endl; reader.Close(); return 1; } // if no region specified, store entire contents of file(s) if ( !m_settings->HasRegion ) { BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) writer.SaveAlignment(al); } // otherwise attempt to use region as constraint else { // if region string parses OK BamRegion region; if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { // attempt to re-open reader with index files reader.Close(); bool openedOK = reader.Open(m_settings->InputFiles, true, true ); // if error if ( !openedOK ) { cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl; return 1; } // if index data available, we can use SetRegion if ( reader.IsIndexLoaded() ) { // attempt to use SetRegion(), if failed report error if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." 
<< endl; reader.Close(); return 1; } // everything checks out, just iterate through specified region, storing alignments BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) writer.SaveAlignment(al); } // no index data available, we have to iterate through until we // find overlapping alignments else { BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) { if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) && (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) { writer.SaveAlignment(al); } } } } // error parsing REGION string else { cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl; cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl; reader.Close(); writer.Close(); return 1; } } // clean & exit reader.Close(); writer.Close(); return 0; }