bool processReadPair(const BamAlignment& al1, const BamAlignment& al2, const RefVector& refs, const int32_t totalTail, const int32_t critTail, const bool diff_ref) { if ((al1.IsFirstMate() && al2.IsFirstMate()) || (al1.IsSecondMate() && al2.IsSecondMate())) { cerr << "Incompatible mate orders: name1 = " << al1.Name << " is1stmate " << al1.IsFirstMate() << " is2ndmate " << al1.IsSecondMate() << " name2 = " << al2.Name << " is1stmate " << al2.IsFirstMate() << " is2ndmate " << al2.IsSecondMate() << endl; exit(1); } int32_t total_tail = -1; if (! (total_tail = checkLinkPair(al1, al2, refs, totalTail, critTail, diff_ref))) { return false; // reject all but link pairs // continue; } if (critTail && ! checkLinkPairCandidate(al1, refs, critTail) && ! checkLinkPairCandidate(al2, refs, critTail)) { return false; // neither read was a link pair candidate } if (debug_processReadPair) cout << "---------------------------------" << endl; int32_t lpc_tail1 = checkLinkPairCandidate(al1, refs, critTail); int32_t lpc_tail2 = checkLinkPairCandidate(al2, refs, critTail); if (debug_processReadPair) { printAlignmentInfo(al1, refs); if (lpc_tail1) { cout << "LINK PAIR CANDIDATE "; cout << ((lpc_tail1 > 0) ? "--->" : "<---") << " " << lpc_tail1 << endl; } printAlignmentInfo(al2, refs); if (lpc_tail2) { cout << "LINK PAIR CANDIDATE "; cout << ((lpc_tail2 > 0) ? "--->" : "<---") << " " << lpc_tail2 << endl; } cout << "TOTAL TAIL " << (abs(readTail(al1, refs)) + abs(readTail(al2, refs))) << endl; } return true; }
// use current input alignment to update BAM file alignment stats void StatsTool::StatsToolPrivate::ProcessAlignment(const BamAlignment& al) { // increment total alignment counter ++numReads; // check the paired-independent flags if ( al.IsDuplicate() ) ++numDuplicates; if ( al.IsFailedQC() ) ++numFailedQC; if ( al.IsMapped() ) ++numMapped; // check forward/reverse strand if ( al.IsReverseStrand() ) ++numReverseStrand; else ++numForwardStrand; // if alignment is paired-end if ( al.IsPaired() ) { // increment PE counter ++numPaired; // increment first mate/second mate counters if ( al.IsFirstMate() ) ++numFirstMate; if ( al.IsSecondMate() ) ++numSecondMate; // if alignment is mapped, check mate status if ( al.IsMapped() ) { // if mate mapped if ( al.IsMateMapped() ) ++numBothMatesMapped; // else singleton else ++numSingletons; } // check for explicit proper pair flag if ( al.IsProperPair() ) ++numProperPair; // store insert size for first mate if ( settings->IsShowingInsertSizeSummary && al.IsFirstMate() && (al.InsertSize != 0) ) { int insertSize = abs(al.InsertSize); insertSizes.push_back( insertSize ); } } }
bool check(const PropertyFilter& filter, const BamAlignment& al) { bool keepAlignment = true; const PropertyMap& properties = filter.Properties; PropertyMap::const_iterator propertyIter = properties.begin(); PropertyMap::const_iterator propertyEnd = properties.end(); for ( ; propertyIter != propertyEnd; ++propertyIter ) { // check alignment data field depending on propertyName const string& propertyName = (*propertyIter).first; const PropertyFilterValue& valueFilter = (*propertyIter).second; if ( propertyName == ALIGNMENTFLAG_PROPERTY ) keepAlignment &= valueFilter.check(al.AlignmentFlag); else if ( propertyName == CIGAR_PROPERTY ) { stringstream cigarSs; const vector<CigarOp>& cigarData = al.CigarData; if ( !cigarData.empty() ) { vector<CigarOp>::const_iterator cigarBegin = cigarData.begin(); vector<CigarOp>::const_iterator cigarIter = cigarBegin; vector<CigarOp>::const_iterator cigarEnd = cigarData.end(); for ( ; cigarIter != cigarEnd; ++cigarIter ) { const CigarOp& op = (*cigarIter); cigarSs << op.Length << op.Type; } keepAlignment &= valueFilter.check(cigarSs.str()); } } else if ( propertyName == INSERTSIZE_PROPERTY ) keepAlignment &= valueFilter.check(al.InsertSize); else if ( propertyName == ISDUPLICATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsDuplicate()); else if ( propertyName == ISFAILEDQC_PROPERTY ) keepAlignment &= valueFilter.check(al.IsFailedQC()); else if ( propertyName == ISFIRSTMATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsFirstMate()); else if ( propertyName == ISMAPPED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMapped()); else if ( propertyName == ISMATEMAPPED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMateMapped()); else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMateReverseStrand()); else if ( propertyName == ISPAIRED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsPaired()); else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY ) keepAlignment &= valueFilter.check(al.IsPrimaryAlignment()); else if ( propertyName == ISPROPERPAIR_PROPERTY ) keepAlignment &= valueFilter.check(al.IsProperPair()); else if ( propertyName == ISREVERSESTRAND_PROPERTY ) keepAlignment &= valueFilter.check(al.IsReverseStrand()); else if ( propertyName == ISSECONDMATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsSecondMate()); else if ( propertyName == ISSINGLETON_PROPERTY ) { const bool isSingleton = al.IsPaired() && al.IsMapped() && !al.IsMateMapped(); keepAlignment &= valueFilter.check(isSingleton); } else if ( propertyName == MAPQUALITY_PROPERTY ) keepAlignment &= valueFilter.check(al.MapQuality); else if ( propertyName == MATEPOSITION_PROPERTY ) keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && valueFilter.check(al.MateRefID) ); else if ( propertyName == MATEREFERENCE_PROPERTY ) { if ( !al.IsPaired() || !al.IsMateMapped() ) return false; BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)filterToolReferences.size())), "Invalid MateRefID"); const string& refName = filterToolReferences.at(al.MateRefID).RefName; keepAlignment &= valueFilter.check(refName); } else if ( propertyName == NAME_PROPERTY ) keepAlignment &= valueFilter.check(al.Name); else if ( propertyName == POSITION_PROPERTY ) keepAlignment &= valueFilter.check(al.Position); else if ( propertyName == QUERYBASES_PROPERTY ) keepAlignment &= valueFilter.check(al.QueryBases); else if ( propertyName == REFERENCE_PROPERTY ) { BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)filterToolReferences.size())), "Invalid RefID"); const string& refName = filterToolReferences.at(al.RefID).RefName; keepAlignment &= valueFilter.check(refName); } else if ( propertyName == TAG_PROPERTY ) keepAlignment &= checkAlignmentTag(valueFilter, al); else BAMTOOLS_ASSERT_UNREACHABLE; // if alignment fails at ANY point, just quit and return false if ( !keepAlignment ) return false; } BAMTOOLS_ASSERT_MESSAGE( keepAlignment, "Error in BamAlignmentChecker... keepAlignment should be true here"); return keepAlignment; }
int main (int argc, char *argv[]) { bool mapped =false; bool unmapped=false; const string usage=string(string(argv[0])+" [options] input.bam out.bam"+"\n\n"+ "This program takes a BAM file as input and produces\n"+ "another where the putative deaminated bases have\n"+ "have been cut\n"+ "\n"+ "Options:\n"); // "\t"+"-u , --unmapped" +"\n\t\t"+"For an unmapped bam file"+"\n"+ // "\t"+"-m , --mapped" +"\n\t\t"+"For an mapped bam file"+"\n"); if( (argc== 1) || (argc== 2 && string(argv[1]) == "-h") || (argc== 2 && string(argv[1]) == "-help") || (argc== 2 && string(argv[1]) == "--help") ){ cout<<"Usage:"<<endl; cout<<usage<<endl; cout<<""<<endl; return 1; } // for(int i=1;i<(argc-1);i++){ //all but the last arg // if(strcmp(argv[i],"-m") == 0 || strcmp(argv[i],"--mapped") == 0 ){ // mapped=true; // continue; // } // if(strcmp(argv[i],"-u") == 0 || strcmp(argv[i],"--unmapped") == 0 ){ // unmapped=true; // continue; // } // cerr<<"Unknown option "<<argv[i] <<" exiting"<<endl; // return 1; // } if(argc != 3){ cerr<<"Error: Must specify the input and output BAM files"; return 1; } string inbamFile =argv[argc-2]; string outbamFile=argv[argc-1]; // if(!mapped && !unmapped){ // cerr << "Please specify whether you reads are mapped or unmapped" << endl; // return 1; // } // if(mapped && unmapped){ // cerr << "Please specify either mapped or unmapped but not both" << endl; // return 1; // } BamReader reader; if ( !reader.Open(inbamFile) ) { cerr << "Could not open input BAM files." << endl; return 1; } vector<RefData> testRefData=reader.GetReferenceData(); const SamHeader header = reader.GetHeader(); const RefVector references = reader.GetReferenceData(); BamWriter writer; if ( !writer.Open(outbamFile, header, references) ) { cerr << "Could not open output BAM file" << endl; return 1; } BamAlignment al; // BamAlignment al2; // bool al2Null=true; while ( reader.GetNextAlignment(al) ) { if(al.IsPaired() ){ if(al.IsFirstMate() ){ //5' end, need to check first base only if(al.IsReverseStrand()){ // if(!al.IsMapped()){ cerr << "Cannot have reverse complemented unmapped reads: " <<al.Name<< endl; //return 1; } int indexToCheck; //last indexToCheck=al.QueryBases.length()-1; if(toupper(al.QueryBases[indexToCheck]) == 'A'){ //al.Qualities[indexToCheck]=char(offset+baseQualForDeam); al.QueryBases = al.QueryBases.substr(0,indexToCheck); al.Qualities = al.Qualities.substr(0, indexToCheck); } }else{ int indexToCheck; //first base indexToCheck=0; if(toupper(al.QueryBases[indexToCheck]) == 'T'){ //al.Qualities[indexToCheck]=char(offset+baseQualForDeam); al.QueryBases = al.QueryBases.substr(indexToCheck+1); al.Qualities = al.Qualities.substr(indexToCheck+1); } } }else{ //3' end, need to check last two bases only if( al.IsSecondMate() ){ if(al.IsReverseStrand()){ // if(!al.IsMapped()){ cerr << "Cannot have reverse complemented unmapped reads: " <<al.Name<< endl; //return 1; } int indexToCheck; //second to last indexToCheck=al.QueryBases.length()-2; if(toupper(al.QueryBases[indexToCheck]) == 'T'){ //al.Qualities[indexToCheck]=char(offset+baseQualForDeam); al.QueryBases = al.QueryBases.substr(0,indexToCheck); al.Qualities = al.Qualities.substr(0, indexToCheck); }else{ //last indexToCheck=al.QueryBases.length()-1; if(toupper(al.QueryBases[indexToCheck]) == 'T'){ //al.Qualities[indexToCheck]=char(offset+baseQualForDeam); al.QueryBases = al.QueryBases.substr(0,indexToCheck); al.Qualities = al.Qualities.substr(0, indexToCheck); } } }else{ int indexToCheck; //second base indexToCheck=1; if(toupper(al.QueryBases[indexToCheck]) == 'A'){ //al.Qualities[indexToCheck]=char(offset+baseQualForDeam); al.QueryBases = al.QueryBases.substr(indexToCheck+1); al.Qualities = al.Qualities.substr(indexToCheck+1); }else{ //first base indexToCheck=0; if(toupper(al.QueryBases[indexToCheck]) == 'A'){ //al.Qualities[indexToCheck]=char(offset+baseQualForDeam); al.QueryBases = al.QueryBases.substr(indexToCheck+1); al.Qualities = al.Qualities.substr(indexToCheck+1); } } } }else{ cerr << "Wrong state" << endl; return 1; } } }//end of paired end else{//we consider single reads to have been sequenced from 5' to 3' if(al.IsReverseStrand()){ //need to consider if(!al.IsMapped()){ cerr << "Cannot have reverse complemented unmapped reads: " <<al.Name<< endl; //return 1; } int indexToCheck; //second base indexToCheck=1; if(toupper(al.QueryBases[indexToCheck]) == 'A'){ // al.Qualities[indexToCheck]=char(offset+baseQualForDeam); // cout<<"51 "<<al.QueryBases<<endl; // cout<<"51 "<<al.Qualities<<endl; al.QueryBases = al.QueryBases.substr(indexToCheck+1); al.Qualities = al.Qualities.substr(indexToCheck+1); // cout<<"52 "<<al.QueryBases<<endl; // cout<<"52 "<<al.Qualities<<endl; }else{ //first base indexToCheck=0; if(toupper(al.QueryBases[indexToCheck]) == 'A'){ // al.Qualities[indexToCheck]=char(offset+baseQualForDeam); // cout<<"61 "<<al.QueryBases<<endl; // cout<<"61 "<<al.Qualities<<endl; al.QueryBases = al.QueryBases.substr(indexToCheck+1); al.Qualities = al.Qualities.substr(indexToCheck+1); // cout<<"62 "<<al.QueryBases<<endl; // cout<<"62 "<<al.Qualities<<endl; } } //last indexToCheck=al.QueryBases.length()-1; if(toupper(al.QueryBases[indexToCheck]) == 'A'){ // al.Qualities[indexToCheck]=char(offset+baseQualForDeam); // cout<<"21 "<<al.QueryBases<<endl; // cout<<"21 "<<al.Qualities<<endl; al.QueryBases = al.QueryBases.substr(0,indexToCheck); al.Qualities = al.Qualities.substr(0, indexToCheck); // cout<<"22 "<<al.QueryBases<<endl; // cout<<"22 "<<al.Qualities<<endl; } }else{ int indexToCheck; //first base indexToCheck=0; if(toupper(al.QueryBases[indexToCheck]) == 'T'){ // al.Qualities[indexToCheck]=char(offset+baseQualForDeam); // cout<<"11 "<<al.QueryBases<<endl; // cout<<"11 "<<al.Qualities<<endl; al.QueryBases = al.QueryBases.substr(indexToCheck+1); al.Qualities = al.Qualities.substr(indexToCheck+1); // cout<<"12 "<<al.QueryBases<<endl; // cout<<"12 "<<al.Qualities<<endl; } //second to last indexToCheck=al.QueryBases.length()-2; if(toupper(al.QueryBases[indexToCheck]) == 'T'){ // al.Qualities[indexToCheck]=char(offset+baseQualForDeam); // cout<<"31 "<<al.QueryBases<<endl; // cout<<"31 "<<al.Qualities<<endl; al.QueryBases = al.QueryBases.substr(0,indexToCheck); al.Qualities = al.Qualities.substr(0, indexToCheck); // cout<<"32 "<<al.QueryBases<<endl; // cout<<"32 "<<al.Qualities<<endl; }else{ //last indexToCheck=al.QueryBases.length()-1; if(toupper(al.QueryBases[indexToCheck]) == 'T'){ // al.Qualities[indexToCheck]=char(offset+baseQualForDeam); // cout<<"41 "<<al.QueryBases<<endl; // cout<<"41 "<<al.Qualities<<endl; al.QueryBases = al.QueryBases.substr(0,indexToCheck); al.Qualities = al.Qualities.substr(0, indexToCheck); // cout<<"42 "<<al.QueryBases<<endl; // cout<<"42 "<<al.Qualities<<endl; } } } }//end of single end writer.SaveAlignment(al); }// while ( reader.GetNextAlignment(al) ) { reader.Close(); writer.Close(); return 0; }
int main (int argc, char *argv[]) { string usage=string(""+string(argv[0])+" [in BAM file]"+ "\nThis program reads a BAM file and computes the error rate for each cycle\n"+ // "\nreads and the puts the rest into another bam file.\n"+ // "\nTip: if you do not need one of them, use /dev/null as your output\n"+ // "arguments:\n"+ // "\t"+"--bq [base qual] : Minimum base quality to flag a deaminated site (Default: "+stringify(minBaseQuality)+")\n"+ "\n"); if(argc == 1 || (argc == 2 && (string(argv[0]) == "-h" || string(argv[0]) == "--help") ) ){ cerr << "Usage "<<usage<<endl; return 1; } // for(int i=1;i<(argc-2);i++){ // if(string(argv[i]) == "--bq"){ // minBaseQuality=destringify<int>(argv[i+1]); // i++; // continue; // } // } string bamfiletopen = string( argv[ argc-1 ] ); // string deambam = string( argv[ argc-2 ] ); // string nondeambam = string( argv[ argc-1 ] ); BamReader reader; if ( !reader.Open(bamfiletopen) ) { cerr << "Could not open input BAM file"<< bamfiletopen << endl; return 1; } //iterating over the alignments for these regions BamAlignment al; bool pairedEnd=false; bool firstRead=true; while ( reader.GetNextAlignment(al) ) { if(firstRead){ //reads are either all paired end or single end, I don't allow a mix numberOfCycles=al.QueryBases.size(); // cout<<"numberOfCycles "<<numberOfCycles<<endl; if(al.IsPaired() ){ pairedEnd=true; matches = vector<unsigned int> (2*numberOfCycles,0); mismatches = vector<unsigned int> (2*numberOfCycles,0); typesOfMismatches = vector< vector<unsigned int> >(); for(int i=0;i<12;i++) typesOfMismatches.push_back( vector<unsigned int> (2*numberOfCycles,0) ); }else{ matches = vector<unsigned int> ( numberOfCycles,0); mismatches = vector<unsigned int> ( numberOfCycles,0); typesOfMismatches = vector< vector<unsigned int> >(); for(int i=0;i<12;i++) typesOfMismatches.push_back( vector<unsigned int> ( numberOfCycles,0) ); } firstRead=false; } if( ( pairedEnd && !al.IsPaired()) || ( !pairedEnd && al.IsPaired()) ){ cerr<<"Read "<<al.Name<<" is wrong, cannot have a mixture of paired and unpaired read for this program"<<endl; return 1; } //skip unmapped if(!al.IsMapped()) continue; if(numberOfCycles!=int(al.QueryBases.size())){ cerr<<"The length of read "<<al.Name<<" is wrong, should be "<<numberOfCycles<<"bp"<<endl; return 1; } string reconstructedReference = reconstructRef(&al); if(al.Qualities.size() != reconstructedReference.size()){ cerr<<"Quality line is not the same size as the reconstructed reference"<<endl; return 1; } if( pairedEnd ){ if( al.IsFirstMate() ){ //start cycle 0 if( al.IsReverseStrand() ){ increaseCounters(al,reconstructedReference,numberOfCycles-1,-1); //start cycle numberOfCycles-1 }else{ increaseCounters(al,reconstructedReference,0 , 1); //start cycle 0 } }else{ if( al.IsSecondMate() ){ if( al.IsReverseStrand() ){ increaseCounters(al,reconstructedReference,2*numberOfCycles-1,-1); //start cycle 2*numberOfCycles-1 }else{ increaseCounters(al,reconstructedReference,numberOfCycles , 1); //start cycle numberOfCycles } }else{ cerr<<"Reads "<<al.Name<<" must be either first or second mate"<<endl; return 1; } } }else{ //single end if( al.IsReverseStrand() ){ increaseCounters(al,reconstructedReference,numberOfCycles-1,-1); //start cycle numberOfCycles-1 }else{ increaseCounters(al,reconstructedReference,0 , 1); //start cycle 0 } } }//end while each read reader.Close(); cout<<"cycle\tmatches\tmismatches\tmismatches%\tA>C\tA>C%\tA>G\tA>G%\tA>T\tA>T%\tC>A\tC>A%\tC>G\tC>G%\tC>T\tC>T%\tG>A\tG>A%\tG>C\tG>C%\tG>T\tG>T%\tT>A\tT>A%\tT>C\tT>C%\tT>G\tT>G%"<<endl; for(unsigned int i=0;i<matches.size();i++){ cout<<(i+1); if( (matches[i]+mismatches[i]!=0) ) cout<<"\t"<<matches[i]<<"\t"<<mismatches[i]<<"\t"<< 100.0*(double(mismatches[i])/double(matches[i]+mismatches[i])) ; else cout<<"\t"<<matches[i]<<"\t"<<mismatches[i]<<"\tNA"; for(int j=0;j<12;j++){ cout<<"\t"<<typesOfMismatches[j][i]; if( (matches[i]+mismatches[i]!=0) ) cout<<"\t"<<100.0*double(typesOfMismatches[j][i])/double(matches[i]+mismatches[i]); else cout<<"\tNA"; } cout<<endl; } return 0; }
void BedWindow::WindowIntersectBam(string bamFile) { // load the "B" bed file into a map so // that we can easily compare "A" to it for overlaps _bedB->loadBedFileIntoMap(); // open the BAM file BamReader reader; BamWriter writer; reader.Open(bamFile); // get header & reference information string bamHeader = reader.GetHeaderText(); RefVector refs = reader.GetReferenceData(); // open a BAM output to stdout if we are writing BAM if (_bamOutput == true) { // set compression mode BamWriter::CompressionMode compressionMode = BamWriter::Compressed; if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed; writer.SetCompressionMode(compressionMode); // open our BAM writer writer.Open("stdout", bamHeader, refs); } vector<BED> hits; // vector of potential hits // reserve some space hits.reserve(100); _bedA->bedType = 6; BamAlignment bam; bool overlapsFound; // get each set of alignments for each pair. while (reader.GetNextAlignment(bam)) { if (bam.IsMapped()) { BED a; a.chrom = refs.at(bam.RefID).RefName; a.start = bam.Position; a.end = bam.GetEndPosition(false, false); // build the name field from the BAM alignment. a.name = bam.Name; if (bam.IsFirstMate()) a.name += "/1"; if (bam.IsSecondMate()) a.name += "/2"; a.score = ToString(bam.MapQuality); a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-"; if (_bamOutput == true) { overlapsFound = FindOneOrMoreWindowOverlaps(a); if (overlapsFound == true) { if (_noHit == false) writer.SaveAlignment(bam); } else { if (_noHit == true) writer.SaveAlignment(bam); } } else { FindWindowOverlaps(a, hits); hits.clear(); } } // BAM IsMapped() is false else if (_noHit == true) { writer.SaveAlignment(bam); } } // close the relevant BAM files. reader.Close(); if (_bamOutput == true) { writer.Close(); } }
void BedGenomeCoverage::CoverageBam(string bamFile) { ResetChromCoverage(); // open the BAM file BamReader reader; if (!reader.Open(bamFile)) { cerr << "Failed to open BAM file " << bamFile << endl; exit(1); } // get header & reference information string header = reader.GetHeaderText(); RefVector refs = reader.GetReferenceData(); // load the BAM header references into a BEDTools "genome file" _genome = new GenomeFile(refs); // convert each aligned BAM entry to BED // and compute coverage on B BamAlignment bam; while (reader.GetNextAlignment(bam)) { // skip if the read is unaligned if (bam.IsMapped() == false) continue; bool _isReverseStrand = bam.IsReverseStrand(); //changing second mate's strand to opposite if( _dUTP && bam.IsPaired() && bam.IsMateMapped() && bam.IsSecondMate()) _isReverseStrand = !bam.IsReverseStrand(); // skip if we care about strands and the strand isn't what // the user wanted if ( (_filterByStrand == true) && ((_requestedStrand == "-") != _isReverseStrand) ) continue; // extract the chrom, start and end from the BAM alignment string chrom(refs.at(bam.RefID).RefName); CHRPOS start = bam.Position; CHRPOS end = bam.GetEndPosition(false, false) - 1; // are we on a new chromosome? if ( chrom != _currChromName ) StartNewChrom(chrom); if(_pair_chip_) { // Skip if not a proper pair if (bam.IsPaired() && (!bam.IsProperPair() or !bam.IsMateMapped()) ) continue; // Skip if wrong coordinates if( ( (bam.Position<bam.MatePosition) && bam.IsReverseStrand() ) || ( (bam.MatePosition < bam.Position) && bam.IsMateReverseStrand() ) ) { //chemically designed: left on positive strand, right on reverse one continue; } /*if(_haveSize) { if (bam.IsFirstMate() && bam.IsReverseStrand()) { //put fragmentSize in to the middle of pair end_fragment int mid = bam.MatePosition+abs(bam.InsertSize)/2; if(mid<_fragmentSize/2) AddCoverage(0, mid+_fragmentSize/2); else AddCoverage(mid-_fragmentSize/2, mid+_fragmentSize/2); } else if (bam.IsFirstMate() && bam.IsMateReverseStrand()) { //put fragmentSize in to the middle of pair end_fragment int mid = start+abs(bam.InsertSize)/2; if(mid<_fragmentSize/2) AddCoverage(0, mid+_fragmentSize/2); else AddCoverage(mid-_fragmentSize/2, mid+_fragmentSize/2); } } else */ if (bam.IsFirstMate() && bam.IsReverseStrand()) { //prolong to the mate to the left AddCoverage(bam.MatePosition, end); } else if (bam.IsFirstMate() && bam.IsMateReverseStrand()) { //prolong to the mate to the right AddCoverage(start, start + abs(bam.InsertSize) - 1); } } else if (_haveSize) { if(bam.IsReverseStrand()) { if(end<_fragmentSize) { //sometimes fragmentSize is bigger :( AddCoverage(0, end); } else { AddCoverage(end + 1 - _fragmentSize, end ); } } else { AddCoverage(start,start+_fragmentSize - 1); } } else // add coverage accordingly. if (!_only_5p_end && !_only_3p_end) { bedVector bedBlocks; // we always want to split blocks when a D CIGAR op is found. // if the user invokes -split, we want to also split on N ops. if (_obeySplits) { // "D" true, "N" true GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, true); } else { // "D" true, "N" false GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, false); } AddBlockedCoverage(bedBlocks); } else if (_only_5p_end) { CHRPOS pos = ( !bam.IsReverseStrand() ) ? start : end; AddCoverage(pos,pos); } else if (_only_3p_end) { CHRPOS pos = ( bam.IsReverseStrand() ) ? start : end; AddCoverage(pos,pos); } } // close the BAM reader.Close(); // process the results of the last chromosome. ReportChromCoverage(_currChromCoverage, _currChromSize, _currChromName, _currChromDepthHist); // report all empty chromsomes PrintEmptyChromosomes(); // report the overall coverage if asked. PrintFinalCoverage(); }
void BedIntersect::IntersectBam(string bamFile) { // load the "B" bed file into a map so // that we can easily compare "A" to it for overlaps _bedB->loadBedFileIntoMap(); // open the BAM file BamReader reader; BamWriter writer; reader.Open(bamFile); // get header & reference information string header = reader.GetHeaderText(); RefVector refs = reader.GetReferenceData(); // open a BAM output to stdout if we are writing BAM if (_bamOutput == true) { // open our BAM writer writer.Open("stdout", header, refs, _isUncompressedBam); } vector<BED> hits; // reserve some space hits.reserve(100); _bedA->bedType = 6; BamAlignment bam; // get each set of alignments for each pair. while (reader.GetNextAlignment(bam)) { if (bam.IsMapped()) { BED a; a.chrom = refs.at(bam.RefID).RefName; a.start = bam.Position; a.end = bam.GetEndPosition(false); // build the name field from the BAM alignment. a.name = bam.Name; if (bam.IsFirstMate()) a.name += "/1"; if (bam.IsSecondMate()) a.name += "/2"; a.score = ToString(bam.MapQuality); a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-"; if (_bamOutput == true) { bool overlapsFound = false; // treat the BAM alignment as a single "block" if (_obeySplits == false) { overlapsFound = FindOneOrMoreOverlap(a); } // split the BAM alignment into discrete blocks and // look for overlaps only within each block. else { bool overlapFoundForBlock; bedVector bedBlocks; // vec to store the discrete BED "blocks" from a // we don't want to split on "D" ops, hence the "false" getBamBlocks(bam, refs, bedBlocks, false); vector<BED>::const_iterator bedItr = bedBlocks.begin(); vector<BED>::const_iterator bedEnd = bedBlocks.end(); for (; bedItr != bedEnd; ++bedItr) { overlapFoundForBlock = FindOneOrMoreOverlap(a); if (overlapFoundForBlock == true) overlapsFound = true; } } if (overlapsFound == true) { if (_noHit == false) writer.SaveAlignment(bam); } else { if (_noHit == true) { writer.SaveAlignment(bam); } } } else { // treat the BAM alignment as a single BED "block" if (_obeySplits == false) { FindOverlaps(a, hits); hits.clear(); } // split the BAM alignment into discrete BED blocks and // look for overlaps only within each block. else { bedVector bedBlocks; // vec to store the discrete BED "blocks" from a getBamBlocks(bam, refs, bedBlocks, false); vector<BED>::const_iterator bedItr = bedBlocks.begin(); vector<BED>::const_iterator bedEnd = bedBlocks.end(); for (; bedItr != bedEnd; ++bedItr) { FindOverlaps(*bedItr, hits); hits.clear(); } } } } } // close the relevant BAM files. reader.Close(); if (_bamOutput == true) { writer.Close(); } }
/* Input file format: BAM file */ void Distinguish( string mappingFile, map< string, bool > & hapSNP, map< string, string > & refSeq, string outfilePrefix ) { // [WARNING] If refSeq is not empty than it's BS data, otherwirse is resequencing data. string outfile1 = outfilePrefix + ".1.bam"; string outfile2 = outfilePrefix + ".2.bam"; string outHomoF = outfilePrefix + ".h**o.bam"; string outUdeci = outfilePrefix + ".ambiguity.bam"; // The ambiguity reads or the reads which has't any SNP. BamReader h_I; // bam input file handle if ( !h_I.Open( mappingFile ) ) cerr << "[ERROR]: " << h_I.GetErrorString() << endl; // "header" and "references" from BAM files, these are required by BamWriter const SamHeader header = h_I.GetHeader(); const RefVector references = h_I.GetReferenceData(); BamWriter h_O1, h_O2, h_U, h_H; if ( !h_O1.Open( outfile1, header, references ) ) { cerr << "Cannot open output BAM file: " << outfile1 << endl; exit(1); } if ( !h_O2.Open( outfile2, header, references ) ) { cerr << "Cannot open output BAM file: " << outfile2 << endl; exit(1); } if ( !h_U.Open ( outUdeci, header, references ) ) { cerr << "Cannot open output BAM file: " << outUdeci << endl; exit(1); } if ( !h_H.Open ( outHomoF, header, references ) ) { cerr << "Cannot open output BAM file: " << outHomoF << endl; exit(1); } int readsNumberRecord(0); SamLine samline; // Samline class SamExt sam; BamAlignment al; map< string, pair<BamAlignment, SamExt> > firstMateAl; // record the first mate reads alignment, HIstory problem to be like this struct!! string refstr; // Just For BS data. bool isC2T ( false ); // Just For BS data. while ( h_I.GetNextAlignment( al ) ) { ++readsNumberRecord; if ( readsNumberRecord % 1000000 == 0 ) cerr << "Have been dealed " << readsNumberRecord << " lines. " << local_time (); if ( !al.IsMapped() ) continue; //if ( al.InsertSize == 0 || al.RefID != al.MateRefID ) continue; samline._RID = al.Name; samline._Flag= al.AlignmentFlag; samline._ref_id = h_I.GetReferenceData()[al.RefID].RefName; samline._position = al.Position + 1; // Position (0-base starts in BamTools), but I need 1-base starts samline._mapQ = al.MapQuality; // MateRefID == -1 means mate read is unmapping samline._XorD = ( al.MateRefID > -1 ) ? h_I.GetReferenceData()[al.MateRefID].RefName : "*"; samline._coor = al.MatePosition + 1; // Position (0-base starts in BamTools), but I need 1-base starts samline._seq = al.QueryBases; samline._insert_size = abs (al.InsertSize); if ( samline._ref_id.compare( "BIG_ID_CAT" ) == 0 ) continue; // Ignore "BIG_ID_CAT" // get cigar; samline._cigar = itoa(al.CigarData[0].Length); samline._cigar.append( 1, al.CigarData[0].Type ); for ( size_t i(1); i < al.CigarData.size(); ++i ) { samline._cigar += itoa(al.CigarData[i].Length); samline._cigar.append( 1, al.CigarData[i].Type ); } sam.assign( &samline ); /*********************************** For BS Data *********************************************/ if ( !refSeq.empty() ) { // If the data is BS data, we should modify the QueryBases. if ( !refSeq.count( samline._ref_id ) ) { cerr << "[ERROR]There's no such reference in the reference file. " << samline._ref_id << endl; exit(1); } if ( al.IsFirstMate() && !al.IsReverseStrand() ) { isC2T = true; } else if ( al.IsFirstMate() && al.IsReverseStrand() ) { isC2T = false; } else if ( al.IsSecondMate() && !al.IsReverseStrand() ) { isC2T = false; } else if ( al.IsSecondMate() && al.IsReverseStrand() ) { isC2T = true; } else { cerr << "[ERROR MATCH] " << endl; exit(1); } refstr.assign( refSeq[samline._ref_id], sam.ref_start() - 1, sam.ref_end() - sam.ref_start() + 1 ); modifyBSreadBases( samline._ref_id, sam.ref_start (), sam.read_start(), sam.cigar_seq(), refstr, sam._seq, hapSNP, isC2T ); } /********************************** End For BS Data *******************************************/ // Consider the mate pair reads if ( !firstMateAl.count(al.Name) && (al.MateRefID > -1) ) { firstMateAl[al.Name] = std::make_pair( al, sam ); } else { // Consider the mate pair reads if ( !firstMateAl.count(al.Name) ) { switch ( Decide( sam, hapSNP ) ) { case 1 : h_O1.SaveAlignment( al ); break; // Hap1 case 2 : h_O2.SaveAlignment( al ); break; // Hap2 case 0 : h_U.SaveAlignment ( al ); break; // Ambiguity default: // This alignment didn't contain any hete SNP. h_H.SaveAlignment ( al ); // Homozygous reads } } else { int mark1 = Decide( firstMateAl[al.Name].second, hapSNP ); int mark2 = Decide( sam, hapSNP ); if ( mark1 == 1 && mark2 == 1 ) { h_O1.SaveAlignment( firstMateAl[al.Name].first ); h_O1.SaveAlignment( al ); } else if ( (mark1 == 1 && mark2 == 0) || (mark1 == 0 && mark2 == 1) ) { h_O1.SaveAlignment( firstMateAl[al.Name].first ); h_O1.SaveAlignment( al ); } else if ( (mark1 == 1 && mark2 == -1) || (mark1 == -1 && mark2 == 1) ) { h_O1.SaveAlignment( firstMateAl[al.Name].first ); h_O1.SaveAlignment( al ); } else if ( mark1 == 2 && mark2 == 2 ) { h_O2.SaveAlignment( firstMateAl[al.Name].first ); h_O2.SaveAlignment( al ); } else if ( (mark1 == 2 && mark2 == 0 ) || (mark1 == 0 && mark2 == 2) ) { h_O2.SaveAlignment( firstMateAl[al.Name].first ); h_O2.SaveAlignment( al ); } else if ( (mark1 == 2 && mark2 == -1) || (mark1 == -1 && mark2 == 2) ) { h_O2.SaveAlignment( firstMateAl[al.Name].first ); h_O2.SaveAlignment( al ); } else if ( mark1 == -1 && mark2 == -1 ) { h_H.SaveAlignment ( firstMateAl[al.Name].first ); h_H.SaveAlignment ( al ); } else { h_U.SaveAlignment ( firstMateAl[al.Name].first ); h_U.SaveAlignment ( al ); } firstMateAl.erase( al.Name ); } } } cerr << "------------ Remaind size: " << firstMateAl.size() << endl; for (map< string, pair<BamAlignment, SamExt> >::iterator it( firstMateAl.begin() ); it != firstMateAl.end(); ++it ) { switch ( Decide( it->second.second, hapSNP ) ) { case 1 : h_O1.SaveAlignment( it->second.first ); break; // Hap1 case 2 : h_O2.SaveAlignment( it->second.first ); break; // Hap2 case 0 : h_U.SaveAlignment ( it->second.first ); break; // Ambiguity default: // This alignment didn't contain any hete SNP. h_H.SaveAlignment ( it->second.first ); // Homozygous reads } } h_I.Close(); h_U.Close(); h_H.Close(); h_O1.Close(); h_O2.Close(); cerr << ">>>>>>>>>>>>> All Done <<<<<<<<<<<<<<" << endl; cerr << "Write to output file: " << outfile1 << endl; cerr << "Write to output file: " << outfile2 << endl; cerr << "Write to output file: " << outHomoF << endl; cerr << "Write to output file: " << outUdeci << endl; return; }
int main (int argc, char *argv[]) { // bool mapped =false; // bool unmapped=false; int bpToDecrease5=1; int bpToDecrease3=2; const string usage=string(string(argv[0])+" [options] input.bam out.bam"+"\n\n"+ "\tThis program takes a BAM file as input and produces\n"+ "\tanother where the putative deaminated bases have\n"+ "\ta base quality score of "+intStringify(baseQualForDeam)+"\n"+ "\tgiven an "+intStringify(offset)+" offset \n"+ "\n"+ "\tOptions:\n"+ "\t\t"+"-n5" +"\t\t\t"+"Decrease the nth bases surrounding the 5' ends (Default:"+stringify(bpToDecrease5)+") "+"\n"+ "\t\t"+"-n3" +"\t\t\t"+"Decrease the nth bases surrounding the 3' ends (Default:"+stringify(bpToDecrease3)+") "+"\n" ); // "\t"+"-u , --unmapped" +"\n\t\t"+"For an unmapped bam file"+"\n"+ // "\t"+"-m , --mapped" +"\n\t\t"+"For an mapped bam file"+"\n"); if( (argc== 1) || (argc== 2 && string(argv[1]) == "-h") || (argc== 2 && string(argv[1]) == "-help") || (argc== 2 && string(argv[1]) == "--help") ){ cout<<"Usage:"<<endl; cout<<usage<<endl; cout<<""<<endl; return 1; } for(int i=1;i<(argc-2);i++){ //all but the last arg if( string(argv[i]) == "-n5" ){ bpToDecrease5 = destringify<int>(argv[i+1]); i++; continue; } if( string(argv[i]) == "-n3" ){ bpToDecrease3 = destringify<int>(argv[i+1]); i++; continue; } cerr<<"Unknown option "<<argv[i] <<" exiting"<<endl; return 1; } if(argc < 3){ cerr<<"Error: Must specify the input and output BAM files"; return 1; } string inbamFile =argv[argc-2]; string outbamFile=argv[argc-1]; if(inbamFile == outbamFile){ cerr<<"Input and output files are the same"<<endl; return 1; } // if(!mapped && !unmapped){ // cerr << "Please specify whether you reads are mapped or unmapped" << endl; // return 1; // } // if(mapped && unmapped){ // cerr << "Please specify either mapped or unmapped but not both" << endl; // return 1; // } BamReader reader; if ( !reader.Open(inbamFile) ) { cerr << "Could not open input BAM files." << endl; return 1; } vector<RefData> testRefData=reader.GetReferenceData(); SamHeader header = reader.GetHeader(); string pID = "decrQualDeaminated"; string pName = "decrQualDeaminated"; string pCommandLine = ""; for(int i=0;i<(argc);i++){ pCommandLine += (string(argv[i])+" "); } putProgramInHeader(&header,pID,pName,pCommandLine); const RefVector references = reader.GetReferenceData(); BamWriter writer; if ( !writer.Open(outbamFile, header, references) ) { cerr << "Could not open output BAM file" << endl; return 1; } BamAlignment al; // BamAlignment al2; // bool al2Null=true; while ( reader.GetNextAlignment(al) ) { if(al.IsPaired() ){ if(al.IsFirstMate() ){ //5' end, need to check first base only if(al.IsReverseStrand()){ // if(!al.IsMapped()){ cerr << "Cannot have reverse complemented unmapped reads :" <<al.Name<< endl; //return 1; } int indexToCheck; //5' of first mate reversed indexToCheck=al.QueryBases.length()-1; for(int i=0;i<bpToDecrease5;i++){ if(toupper(al.QueryBases[indexToCheck]) == 'A'){ al.Qualities[indexToCheck]=char(offset+baseQualForDeam); } indexToCheck=max(indexToCheck-1,0); } }else{ int indexToCheck; //5' of first mate indexToCheck=0; for(int i=0;i<bpToDecrease5;i++){ //first base if(toupper(al.QueryBases[indexToCheck]) == 'T'){ al.Qualities[indexToCheck]=char(offset+baseQualForDeam); } indexToCheck=min(indexToCheck+1,int(al.Qualities.size())); } } }else{ //3' end, need to check last two bases only if( al.IsSecondMate() ){ if(al.IsReverseStrand()){ // if(!al.IsMapped()){ cerr << "Cannot have reverse complemented unmapped reads :" <<al.Name<< endl; //return 1; } int indexToCheck; //3' of second mate reversed indexToCheck=al.QueryBases.length()-1; for(int i=0;i<bpToDecrease3;i++){ if(toupper(al.QueryBases[indexToCheck]) == 'T'){ al.Qualities[indexToCheck]=char(offset+baseQualForDeam); } indexToCheck=max(indexToCheck-1,0); } }else{ int indexToCheck; //3' of second mate forward indexToCheck=0; for(int i=0;i<bpToDecrease3;i++){ //first base if(toupper(al.QueryBases[indexToCheck]) == 'A'){ al.Qualities[indexToCheck]=char(offset+baseQualForDeam); } indexToCheck=min(indexToCheck+1,int(al.Qualities.size())); } } }else{ cerr << "Wrong state" << endl; return 1; } } }//end of paired end else{//we consider single reads to have been sequenced from 5' to 3' if(al.IsReverseStrand()){ //need to consider if(!al.IsMapped()){ cerr << "Cannot have reverse complemented unmapped reads :" <<al.Name<< endl; //return 1; } int indexToCheck; //5' of single read reversed indexToCheck=al.QueryBases.length()-1; for(int i=0;i<bpToDecrease5;i++){ if(toupper(al.QueryBases[indexToCheck]) == 'A'){ al.Qualities[indexToCheck]=char(offset+baseQualForDeam); } indexToCheck=max(indexToCheck-1,0); } //3' of single read reversed indexToCheck=0; for(int i=0;i<bpToDecrease3;i++){ //first base if(toupper(al.QueryBases[indexToCheck]) == 'A'){ al.Qualities[indexToCheck]=char(offset+baseQualForDeam); } indexToCheck=min(indexToCheck+1,int(al.Qualities.size())); } }else{ int indexToCheck; //5' of single read indexToCheck=0; for(int i=0;i<bpToDecrease5;i++){ //first base if(toupper(al.QueryBases[indexToCheck]) == 'T'){ al.Qualities[indexToCheck]=char(offset+baseQualForDeam); } indexToCheck=min(indexToCheck+1,int(al.Qualities.size())); } //3' of single read indexToCheck=al.QueryBases.length()-1; for(int i=0;i<bpToDecrease3;i++){ if(toupper(al.QueryBases[indexToCheck]) == 'T'){ al.Qualities[indexToCheck]=char(offset+baseQualForDeam); } indexToCheck=max(indexToCheck-1,0); } } }//end of single end writer.SaveAlignment(al); }// while ( reader.GetNextAlignment(al) ) { reader.Close(); writer.Close(); cerr<<"Program terminated gracefully"<<endl; return 0; }