int main (int argc, char * argv[]) { vector<string> inputFilenames; string combinedOutFilename, alignmentsOutFilename; try { TCLAP::CmdLine cmd("Program description", ' ', VERSION); TCLAP::ValueArg<string> combinedOutputArg("o", "out", "Combined output filename (BAM format)", true, "", "combined.bam", cmd); TCLAP::ValueArg<int> minInsertArg("n", "min-insert", "Minimum insert size", false, DEFAULT_MIN_GAP, "min insert size", cmd); TCLAP::ValueArg<int> maxInsertArg("x", "max-insert", "Maximum insert size", false, DEFAULT_MAX_GAP, "max insert size", cmd); TCLAP::MultiArg<string> inputArgs("b", "bam", "Input BAM file", true, "input.bam", cmd); cmd.parse(argc, argv); combinedOutFilename = combinedOutputArg.getValue(); MIN_GAP = minInsertArg.getValue(); MAX_GAP = maxInsertArg.getValue(); inputFilenames = inputArgs.getValue(); } catch (TCLAP::ArgException &e) { cerr << "Error: " << e.error() << " " << e.argId() << endl; } // TODO require that alignments are sorted by name BamMultiReader reader; reader.Open(inputFilenames); if (!ValidOut.Open(combinedOutFilename, reader.GetHeader(), reader.GetReferenceData())) { cerr << ValidOut.GetErrorString() << endl; return 1; } string current, prev; char mateID; Group group; set<string> references; Alignment a; while (reader.GetNextAlignment(a)) { parseID(a.Name, current, mateID); if (current.compare(prev) && prev.size() > 0) { processGroup(group, references); group.clear(); references.clear(); } references.insert(a.RefName); GroupKey key; key.refID = a.RefName; key.mateID = mateID; key.rev = a.IsReverseStrand(); group.insert( std::make_pair( key, a ) ); prev = current; } processGroup(group, references); }
bool ConvertTool::ConvertToolPrivate::Run(void) { // ------------------------------------ // initialize conversion input/output // set to default input if none provided if ( !m_settings->HasInput ) m_settings->InputFiles.push_back(Options::StandardIn()); // open input files BamMultiReader reader; if ( !m_settings->HasInput ) { // don't attempt to open index for stdin if ( !reader.Open(m_settings->InputFiles, false) ) { cerr << "Could not open input files" << endl; return false; } } else { if ( !reader.Open(m_settings->InputFiles, true) ) { if ( !reader.Open(m_settings->InputFiles, false) ) { cerr << "Could not open input files" << endl; return false; } else { cerr << "Opened reader without index file, jumping is disabled." << endl; } } } m_references = reader.GetReferenceData(); // set region if specified BamRegion region; if ( m_settings->HasRegion ) { if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { if ( !reader.SetRegion(region) ) { cerr << "Could not set BamReader region to REGION: " << m_settings->Region << endl; return false; } } else { cerr << "Could not parse REGION: " << m_settings->Region << endl; return false; } } // if output file given ofstream outFile; if ( m_settings->HasOutput ) { // open output file stream outFile.open(m_settings->OutputFilename.c_str()); if ( !outFile ) { cerr << "Could not open " << m_settings->OutputFilename << " for output" << endl; return false; } // set m_out to file's streambuf m_out.rdbuf(outFile.rdbuf()); } // ------------------------------------- // do conversion based on format bool convertedOk = true; // pileup is special case // conversion not done per alignment, like the other formats if ( m_settings->Format == FORMAT_PILEUP ) convertedOk = RunPileupConversion(&reader); // all other formats else { bool formatError = false; // set function pointer to proper conversion method void (BamTools::ConvertTool::ConvertToolPrivate::*pFunction)(const BamAlignment&) = 0; if ( m_settings->Format == 
FORMAT_BED ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintBed; else if ( m_settings->Format == FORMAT_BEDGRAPH ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintBedGraph; else if ( m_settings->Format == FORMAT_FASTA ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFasta; else if ( m_settings->Format == FORMAT_FASTQ ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFastq; else if ( m_settings->Format == FORMAT_JSON ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintJson; else if ( m_settings->Format == FORMAT_SAM ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintSam; else if ( m_settings->Format == FORMAT_WIGGLE ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintWiggle; else if ( m_settings->Format == FORMAT_YAML ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintYaml; else { cerr << "Unrecognized format: " << m_settings->Format << endl; cerr << "Please see help|README (?) for details on supported formats " << endl; formatError = true; convertedOk = false; } // if format selected ok if ( !formatError ) { // if SAM format & not omitting header, print SAM header first if ( (m_settings->Format == FORMAT_SAM) && !m_settings->IsOmittingSamHeader ) m_out << reader.GetHeaderText(); // iterate through file, doing conversion BamAlignment a; while ( reader.GetNextAlignment(a) ) (this->*pFunction)(a); // set flag for successful conversion convertedOk = true; } } // ------------------------ // clean up & exit reader.Close(); if ( m_settings->HasOutput ) outFile.close(); return convertedOk; }
int main ( int argc, char *argv[] ) { struct parameters *param = 0; param = interface(param, argc, argv); //bam input and generate index if not yet //-------------------------------------------------------------------------------------------------------+ // BAM input (file or filenames?) | //-------------------------------------------------------------------------------------------------------+ char *fof = param->mapping_f; FILE *IN=NULL; char linefof[5000]; int filecount=0; vector <string> fnames; if (strchr(fof,' ')!=NULL) { char *ptr; ptr=strtok(fof," "); while (ptr!=NULL) { fnames.push_back(ptr); filecount++; ptr=strtok(NULL," "); } } else { IN=fopen(fof,"rt"); if (IN!=NULL) { long linecount=0; while (fgets(linefof,5000-1,IN)!=NULL) { linecount++; if (linefof[0]!='#' && linefof[0]!='\n') { char *ptr=strchr(linefof,'\n'); if (ptr!=NULL && ptr[0]=='\n') { ptr[0]='\0'; } FILE *dummy=NULL; dummy=fopen(linefof,"rt"); if (dummy!=NULL) { // seems to be a file of filenames... fclose(dummy); fnames.push_back(linefof); filecount++; } else if (filecount==0 || linecount>=1000-1) { // seems to be a single file fnames.push_back(fof); filecount++; break; } } } fclose(IN); } } //file or file name decided and stored in vector "fnames" cerr << "the input mapping files are:" << endl; vector <string>::iterator fit = fnames.begin(); for(; fit != fnames.end(); fit++) { cerr << *fit << endl; } //-------------------------------------------------------------------------------------------------------+ // end of file or filenames | //-------------------------------------------------------------------------------------------------------+ // open the BAM file(s) BamMultiReader reader; reader.Open(fnames); // get header & reference information string header = reader.GetHeaderText(); RefVector refs = reader.GetReferenceData(); // attempt to open BamWriter BamWriter writer; string outputBam = param->writer; if ( outputBam != "" ) { if ( !writer.Open(param->writer, header, refs) ) { cerr << 
"Could not open output BAM file" << endl; exit(0); } } BamAlignment bam; while (reader.GetNextAlignment(bam)) { //change RG string rg = "RG"; string rgType = "Z"; string rgValue = "1"; bam.EditTag(rg,rgType,rgValue); writer.SaveAlignment(bam); } // read a bam return 0; } //main
// Crops reads to a list of scanning windows and prints the cropped segments.
//
// For every window produced by GenericRegionTools::toScanWindow the reader is
// restricted to that window, each alignment passing the filters below is cut
// to the window with GenericBamAlignmentTools::getLocalAlignment, and the
// resulting segment is stored in uniqueSeqPool keyed by its sequence. After a
// window has been read the pool is printed to stdout through the Output
// lambda, either one entry per distinct sequence (useUnique) or every entry.
//
// Always returns 0.
//
// NOTE(review): bamReader, readpool and uniqueSeqPool are declared outside the
// "omp parallel for" loop and are read and written inside it with no visible
// synchronization; cout is written the same way. Confirm this is safe for
// numThreads > 1.
// NOTE(review): uniqueSeqPool and readpool are never cleared between windows,
// so the printing step of a later window also iterates entries stored by
// earlier windows, and outFreq is re-opened (by of.open) for every window.
// Confirm this is intended.
int CropBamTool::CropBam() {
    // open bam files
    BamMultiReader bamReader;
    bamReader.Open(bamFiles);

    // the dictionary of chromosomes
    RefVector genome = bamReader.GetReferenceData();

    // get the scanning window: each tuple is read below as
    // (reference index, left position, right position)
    vector<tuple<int,int,int>> windows;
    int numWindows = GenericRegionTools::toScanWindow(genome, regionStrings, windows);

    // keys "<name>-<wId>-<wLp>-<wRp>" of alignments already handled
    unordered_set<string> readpool;

    // temporary struct for sequence object
    typedef struct {
        string name;            // alignment name
        int head_soft_clip;     // length of the leading clip counted below
        int tail_soft_clip;     // length of the trailing clip counted below
        string seq;             // cropped read segment
        string qual;            // qualities matching seq
    }cropbam_seq_t;

    // temporary struct for unique seqs: sequence -> all records with that sequence
    map<string,list<cropbam_seq_t>> uniqueSeqPool;

    // lambda expression for output: prints one record to stdout as FASTA or
    // FASTQ depending on outFormat; prints nothing for any other value
    auto Output = [this](cropbam_seq_t &a){
        if (this->outFormat=="fasta"){
            cout << ">" << a.name << "\t" << "head_soft_clip=" << a.head_soft_clip << "\t" << "tail_soft_clip=" << a.tail_soft_clip << "\t" << endl << a.seq << endl;
        }
        if (this->outFormat=="fastq"){
            cout << "@" << a.name << "\t" << "head_soft_clip=" << a.head_soft_clip << "\t" << "tail_soft_clip=" << a.tail_soft_clip << "\t" << endl << a.seq << endl;
            cout << "+" << endl << a.qual << endl;
        }
    };

    // loop over windows
    omp_set_dynamic(0);
    omp_set_num_threads(numThreads);
    #pragma omp parallel for shared(genome)
    for (int i=0; i<numWindows; i++) {
        clock_t tStart = clock();

        // NOTE(review): Open is called again on the already opened shared
        // reader in every iteration — confirm what a repeated Open does.
        bamReader.Open(bamFiles);

        int wId = get<0>(windows[i]);
        int wLp = get<1>(windows[i]);
        int wRp = get<2>(windows[i]);

        if (verbose>=1) Verbose("process the window " + genome[wId].RefName + ":" + to_string(wLp+1) + "-" + to_string(wRp));

        // rewind the bam reader
        bamReader.Rewind();
        // set the region (return value is not checked)
        bamReader.SetRegion(wId, wLp, wId, wRp);

        int numReads = 0;

        // retrieve the alignment
        BamAlignment aln;
        while (bamReader.GetNextAlignment(aln)) {
            // skip the alignment if it doesn't overlap the window
            if (aln.Position>=wRp || aln.GetEndPosition()<=wLp) continue;
            // skip the invalid alignment
            if (!isValidAlignment(aln, readLenThres, mapQualThres, alnFlagMarker)) continue;
            // skip the alignment harboring too many mismatches
            if (!GenericBamAlignmentTools::validReadIdentity(aln, 1-alnIdenThres)) continue;

            // handle each (alignment name, window) combination only once
            stringstream keyss;
            keyss << GenericBamAlignmentTools::getBamAlignmentName(aln) << "-" << wId << "-" << wLp << "-" << wRp;
            string key = keyss.str();
            auto ptr = readpool.find(key);
            if (ptr!=readpool.end()) continue;
            readpool.emplace(key);

            // get the partial read
            string readSegment, readQualSegment, genomeSegment;
            GenericBamAlignmentTools::getLocalAlignment(aln, wLp, wRp-wLp, readSegment, readQualSegment, genomeSegment);

            // add soft clip: leading clip, only when the alignment starts
            // inside the window
            // NOTE(review): CigarData.begin() is dereferenced without an
            // emptiness check, and the first Length characters of QueryBases
            // are copied for 'H' as well as 'S' — confirm both are valid.
            int hsc=0;
            auto ptr0 = aln.CigarData.begin();
            if (aln.Position>=wLp && (ptr0->Type=='S' || ptr0->Type=='H')) {
                stringstream headClipSeq, headClipQual;
                // this index shadows the window index i of the outer loop
                for (int i=0; i<ptr0->Length; i++) {
                    headClipSeq << aln.QueryBases[i];
                    headClipQual << aln.Qualities[i];
                }
                if (keepClip) {
                    readSegment=headClipSeq.str()+readSegment;
                    readQualSegment=headClipQual.str()+readQualSegment;
                }
                hsc += ptr0->Length;
            }

            // trailing clip, only when the alignment ends inside the window;
            // the last Length characters are collected back to front
            int tsc=0;
            auto ptr1 = aln.CigarData.rbegin();
            if (aln.GetEndPosition()<wRp && (ptr1->Type=='S' || ptr1->Type=='H')) {
                string ss="", qs="";
                auto str=aln.QueryBases.rbegin();
                auto qtr=aln.Qualities.rbegin();
                for (int i=0; i<ptr1->Length; i++,str++,qtr++) {
                    ss=(*str)+ss;
                    qs=(*qtr)+qs;
                }
                if (keepClip) {
                    readSegment=readSegment+ss;
                    readQualSegment=readQualSegment+qs;
                }
                tsc += ptr1->Length;
            }

            // keep only segments that are long enough
            if (readSegment.length()>=segmentLenThres) {
                cropbam_seq_t a;
                a.name = GenericBamAlignmentTools::getBamAlignmentName(aln);
                a.head_soft_clip = hsc;
                a.tail_soft_clip = tsc;
                a.seq = readSegment;
                a.qual = readQualSegment;

                // group records by identical sequence
                if (uniqueSeqPool.count(a.seq)==0) uniqueSeqPool[a.seq] = list<cropbam_seq_t>(1,a);
                else uniqueSeqPool[a.seq].emplace_back(a);

                // Disabled: direct printing at this point (printing is now
                // done after the read loop through the Output lambda).
//                if (outFormat=="fasta"){
//                    cout << ">" << GenericBamAlignmentTools::getBamAlignmentName(aln) << "\t"
//                         << "head_soft_clip=" << hsc << "\t"
//                         << "tail_soft_clip=" << tsc << "\t"
//                         << endl
//                         << readSegment << endl;
//                }
//                if (outFormat=="fastq"){
//                    cout << "@" << GenericBamAlignmentTools::getBamAlignmentName(aln) << "\t"
//                         << "head_soft_clip=" << hsc << "\t"
//                         << "tail_soft_clip=" << tsc << "\t"
//                         << endl
//                         << readSegment << endl;
//                    cout << "+" << endl
//                         << readQualSegment << endl;
//                }

                numReads++;
            }
        }

        // the count gathered while reading is discarded; from here on
        // numReads counts the records that are printed
        numReads = 0;
        if (useUnique){
            // one record per distinct sequence seen at least thresFreq times;
            // name and occurrence count also go to the outFreq file
            ofstream of;
            of.open(outFreq);
            for (auto a : uniqueSeqPool){
                if (a.second.size()>=thresFreq){
                    Output(*a.second.begin());
                    of << a.second.begin()->name << "\t" << a.second.size() << endl;
                    numReads ++;
                }
            }
            of.close();
        }else{
            // every stored record
            for (auto a : uniqueSeqPool){
                for (auto b : a.second){
                    Output(b);
                    numReads ++;
                }
            }
        }

        clock_t tEnd = clock();
        if (verbose>=1) Verbose("retrieve " + to_string(numReads) + " reads");
        if (verbose>=1) Verbose("time elapsed " + to_string((double)(tEnd-tStart)/CLOCKS_PER_SEC) + " seconds");
    }

    return 0;
}
bool FilterTool::FilterToolPrivate::Run(void) { // set to default input if none provided if ( !m_settings->HasInput && !m_settings->HasInputFilelist ) m_settings->InputFiles.push_back(Options::StandardIn()); // add files in the filelist to the input file list if ( m_settings->HasInputFilelist ) { ifstream filelist(m_settings->InputFilelist.c_str(), ios::in); if ( !filelist.is_open() ) { cerr << "bamtools filter ERROR: could not open input BAM file list... Aborting." << endl; return false; } string line; while ( getline(filelist, line) ) m_settings->InputFiles.push_back(line); } // initialize defined properties & user-specified filters // quit if failed if ( !SetupFilters() ) return false; // open reader without index BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { cerr << "bamtools filter ERROR: could not open input files for reading." << endl; return false; } // retrieve reader header & reference data const string headerText = reader.GetHeaderText(); filterToolReferences = reader.GetReferenceData(); // determine compression mode for BamWriter bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression ); BamWriter::CompressionMode compressionMode = BamWriter::Compressed; if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed; // open BamWriter BamWriter writer; writer.SetCompressionMode(compressionMode); if ( !writer.Open(m_settings->OutputFilename, headerText, filterToolReferences) ) { cerr << "bamtools filter ERROR: could not open " << m_settings->OutputFilename << " for writing." 
<< endl; reader.Close(); return false; } // if no region specified, filter entire file BamAlignment al; if ( !m_settings->HasRegion ) { while ( reader.GetNextAlignment(al) ) { if ( CheckAlignment(al) ) writer.SaveAlignment(al); } } // otherwise attempt to use region as constraint else { // if region string parses OK BamRegion region; if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { // attempt to find index files reader.LocateIndexes(); // if index data available for all BAM files, we can use SetRegion if ( reader.HasIndexes() ) { // attempt to use SetRegion(), if failed report error if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { cerr << "bamtools filter ERROR: set region failed. Check that REGION describes a valid range" << endl; reader.Close(); return false; } // everything checks out, just iterate through specified region, filtering alignments while ( reader.GetNextAlignment(al) ) if ( CheckAlignment(al) ) writer.SaveAlignment(al); } // no index data available, we have to iterate through until we // find overlapping alignments else { while ( reader.GetNextAlignment(al) ) { if ( (al.RefID >= region.LeftRefID) && ((al.Position + al.Length) >= region.LeftPosition) && (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) { if ( CheckAlignment(al) ) writer.SaveAlignment(al); } } } } // error parsing REGION string else { cerr << "bamtools filter ERROR: could not parse REGION: " << m_settings->Region << endl; cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid" << endl; reader.Close(); return false; } } // clean up & exit reader.Close(); writer.Close(); return true; }
bool ConvertTool::ConvertToolPrivate::Run(void) { // ------------------------------------ // initialize conversion input/output // set to default input if none provided if ( !m_settings->HasInput && !m_settings->HasInputFilelist ) m_settings->InputFiles.push_back(Options::StandardIn()); // add files in the filelist to the input file list if ( m_settings->HasInputFilelist ) { ifstream filelist(m_settings->InputFilelist.c_str(), ios::in); if ( !filelist.is_open() ) { cerr << "bamtools convert ERROR: could not open input BAM file list... Aborting." << endl; return false; } string line; while ( getline(filelist, line) ) m_settings->InputFiles.push_back(line); } // open input files BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { cerr << "bamtools convert ERROR: could not open input BAM file(s)... Aborting." << endl; return false; } // if input is not stdin & a region is provided, look for index files if ( m_settings->HasInput && m_settings->HasRegion ) { if ( !reader.LocateIndexes() ) { cerr << "bamtools convert ERROR: could not locate index file(s)... Aborting." << endl; return false; } } // retrieve reference data m_references = reader.GetReferenceData(); // set region if specified BamRegion region; if ( m_settings->HasRegion ) { if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { if ( reader.HasIndexes() ) { if ( !reader.SetRegion(region) ) { cerr << "bamtools convert ERROR: set region failed. 
Check that REGION describes a valid range" << endl; reader.Close(); return false; } } } else { cerr << "bamtools convert ERROR: could not parse REGION: " << m_settings->Region << endl; cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid" << endl; reader.Close(); return false; } } // if output file given ofstream outFile; if ( m_settings->HasOutput ) { // open output file stream outFile.open(m_settings->OutputFilename.c_str()); if ( !outFile ) { cerr << "bamtools convert ERROR: could not open " << m_settings->OutputFilename << " for output" << endl; return false; } // set m_out to file's streambuf m_out.rdbuf(outFile.rdbuf()); } // ------------------------------------- // do conversion based on format bool convertedOk = true; // pileup is special case // conversion not done per alignment, like the other formats if ( m_settings->Format == FORMAT_PILEUP ) convertedOk = RunPileupConversion(&reader); // all other formats else { bool formatError = false; // set function pointer to proper conversion method void (BamTools::ConvertTool::ConvertToolPrivate::*pFunction)(const BamAlignment&) = 0; if ( m_settings->Format == FORMAT_BED ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintBed; else if ( m_settings->Format == FORMAT_FASTA ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFasta; else if ( m_settings->Format == FORMAT_FASTQ ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintFastq; else if ( m_settings->Format == FORMAT_JSON ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintJson; else if ( m_settings->Format == FORMAT_SAM ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintSam; else if ( m_settings->Format == FORMAT_YAML ) pFunction = &BamTools::ConvertTool::ConvertToolPrivate::PrintYaml; else { cerr << "bamtools convert ERROR: unrecognized format: " << m_settings->Format << endl; cerr << "Please see documentation for list of supported formats " << endl; 
formatError = true; convertedOk = false; } // if format selected ok if ( !formatError ) { // if SAM format & not omitting header, print SAM header first if ( (m_settings->Format == FORMAT_SAM) && !m_settings->IsOmittingSamHeader ) m_out << reader.GetHeaderText(); // iterate through file, doing conversion BamAlignment a; while ( reader.GetNextAlignment(a) ) (this->*pFunction)(a); // set flag for successful conversion convertedOk = true; } } // ------------------------ // clean up & exit reader.Close(); if ( m_settings->HasOutput ) outFile.close(); return convertedOk; }
// Entry point: streams a region file and the alignments of one or more BAM
// files side by side and accumulates, per region, a map from position offset
// (relative to the region start) to a weighted tag count.
//
// Outline:
//   1. Read the parameters through interface().
//   2. Decide whether param->mapping_f is a blank-separated list of files, a
//      file of filenames, or a single file, and collect the names in fnames.
//   3. Open the BAM files and locate (or create) their indexes.
//   4. For every chromosome met in the region file: restrict the reader to
//      that chromosome, walk its alignments, add each alignment block to the
//      overlapping regions held in the deque "regions", and hand regions that
//      are finished to gene_processing(). New regions are read from the
//      region file one line at a time through eatline().
//   5. Optionally (param->posc set) write per-position pile-up lines and a
//      per-chromosome summary.
//
// The normal way out is the exit(0) in "zone 5", reached when the region file
// is exhausted; the locus bias table is written just before it.
//
// NOTE(review): read_length is not declared in this function; it is
// presumably a file-scope variable — confirm.
int main ( int argc, char *argv[] ) {

  struct parameters *param = 0;
  param = interface(param, argc, argv);

  // region file input (the region file should be sorted the same way as the bam file)
  ifstream region_f;
  region_f.open(param->region_f, ios_base::in);  // the region file is opened

  // bam input and generate index if not yet
  //-------------------------------------------------------------------------------------------------------+
  // BAM input (file or filenames?)                                                                        |
  //-------------------------------------------------------------------------------------------------------+
  char *fof = param->mapping_f;
  FILE *IN=NULL;
  char linefof[5000];
  int filecount=0;
  vector <string> fnames;

  if (strchr(fof,' ')!=NULL) {
    // blank-separated list of filenames (strtok splits the argument in place)
    char *ptr;
    ptr=strtok(fof," ");
    while (ptr!=NULL) {
      fnames.push_back(ptr);
      filecount++;
      ptr=strtok(NULL," ");
    }
  } else {
    IN=fopen(fof,"rt");
    if (IN!=NULL) {
      long linecount=0;
      while (fgets(linefof,5000-1,IN)!=NULL) {
        linecount++;
        // lines starting with '#' and empty lines are ignored
        if (linefof[0]!='#' && linefof[0]!='\n') {
          // strip the trailing newline so the line can be used as a path
          char *ptr=strchr(linefof,'\n');
          if (ptr!=NULL && ptr[0]=='\n') {
            ptr[0]='\0';
          }
          FILE *dummy=NULL;
          dummy=fopen(linefof,"rt");
          if (dummy!=NULL) {     // seems to be a file of filenames...
            fclose(dummy);
            fnames.push_back(linefof);
            filecount++;
          } else if (filecount==0 || linecount>=1000-1) {  // seems to be a single file
            fnames.push_back(fof);
            filecount++;
            break;
          }
        }
      }
      fclose(IN);
    }
  }  // file or file name decided and stored in vector "fnames"

  cerr << "the input mapping files are:" << endl;
  vector <string>::iterator fit = fnames.begin();
  for(; fit != fnames.end(); fit++) {
    cerr << *fit << endl;
  }

  //-------------------------------------------------------------------------------------------------------+
  // end of file or filenames                                                                              |
  //-------------------------------------------------------------------------------------------------------+

  // open the BAM file(s) (the return value of Open is not checked)
  BamMultiReader reader;
  reader.Open(fnames);

  // get header & reference information
  string header = reader.GetHeaderText();
  RefVector refs = reader.GetReferenceData();

  if ( ! reader.LocateIndexes() )     // opens any existing index files that match our BAM files
    reader.CreateIndexes();         // creates index files for BAM files that still lack one

  // locus bias: table of 1000 zero-initialised profiles, filled by gene_processing()
  struct lb empty_profile = {0,0,0,0};
  vector <struct lb> locus_b(1000, empty_profile);
  // output locus bias file (only opened when param->lbias is non-empty)
  string locus_bias_set = param->lbias;
  ofstream locus_bias;
  if ( locus_bias_set != "" ) {
    locus_bias.open(param->lbias);
    if ( !locus_bias ) {
      cerr << "can not open locus_bias file.\n";
      // NOTE(review): this failure exits with status 0 — confirm that is intended
      exit(0);
    }
  }

  // should decide which chromosome
  string line;
  string old_chr = "SRP";   // start value; the main loop runs while the current region's chr differs from old_chr
  string type = param->type;

  // whether to do the position-level pile-up bookkeeping
  bool posc = false;
  ofstream posc_f;
  ofstream chrmap_f;
  string poscset = param->posc;
  if ( poscset != "" ) {
    posc = true;
    posc_f.open(param->posc);
    chrmap_f.open(param->chrmap);
  }

  bool noChr;
  if ( param->nochr == 1 ){
    noChr = true;
  } else {
    noChr = false;
  }

  // regions read from the region file and not yet finished
  deque <struct region> regions;

  getline(region_f, line); // get the first line
  eatline(line,regions,noChr);

  // NOTE(review): "it" is dereferenced in the loop condition; this assumes
  // eatline() added at least one region — confirm for an empty region file.
  deque <struct region>::iterator it = regions.begin();

  while ( it->chr != old_chr ) {

    old_chr = it->chr;  // set the current chr as old chr

    int chr_id  = reader.GetReferenceID(it->chr);

    if ( chr_id == -1 ) {  // reference not found

      // no such reference in the BAM files: finish all queued regions of this chr
      for (; it != regions.end() && it->chr == old_chr; ) {
        gene_processing(*it,locus_b);           // print the old region info
        it = regions.erase(it);         // erase the current region
      }

      // and skip further region lines of the same chr
      while ( regions.empty() ) {
        getline(region_f, line);
        if ( region_f.eof() ){
          cerr << "finished: end of region file, zone 0" << endl;
          break;
        }
        eatline(line, regions,noChr);
        it = regions.begin();
        if (it->chr == old_chr){
          gene_processing(*it,locus_b);
          regions.clear();
          continue;
        }
      }
      // NOTE(review): after the "zone 0" break the deque can be empty, and the
      // outer loop condition then dereferences "it" — confirm this is safe.
      continue;
    }

    int chr_len = refs.at(chr_id).RefLength;

    if ( !reader.SetRegion(chr_id, 1, chr_id, chr_len) ) // here set region
    {
      cerr << "bamtools count ERROR: Jump region failed " << it->chr << endl;
      reader.Close();
      exit(1);
    }

    // pile-up pos stats (all reset for every chromosome)
    set <string> fragment;                 // names of fragments whose first mate was counted
    map <string, unsigned int> pileup;     // alignment summary -> count at the current start
    bool isposPileup = false;              // current start already counted in pileup_pos
    unsigned int old_start   = 0;
    unsigned int total_tags = 0;
    unsigned int total_pos  = 0;
    unsigned int pileup_pos = 0;

    BamAlignment bam;
    while (reader.GetNextAlignment(bam)) {

      if ( bam.IsMapped() == false ) continue;   // skip unaligned reads

      // NOTE(review): "unique" is left uninitialised if GetTag does not write
      // to it (e.g. no NH tag) and is used below both in comparisons and as a
      // divisor — confirm every alignment carries NH.
      unsigned int unique;
      bam.GetTag("NH", unique);
      if (param->unique == 1) {
        if (unique != 1) {                       // skip reads that are not uniquely mapped
          continue;
        }
      }

      if (read_length == 0){
        read_length = bam.Length;
      }

      //cout << bam.Name << endl;
      string chrom = refs.at(bam.RefID).RefName;
      string strand = "+";
      if (bam.IsReverseStrand()) strand = "-";

      unsigned int alignmentStart =  bam.Position+1;
      // mateStart is only assigned, and only read, when type == "p"
      unsigned int mateStart;
      if (type == "p") mateStart = bam.MatePosition+1;
      unsigned int alignmentEnd = bam.GetEndPosition();
      unsigned int cigarEnd;
      vector <int> blockLengths;
      vector <int> blockStarts;
      blockStarts.push_back(0);

      ParseCigar(bam.CigarData, blockStarts, blockLengths, cigarEnd);

      // position check for uniquely mapped reads (for paired-end data, type "p",
      // this is done per fragment: the second mate of a counted fragment is skipped)
      if (posc == true && unique == 1) {

        if (type == "p" && fragment.count(bam.Name) > 0)
          fragment.erase(bam.Name);

        else {

          total_tags++;
          if (type == "p"){
            fragment.insert(bam.Name);
          }
          string alignSum;
          if (type == "p") {
             alignSum = int2str(alignmentStart) + "\t" + int2str(mateStart) + "\t.\t" + strand;
          } else {
             alignSum = int2str(alignmentStart) + "\t" + int2str(alignmentEnd) + "\t.\t" + strand;
          }

          if ( alignmentStart != old_start ) {
            // new start position: flush what was collected for the previous one
            isposPileup = false;
            map <string, unsigned int>::iterator pit = pileup.begin();
            for (; pit != pileup.end(); pit++) {
              posc_f << chrom << "\truping\tpileup\t" << pit->first << "\t.\t" << "Pileup=" << pit->second << endl;      // print pileup
            }
            pileup.clear();           // clear pileup set
            pileup.insert( pair <string, unsigned int> (alignSum, 1) );  // insert the new read
            total_pos++;
          }

          else if ( alignmentStart == old_start ) { // same starts
            if ( pileup.count(alignSum) > 0 ) {  // pileup
              // the first repeat at this start counts the position once
              if ( pileup[alignSum] == 1 && isposPileup == false ) {
                pileup_pos++; isposPileup = true;
              }
              pileup[alignSum]++;
            }
            else {
              pileup.insert( pair <string, unsigned int> (alignSum, 1) );
            }
          } // same starts

        }   // new fragment

        old_start = alignmentStart;
      }  // do pos check

      // weight of this alignment
      float incre = 1.;
      if (blockStarts.size() > 1) incre = 0.5;     // incre half for junction reads
      incre /= static_cast < float >(unique);        // for multi aligned reads

      deque <struct region>::iterator iter = regions.begin();

      if ( iter->start > alignmentEnd ) continue;  // skip reads not overlapping with the first region

      // NOTE(review): iter is dereferenced before the "iter != regions.end()"
      // test at the end of this condition — confirm iter can never be end() here.
      while ( iter->chr == old_chr && iter->start <= alignmentEnd && iter != regions.end() ) {

        if (iter->end < alignmentStart) {            // the region ends before alignmentStart: it is finished
          gene_processing(*iter,locus_b);            // processing
          iter = regions.erase(iter);                // this region should be removed
          if ( regions.empty() ) {
            getline(region_f, line);                 // get a line of region file
            if ( ! region_f.eof() ) {
              eatline(line, regions, noChr);         // eat a line and put it into the deque
              iter = regions.begin();
            }
            else {  // it's reaching the end of the region file
              cerr << "finished: end of region file, zone 3" << endl;
              break;
            }
          }
          continue;
        }

        if (iter->end >= alignmentStart && iter->start <= alignmentEnd) {  // overlapping, should take action

          // add the weight at the offset of every block start
          vector <int>::iterator cigit = blockStarts.begin();
          for (; cigit != blockStarts.end(); cigit++) {
            unsigned int current_start = *cigit + alignmentStart;
            int current_pos = current_start - (iter->start);
            //cout << iter->chr << "\t" << iter->start << "\t" << iter->end << "\t" << current_start << endl;
            if ( (iter->tags).count(current_pos) > 0 ) {
              (iter->tags)[current_pos] += incre;
            }
            else
              (iter->tags).insert( pair<int, float>(current_pos, incre) );
          }

        }  // overlapping take action!

        if ( (iter+1) != regions.end() )
          iter++;                                           // if this region is not the last element in the deque
        else {                                              // the last element
          getline(region_f, line);                          // get a line of region file
          if ( ! region_f.eof() ){
            eatline(line, regions, noChr);                  // eat a line and put it into the deque
            iter = regions.end();
            iter--;
          }
          else {  // it's reaching the end of the region file
            cerr << "finished: end of region file, zone 4" << endl;
            break;
          }
        }

      } // while

    }  // read a bam

    // print chr map
    if (posc == true) {
      chrmap_f << old_chr << "\t" << total_tags << "\t" << total_pos << "\t" << pileup_pos << endl;
    }

    // all alignments of this chromosome are consumed: finish its remaining
    // regions and move on to the first region of another chromosome
    it = regions.begin();                   // reset to begin
    for (; it != regions.end() && it->chr == old_chr; ) {
      gene_processing(*it,locus_b);              // print the old region info
      it = regions.erase(it);             // erase the current region
    }

    while ( regions.empty() ) {

      getline(region_f, line);
      if ( region_f.eof() ){
        cerr << "finished: end of region file, zone 5" << endl;
        // print locus bias
        for (unsigned int l = 0; l < 1000; l++){
          locus_bias << l << "\t" << locus_b[l].ps << "\t" << locus_b[l].hs << "\t" << locus_b[l].pe << "\t" << locus_b[l].he << endl;
        }
        // normal termination; the clean-up after the loop is not reached from here
        exit(0);
      }
      eatline(line, regions, noChr);
      it = regions.begin();
      // further regions of the chromosome just finished are processed and dropped
      if (it->chr == old_chr){
        gene_processing(*it, locus_b);
        regions.clear();
        continue;
      }
    }

  } // region chr != old chr

  regions.clear();
  reader.Close();
  region_f.close();
  return 0;

} //main
int FileReader::runInternal() { ogeNameThread("am_FileReader"); if(!format_specified) format = deduceFileFormat(); if(format == FORMAT_BAM) { BamMultiReader reader; if(!reader.Open(filenames)) { cerr << "Error opening BAM files." << endl; reader.Close(); return -1; } header = reader.GetHeader(); references = reader.GetReferenceData(); open = true; BamAlignment * al; while(true) { if(load_string_data) al = reader.GetNextAlignment(); else al = reader.GetNextAlignmentCore(); if(!al) break; putOutputAlignment(al); } reader.Close(); } else if(format == FORMAT_SAM) { vector<SamReader> readers; SamHeader first_header; // before doing any reading, open the files to // verify they are the right format, etc. for(int i = 0; i < filenames.size(); i++) { SamReader reader; if(!reader.Open(filenames[i])) { cerr << "Error opening SAM file: " << filenames[i] << endl; return -1; } if(filenames.size() > 1 && i == 0) first_header = header; // TODO: We can probably find a better way to deal with multiple SAM file headers, // but for now we should disallow different headers to avoid issues. if(i > 0 && header.ToString() != first_header.ToString()) cerr << "Warning! SAM input files have different headers." << endl; reader.Close(); } for(int i = 0; i < filenames.size(); i++) { SamReader reader; if(!reader.Open(filenames[i])) { cerr << "Error opening SAM file: " << filenames[i] << endl; return -1; } header = reader.GetHeader(); references = reader.GetReferenceData(); open = true; if(filenames.size() > 1 && i == 0) first_header = header; BamAlignment * al = NULL; while(true) { al = reader.GetNextAlignment(); if(NULL == al) break; putOutputAlignment(al); } reader.Close(); } } else { cerr << "FileReader couldn't detect file format. Aborting." << endl; exit(-1); return -1; } return 0; }