std::string getQuickStats(const std::string &bamFile, std::map< std::string, int > &keyLen, unsigned int &nFlowFZ, unsigned int &nFlowZM) { std::string errMsg = ""; BamTools::BamReader bamReader; if(!bamReader.Open(bamFile)) { errMsg += "Failed to open bam " + bamFile + "\n"; return(errMsg); } BamTools::SamHeader samHeader = bamReader.GetHeader(); for (BamTools::SamReadGroupIterator itr = samHeader.ReadGroups.Begin(); itr != samHeader.ReadGroups.End(); ++itr ) { if(itr->HasID()) keyLen[itr->ID] = itr->HasKeySequence() ? itr->KeySequence.length() : 0; if(itr->HasFlowOrder()) nFlowZM = std::max(nFlowZM,(unsigned int) itr->FlowOrder.length()); } BamTools::BamAlignment alignment; std::vector<uint16_t> flowIntFZ; while(bamReader.GetNextAlignment(alignment)) { if(alignment.GetTag("FZ", flowIntFZ)) nFlowFZ = flowIntFZ.size(); break; } bamReader.Close(); if(nFlowFZ==0) std::cout << "NOTE: bam file has no flow signals in FZ tag: " + bamFile + "\n"; if(nFlowZM==0) std::cout << "NOTE: bam file has no flow signals in ZM tag: " + bamFile + "\n"; return(errMsg); }
/// Appends the name of every sequence listed in the BAM header of
/// `bamReader` to `bam_sequence_names`, in header order.
void BamHeaderHelper::GetRefID(BamTools::BamReader &bamReader) {
  BamTools::SamHeader header = bamReader.GetHeader();

  BamTools::SamSequenceIterator seq = header.Sequences.Begin();
  while (seq != header.Sequences.End()) {
    bam_sequence_names.push_back(seq->Name);
    ++seq;
  }
}
/// Appends the flow order of every header read group that declares one to
/// `flow_order_set`.
///
/// A header without any read group is reported on stderr but is not treated
/// as fatal: the function simply collects nothing in that case.
void BamHeaderHelper::GetFlowOrder(BamTools::BamReader &bamReader){
  BamTools::SamHeader header = bamReader.GetHeader();

  const bool hasReadGroups = header.HasReadGroups();
  if (!hasReadGroups) {
    cerr << "ERROR: there is no read group in " << "this file" << endl;
  }

  BamTools::SamReadGroupIterator group = header.ReadGroups.Begin();
  while (group != header.ReadGroups.End()) {
    if (group->HasFlowOrder())
      flow_order_set.push_back(group->FlowOrder);
    ++group;
  }
}
std::vector< Sample::SharedPtr > BamAlignmentReader::GetBamReaderSamples(const std::string& bamPath) { std::vector< Sample::SharedPtr > samplePtrs; BamTools::BamReader bamReader; if (!bamReader.Open(bamPath)) { throw "Unable to open bam file"; } auto readGroups = bamReader.GetHeader().ReadGroups; auto iter = readGroups.Begin(); for (; iter != readGroups.End(); ++iter) { auto samplePtr = std::make_shared< Sample >((*iter).Sample, (*iter).ID, bamPath); samplePtrs.emplace_back(samplePtr); } bamReader.Close(); return samplePtrs; }
void Config::InitializationClustering() { struct stat st; if(stat(Workspace.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Workspace directory already present"); else if (mkdir(Workspace.c_str(), 0755) != 0) { Log("[Error] Could not create workspace directory: " + Workspace); exit(1); } RunningTasksFile = Workspace + "/" + FilePrefix + "running.tasks"; StatsFile = Workspace + "/" + FilePrefix + "stats"; BinClusterFile = Workspace + "/" + FilePrefix + "bpc"; clusterFile = new ClusterFile(BinClusterFile); clusterDir = Workspace + "/clusters/"; if(stat(clusterDir.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Cluster directory already present"); else if (mkdir(clusterDir.c_str(), 0755) != 0) { Log("[Error] Could not create cluster directory: " + clusterDir); exit(1); } insertsizeDir = Workspace + "/insertsize/"; if(stat(insertsizeDir.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Insertsize directory already present"); else if (mkdir(insertsizeDir.c_str(), 0755) != 0) { Log("[Error] Could not create insertsize directory: " + insertsizeDir); exit(1); } coverageDir = Workspace + "/coverage/"; if(stat(coverageDir.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Coverage directory already present"); else if (mkdir(coverageDir.c_str(), 0755) != 0) { Log("[Error] Could not create coverage directory: " + coverageDir); exit(1); } if (!ForwardBam.empty() && !ReverseBam.empty() && PairedBam.empty()) { UsePairedBam = false; } else if (ForwardBam.empty() && ReverseBam.empty() && !PairedBam.empty()) { UsePairedBam = true; } else { Log("[Error] No correct bam file(s)"); exit(1); } BamTools::BamAlignment alignment; BamTools::BamReader BamReader; if (UsePairedBam) { BamReader.Open(PairedBam); if (not BamReader.IsOpen()) { Log("[Error] Could not open paired bam"); exit(1); } if (PairedIndex.empty()) { if (not BamReader.LocateIndex(BamTools::BamIndex::STANDARD)) { PairedIndex = 
PairedBam.substr(0,PairedBam.find_last_of(".bam")-3) + ".bai"; BamReader.OpenIndex(PairedIndex); } if (not BamReader.HasIndex()) { Log("[Error] No index for bamfile"); exit(1); } } BamTools::SamHeader header = BamReader.GetHeader(); for (BamTools::SamReadGroupIterator it = header.ReadGroups.Begin(); it != header.ReadGroups.End(); it++) { BamTools::SamReadGroup* readgroup = &*it; readNameConverter.TrimName(readgroup->ID); readNameConverter.AddReadGroup(readgroup->ID); } long int count = 0; while (BamReader.GetNextAlignment(alignment)) { string RG; if (alignment.GetTag("RG", RG)) { if (not NameTrim.empty()) readNameConverter.TrimName(RG); if (readNameConverter.AddReadGroup(RG)) { Log("[Warning] Readgroup '" + RG + "' found in reads but not in header"); count = 0; } } count++; if (count > 10000) break; } BamReader.Close(); } else { BamReader.Open(ForwardBam); if (not BamReader.IsOpen()) { Log("[Error] Could not open first/forward bam"); exit(1); } if (ForwardIndex.empty()) { if (not BamReader.LocateIndex(BamTools::BamIndex::STANDARD)) { ForwardIndex = ForwardBam.substr(0,ForwardBam.find_last_of(".bam")-3) + ".bai"; BamReader.OpenIndex(ForwardIndex); } if (not BamReader.HasIndex()) { Log("[Error] No index for forward bamfile"); exit(1); } } BamTools::SamHeader forwardheader = BamReader.GetHeader(); for (BamTools::SamReadGroupIterator it = forwardheader.ReadGroups.Begin(); it != forwardheader.ReadGroups.End(); it++) { BamTools::SamReadGroup* readgroup = &*it; readNameConverter.TrimName(readgroup->ID); readNameConverter.AddReadGroup(readgroup->ID); } long int count = 0; while (BamReader.GetNextAlignment(alignment)) { string RG; if (alignment.GetTag("RG", RG)) { if (!NameTrim.empty()) readNameConverter.TrimName(RG); if (readNameConverter.AddReadGroup(RG)) { Log("[Warning] Readgroup '" + RG + "' found in forward reads but not in header"); count = 0; } } count++; if (count > 10000) break; } BamReader.Close(); BamReader.Open(ReverseBam); if (not BamReader.IsOpen()) { 
Log("[Error] Could not open second/reverse bam"); exit(1); } if (ReverseIndex.empty()) { if (not BamReader.LocateIndex(BamTools::BamIndex::STANDARD)) { ReverseIndex = ReverseBam.substr(0,ReverseBam.find_last_of(".bam")-3) + ".bai"; BamReader.OpenIndex(ReverseIndex); } if (not BamReader.HasIndex()) { Log("[Error] No index for reverse bamfile"); exit(1); } } BamTools::SamHeader reverseheader = BamReader.GetHeader(); for (BamTools::SamReadGroupIterator it = reverseheader.ReadGroups.Begin(); it != reverseheader.ReadGroups.End(); it++) { BamTools::SamReadGroup* readgroup = &*it; readNameConverter.TrimName(readgroup->ID); if (readNameConverter.AddReadGroup(readgroup->ID)) { Log("[Warning] Readgroup '" + readgroup->ID + "' found in reverse but not in forward"); } } count = 0; while (BamReader.GetNextAlignment(alignment)) { string RG; if (alignment.GetTag("RG", RG)) { if (!NameTrim.empty()) readNameConverter.TrimName(RG); if (readNameConverter.AddReadGroup(RG)) { Log("[Warning] Readgroup '" + RG + "' found in reverse reads but not in header"); count = 0; } } count++; if (count > 10000) break; } BamReader.Close(); } for(map<string, int>::iterator it = readNameConverter.ReadGroups.begin(); it!=readNameConverter.ReadGroups.end(); ++it) { ostringstream logBuffer; logBuffer << "Readgroup found: " << it->second << " - " << it->first; Log(logBuffer.str()); } writeConfigFile(Workspace + FilePrefix + "config"); }
void MyBamGroup::ReadGroup(char *bamFile){ BamTools::BamReader bamReader; if(!bamReader.Open(std::string(bamFile))) { errMsg = "Failed to open bam " + std::string(bamFile) + "\n"; } else { BamTools::SamHeader samHeader = bamReader.GetHeader(); for (BamTools::SamReadGroupIterator itr = samHeader.ReadGroups.Begin(); itr != samHeader.ReadGroups.End(); ++itr ) { if(itr->HasID()) { ID.push_back(itr->ID); } else { ID.push_back(""); } if(itr->HasFlowOrder()) { FlowOrder.push_back(itr->FlowOrder); } else { FlowOrder.push_back(""); } if(itr->HasKeySequence()) { KeySequence.push_back(itr->KeySequence); } else { KeySequence.push_back(""); } if(itr->HasDescription()) { Description.push_back(itr->Description); } else { Description.push_back(""); } if(itr->HasLibrary()) { Library.push_back(itr->Library); } else { Library.push_back(""); } if(itr->HasPlatformUnit()) { PlatformUnit.push_back(itr->PlatformUnit); } else { PlatformUnit.push_back(""); } if(itr->HasPredictedInsertSize()) { PredictedInsertSize.push_back(itr->PredictedInsertSize); } else { PredictedInsertSize.push_back(""); } if(itr->HasProductionDate()) { ProductionDate.push_back(itr->ProductionDate); } else { ProductionDate.push_back(""); } if(itr->HasProgram()) { Program.push_back(itr->Program); } else { Program.push_back(""); } if(itr->HasSample()) { Sample.push_back(itr->Sample); } else { Sample.push_back(""); } if(itr->HasSequencingCenter()) { SequencingCenter.push_back(itr->SequencingCenter); } else { SequencingCenter.push_back(""); } if(itr->HasSequencingTechnology()) { SequencingTechnology.push_back(itr->SequencingTechnology); } else { SequencingTechnology.push_back(""); } } bamReader.Close(); } }