bool GetBamTags(BamTools::BamAlignment &alignment, const int &num_flows, vector<float> &measurements, vector<float> &phase_params, int &start_flow) { vector<int16_t> quantized_measurements; // Retrieve normalized measurements from BAM file if (not alignment.GetTag("ZM", quantized_measurements)) { cerr << "ERROR: Normalized measurements ZM:tag is not present in read " << alignment.Name << endl; return false; } if ((int)quantized_measurements.size() > num_flows) { cerr << "ERROR: Normalized measurements ZM:tag length exceeds flow order length in read " << alignment.Name << endl; return false; } measurements.assign(quantized_measurements.size(), 0.0); for (size_t counter = 0; counter < quantized_measurements.size(); ++counter) measurements.at(counter) = (float)quantized_measurements.at(counter)/256; // Retrieve phasing parameters from BAM file if (not alignment.GetTag("ZP", phase_params)) { cerr << "ERROR: Phasing Parameters ZP:tag is not present in read " << alignment.Name << endl; return false; } if (phase_params.size() != 3) { cerr << "ERROR: Phasing Parameters ZP:tag does not have 3 phase parameters in read " << alignment.Name << endl; return false; } if (phase_params[0] < 0 or phase_params[0] > 1 or phase_params[1] < 0 or phase_params[1] > 1 or phase_params[2] < 0 or phase_params[2] > 1) { cerr << "ERROR: Phasing Parameters ZP:tag outside of [0,1] range in read " << alignment.Name << endl; return false; } phase_params[2] = 0.0f; // ad-hoc corrector: zero droop // Retrieve start flow if (not alignment.GetTag("ZF", start_flow)) { cerr << "ERROR: Start Flow ZF:tag not found in read " << alignment.Name << endl; return false; } if (start_flow < 0 or start_flow >= num_flows) { cerr << "ERROR: Start flow outsize of [0,num_flows) range in read " << alignment.Name << endl; cerr << "Start flow: " << start_flow << " Number of flows: " << num_flows; return false; } // A start flow of zero indicated a read that did not pass basecaller filters if (start_flow == 0) { cerr << 
"WARNING: Start Flow ZF:tag has zero value in read " << alignment.Name << endl; return false; } return true; }
// Reads tag `tag_name` of `aln` into `*destination` as a float.
// Returns the success flag reported by BamAlignment::GetTag.
bool ReadContainer::GetFloatBamTag(const BamTools::BamAlignment& aln, const std::string& tag_name, float* destination) {
  const bool tag_read = aln.GetTag(tag_name, *destination);
  return tag_read;
}
std::string getQuickStats(const std::string &bamFile, std::map< std::string, int > &keyLen, unsigned int &nFlowFZ, unsigned int &nFlowZM) { std::string errMsg = ""; BamTools::BamReader bamReader; if(!bamReader.Open(bamFile)) { errMsg += "Failed to open bam " + bamFile + "\n"; return(errMsg); } BamTools::SamHeader samHeader = bamReader.GetHeader(); for (BamTools::SamReadGroupIterator itr = samHeader.ReadGroups.Begin(); itr != samHeader.ReadGroups.End(); ++itr ) { if(itr->HasID()) keyLen[itr->ID] = itr->HasKeySequence() ? itr->KeySequence.length() : 0; if(itr->HasFlowOrder()) nFlowZM = std::max(nFlowZM,(unsigned int) itr->FlowOrder.length()); } BamTools::BamAlignment alignment; std::vector<uint16_t> flowIntFZ; while(bamReader.GetNextAlignment(alignment)) { if(alignment.GetTag("FZ", flowIntFZ)) nFlowFZ = flowIntFZ.size(); break; } bamReader.Close(); if(nFlowFZ==0) std::cout << "NOTE: bam file has no flow signals in FZ tag: " + bamFile + "\n"; if(nFlowZM==0) std::cout << "NOTE: bam file has no flow signals in ZM tag: " + bamFile + "\n"; return(errMsg); }
// Calculate the error rate between the read and the reference double getErrorRate(BamTools::BamAlignment& record) { int nm = 0; bool hasNM = record.GetTag("NM", nm); if(hasNM) return (double)nm / record.Length; else return 0.0f; }
// Loads the molecular tags of a read (ZT = prefix tag, YT = suffix tag) into
// `Tags` and checks them against what the read's read group requires.
//
// Returns true when no read group uses tags at all (Tags is cleared), or when
// the read carries every tag its read group needs and ends up with at least
// one tag. Returns false when the RG tag is missing, the read group is
// unknown, a required tag is absent, or no tag remains.
bool MolecularTagTrimmer::GetTagsFromBamAlignment(const BamTools::BamAlignment& alignment, MolTag& Tags)
{
  // Nothing to look at if no read group has tags
  if (num_read_groups_with_tags_ == 0) {
    Tags.Clear();
    return true;
  }

  // Pull both tags from the alignment; an absent tag becomes an empty string
  const bool has_prefix = alignment.GetTag("ZT", Tags.prefix_mol_tag);
  if (not has_prefix)
    Tags.prefix_mol_tag.clear();
  const bool has_suffix = alignment.GetTag("YT", Tags.suffix_mol_tag);
  if (not has_suffix)
    Tags.suffix_mol_tag.clear();

  // Resolve the read group of this read
  string read_group_name;
  if (not alignment.GetTag("RG", read_group_name))
    return false;
  std::map<string,int>::const_iterator rg_entry = read_group_name_to_index_.find(read_group_name);
  if (rg_entry == read_group_name_to_index_.end())
    return false;
  const int rg_index = rg_entry->second;

  // A tag the read group does not use is dropped; one it needs must be present
  if (not NeedPrefixTag(rg_index))
    Tags.prefix_mol_tag.clear();
  else if (Tags.prefix_mol_tag.empty())
    return false;

  if (not NeedSuffixTag(rg_index))
    Tags.suffix_mol_tag.clear();
  else if (Tags.suffix_mol_tag.empty())
    return false;

  // We don't allow the joint analysis of tagged and untagged samples at the same time
  if (Tags.HasTags())
    return true;
  return false;
}
// Returns the haplotype stored in the alignment's HAPLOTYPE_TAG, or -1 when
// the alignment does not carry that tag. If the tag exists but cannot be read
// as a uint8_t, printErrorAndDie is called. The extracted value is asserted
// to be 1 or 2.
int SNPBamProcessor::get_haplotype(BamTools::BamAlignment& aln){
  if (!aln.HasTag(HAPLOTYPE_TAG))
    return -1;

  // Initialized so the assert below never reads an indeterminate value.
  uint8_t haplotype = 0;
  if (!aln.GetTag(HAPLOTYPE_TAG, haplotype))
    printErrorAndDie("Failed to extract haplotype tag");

  assert(haplotype == 1 || haplotype == 2);
  return (int)haplotype;
}
// Looks up the library of an alignment via its read group.
//
//   aln           - alignment whose RG tag is read
//   rg_to_library - map from read-group ID to library name
//
// Returns the library mapped to the alignment's read group. Calls
// printErrorAndDie when the RG tag cannot be read or when the read group has
// no entry in `rg_to_library`.
std::string get_library(BamTools::BamAlignment& aln, std::map<std::string, std::string>& rg_to_library){
  // Check the extraction itself rather than only the tag's presence, so an
  // unreadable RG tag is reported as such instead of as an empty read group.
  std::string rg;
  if (!aln.GetTag("RG", rg))
    printErrorAndDie("Failed to retrieve BAM alignment's RG tag");

  auto iter = rg_to_library.find(rg);
  if (iter == rg_to_library.end())
    printErrorAndDie("No library found for read group " + rg + " in BAM file headers");
  return iter->second;
}
bool getNextAlignment(BamTools::BamAlignment &alignment, BamTools::BamReader &bamReader, const std::map<std::string, int> &groupID, std::vector< BamTools::BamAlignment > &alignmentSample, std::map<std::string, int> &wellIndex, unsigned int nSample) { if(nSample > 0) { // We are randomly sampling, so next read should come from the sample that was already taken from the bam file if(alignmentSample.size() > 0) { alignment = alignmentSample.back(); alignmentSample.pop_back(); alignment.BuildCharData(); return(true); } else { return(false); } } else { // No random sampling, so we're either returning everything or we're looking for specific read names bool storeRead = false; while(bamReader.GetNextAlignment(alignment)) { if(groupID.size() > 0) { std::string thisReadGroupID = ""; if( !alignment.GetTag("RG", thisReadGroupID) || (groupID.find(thisReadGroupID)==groupID.end()) ); continue; } storeRead=true; if(wellIndex.size() > 0) { // We are filtering by position, so check if we should skip or keep the read int thisCol,thisRow; if(1 != ion_readname_to_rowcol(alignment.Name.c_str(), &thisRow, &thisCol)) std::cerr << "Error parsing read name: " << alignment.Name << "\n"; std::stringstream wellIdStream; wellIdStream << thisCol << ":" << thisRow; std::map<std::string, int>::iterator wellIndexIter; wellIndexIter = wellIndex.find(wellIdStream.str()); if(wellIndexIter != wellIndex.end()) { // If the read ID matches we should keep, unless its a duplicate if(wellIndexIter->second >= 0) { storeRead=true; wellIndexIter->second=-1; } else { storeRead=false; std::cerr << "WARNING: found extra instance of readID " << wellIdStream.str() << ", keeping only first\n"; } } else { // read ID is not one we should keep storeRead=false; } } if(storeRead) break; } return(storeRead); } }
// Reads an integer-valued tag into `*destination`, whatever integer width the
// tag is stored with (int8/uint8/int16/uint16/int32/uint32).
//
// Returns false when the tag type cannot be determined or the value cannot be
// extracted; in that case `*destination` is not assigned by this function.
// For any non-integer tag type PrintMessageDieOnError is called with ERROR,
// and false is returned if that call comes back.
bool ReadContainer::GetIntBamTag(const BamTools::BamAlignment& aln, const std::string& tag_name, int* destination) {
  char stored_type;
  if (!aln.GetTagType(tag_name, stored_type))
    return false;

  switch (stored_type) {
    case BamTools::Constants::BAM_TAG_TYPE_INT32: {
      // Same width as the destination: read directly.
      return aln.GetTag(tag_name, *destination);
    }
    case BamTools::Constants::BAM_TAG_TYPE_INT8: {
      int8_t raw;
      const bool ok = aln.GetTag(tag_name, raw);
      if (ok)
        *destination = static_cast<int>(raw);
      return ok;
    }
    case BamTools::Constants::BAM_TAG_TYPE_UINT8: {
      uint8_t raw;
      const bool ok = aln.GetTag(tag_name, raw);
      if (ok)
        *destination = static_cast<int>(raw);
      return ok;
    }
    case BamTools::Constants::BAM_TAG_TYPE_INT16: {
      int16_t raw;
      const bool ok = aln.GetTag(tag_name, raw);
      if (ok)
        *destination = static_cast<int>(raw);
      return ok;
    }
    case BamTools::Constants::BAM_TAG_TYPE_UINT16: {
      uint16_t raw;
      const bool ok = aln.GetTag(tag_name, raw);
      if (ok)
        *destination = static_cast<int>(raw);
      return ok;
    }
    case BamTools::Constants::BAM_TAG_TYPE_UINT32: {
      uint32_t raw;
      const bool ok = aln.GetTag(tag_name, raw);
      if (ok)
        *destination = static_cast<int>(raw);
      return ok;
    }
    default: {
      stringstream msg;
      msg << "Encountered unsupported tag type " << tag_type_placeholder_unused(stored_type);
      PrintMessageDieOnError(msg.str(), ERROR);
      break;
    }
  }
  return false;
}
bool getTagParanoid(BamTools::BamAlignment &alignment, const std::string &tag, int64_t &value) { char tagType = ' '; if(alignment.GetTagType(tag, tagType)) { switch(tagType) { case BamTools::Constants::BAM_TAG_TYPE_INT8: { int8_t value_int8 = 0; alignment.GetTag(tag, value_int8); value = value_int8; } break; case BamTools::Constants::BAM_TAG_TYPE_UINT8: { uint8_t value_uint8 = 0; alignment.GetTag(tag, value_uint8); value = value_uint8; } break; case BamTools::Constants::BAM_TAG_TYPE_INT16: { int16_t value_int16 = 0; alignment.GetTag(tag, value_int16); value = value_int16; } break; case BamTools::Constants::BAM_TAG_TYPE_UINT16: { uint16_t value_uint16 = 0; alignment.GetTag(tag, value_uint16); value = value_uint16; } break; case BamTools::Constants::BAM_TAG_TYPE_INT32: { int32_t value_int32 = 0; alignment.GetTag(tag, value_int32); value = value_int32; } break; case BamTools::Constants::BAM_TAG_TYPE_UINT32: { uint32_t value_uint32 = 0; alignment.GetTag(tag, value_uint32); value = value_uint32; } break; default: { alignment.GetTag(tag, value); } break; } return(true); } else { return(false); } }
std::vector< IAlignment::SharedPtr > BamAlignmentReader::loadAlignmentsInRegion(Region::SharedPtr regionPtr, SampleManager::SharedPtr sampleManagerPtr, bool excludeDuplicateReads) { if (!m_is_open) { std::cout << "Bam file not opened" << std::endl; exit(0); } std::vector< IAlignment::SharedPtr > alignmentPtrs; int refID = this->m_bam_reader->GetReferenceID(regionPtr->getReferenceID()); // add 1 to the start and end positions because this is 0 based this->m_bam_reader->SetRegion(refID, regionPtr->getStartPosition(), refID, regionPtr->getEndPosition()); // std::cout << "BamAlignmentReader.cpp refID: " << refID << std::endl; BamTools::BamAlignment bamAlignment; while(this->m_bam_reader->GetNextAlignment(bamAlignment)) { if (bamAlignment.IsDuplicate() && excludeDuplicateReads) { continue; } std::string sampleName; bamAlignment.GetTag("RG", sampleName); Sample::SharedPtr samplePtr = sampleManagerPtr->getSamplePtr(sampleName); if (samplePtr == nullptr) { throw "There was an error in the sample name for: " + sampleName; } alignmentPtrs.push_back(std::make_shared< BamAlignment >(bamAlignment, samplePtr)); } // std::this_thread::sleep_for(std::chrono::milliseconds(10000)); if (m_alignment_reader_manager_ptr != nullptr) { m_alignment_reader_manager_ptr->checkinReader(this->shared_from_this()); } // std::cout << "got reads: " << regionPtr->getRegionString() << " " << alignmentPtrs.size() << std::endl; return alignmentPtrs; }
void Config::InitializationClustering() { struct stat st; if(stat(Workspace.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Workspace directory already present"); else if (mkdir(Workspace.c_str(), 0755) != 0) { Log("[Error] Could not create workspace directory: " + Workspace); exit(1); } RunningTasksFile = Workspace + "/" + FilePrefix + "running.tasks"; StatsFile = Workspace + "/" + FilePrefix + "stats"; BinClusterFile = Workspace + "/" + FilePrefix + "bpc"; clusterFile = new ClusterFile(BinClusterFile); clusterDir = Workspace + "/clusters/"; if(stat(clusterDir.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Cluster directory already present"); else if (mkdir(clusterDir.c_str(), 0755) != 0) { Log("[Error] Could not create cluster directory: " + clusterDir); exit(1); } insertsizeDir = Workspace + "/insertsize/"; if(stat(insertsizeDir.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Insertsize directory already present"); else if (mkdir(insertsizeDir.c_str(), 0755) != 0) { Log("[Error] Could not create insertsize directory: " + insertsizeDir); exit(1); } coverageDir = Workspace + "/coverage/"; if(stat(coverageDir.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Coverage directory already present"); else if (mkdir(coverageDir.c_str(), 0755) != 0) { Log("[Error] Could not create coverage directory: " + coverageDir); exit(1); } if (!ForwardBam.empty() && !ReverseBam.empty() && PairedBam.empty()) { UsePairedBam = false; } else if (ForwardBam.empty() && ReverseBam.empty() && !PairedBam.empty()) { UsePairedBam = true; } else { Log("[Error] No correct bam file(s)"); exit(1); } BamTools::BamAlignment alignment; BamTools::BamReader BamReader; if (UsePairedBam) { BamReader.Open(PairedBam); if (not BamReader.IsOpen()) { Log("[Error] Could not open paired bam"); exit(1); } if (PairedIndex.empty()) { if (not BamReader.LocateIndex(BamTools::BamIndex::STANDARD)) { PairedIndex = 
PairedBam.substr(0,PairedBam.find_last_of(".bam")-3) + ".bai"; BamReader.OpenIndex(PairedIndex); } if (not BamReader.HasIndex()) { Log("[Error] No index for bamfile"); exit(1); } } BamTools::SamHeader header = BamReader.GetHeader(); for (BamTools::SamReadGroupIterator it = header.ReadGroups.Begin(); it != header.ReadGroups.End(); it++) { BamTools::SamReadGroup* readgroup = &*it; readNameConverter.TrimName(readgroup->ID); readNameConverter.AddReadGroup(readgroup->ID); } long int count = 0; while (BamReader.GetNextAlignment(alignment)) { string RG; if (alignment.GetTag("RG", RG)) { if (not NameTrim.empty()) readNameConverter.TrimName(RG); if (readNameConverter.AddReadGroup(RG)) { Log("[Warning] Readgroup '" + RG + "' found in reads but not in header"); count = 0; } } count++; if (count > 10000) break; } BamReader.Close(); } else { BamReader.Open(ForwardBam); if (not BamReader.IsOpen()) { Log("[Error] Could not open first/forward bam"); exit(1); } if (ForwardIndex.empty()) { if (not BamReader.LocateIndex(BamTools::BamIndex::STANDARD)) { ForwardIndex = ForwardBam.substr(0,ForwardBam.find_last_of(".bam")-3) + ".bai"; BamReader.OpenIndex(ForwardIndex); } if (not BamReader.HasIndex()) { Log("[Error] No index for forward bamfile"); exit(1); } } BamTools::SamHeader forwardheader = BamReader.GetHeader(); for (BamTools::SamReadGroupIterator it = forwardheader.ReadGroups.Begin(); it != forwardheader.ReadGroups.End(); it++) { BamTools::SamReadGroup* readgroup = &*it; readNameConverter.TrimName(readgroup->ID); readNameConverter.AddReadGroup(readgroup->ID); } long int count = 0; while (BamReader.GetNextAlignment(alignment)) { string RG; if (alignment.GetTag("RG", RG)) { if (!NameTrim.empty()) readNameConverter.TrimName(RG); if (readNameConverter.AddReadGroup(RG)) { Log("[Warning] Readgroup '" + RG + "' found in forward reads but not in header"); count = 0; } } count++; if (count > 10000) break; } BamReader.Close(); BamReader.Open(ReverseBam); if (not BamReader.IsOpen()) { 
Log("[Error] Could not open second/reverse bam"); exit(1); } if (ReverseIndex.empty()) { if (not BamReader.LocateIndex(BamTools::BamIndex::STANDARD)) { ReverseIndex = ReverseBam.substr(0,ReverseBam.find_last_of(".bam")-3) + ".bai"; BamReader.OpenIndex(ReverseIndex); } if (not BamReader.HasIndex()) { Log("[Error] No index for reverse bamfile"); exit(1); } } BamTools::SamHeader reverseheader = BamReader.GetHeader(); for (BamTools::SamReadGroupIterator it = reverseheader.ReadGroups.Begin(); it != reverseheader.ReadGroups.End(); it++) { BamTools::SamReadGroup* readgroup = &*it; readNameConverter.TrimName(readgroup->ID); if (readNameConverter.AddReadGroup(readgroup->ID)) { Log("[Warning] Readgroup '" + readgroup->ID + "' found in reverse but not in forward"); } } count = 0; while (BamReader.GetNextAlignment(alignment)) { string RG; if (alignment.GetTag("RG", RG)) { if (!NameTrim.empty()) readNameConverter.TrimName(RG); if (readNameConverter.AddReadGroup(RG)) { Log("[Warning] Readgroup '" + RG + "' found in reverse reads but not in header"); count = 0; } } count++; if (count > 10000) break; } BamReader.Close(); } for(map<string, int>::iterator it = readNameConverter.ReadGroups.begin(); it!=readNameConverter.ReadGroups.end(); ++it) { ostringstream logBuffer; logBuffer << "Readgroup found: " << it->second << " - " << it->first; Log(logBuffer.str()); } writeConfigFile(Workspace + FilePrefix + "config"); }