bool GetBamTags(BamTools::BamAlignment &alignment,
			            const int              &num_flows,
	                    vector<float>          &measurements,
			            vector<float>          &phase_params,
	                    int                    &start_flow) {

  vector<int16_t>  quantized_measurements;
  // Retrieve normalized measurements from BAM file
  if (not alignment.GetTag("ZM", quantized_measurements)) {
    cerr << "ERROR: Normalized measurements ZM:tag is not present in read " << alignment.Name << endl;
    return false;
  }
  if ((int)quantized_measurements.size() > num_flows) {
    cerr << "ERROR: Normalized measurements ZM:tag length exceeds flow order length in read " << alignment.Name << endl;
    return false;
  }
  measurements.assign(quantized_measurements.size(), 0.0);
  for (size_t counter = 0; counter < quantized_measurements.size(); ++counter)
    measurements.at(counter) = (float)quantized_measurements.at(counter)/256;

  // Retrieve phasing parameters from BAM file
  if (not alignment.GetTag("ZP", phase_params)) {
    cerr << "ERROR: Phasing Parameters ZP:tag is not present in read " << alignment.Name << endl;
    return false;
  }
  if (phase_params.size() != 3) {
    cerr << "ERROR: Phasing Parameters ZP:tag does not have 3 phase parameters in read " << alignment.Name << endl;
    return false;
  }
  if (phase_params[0] < 0 or phase_params[0] > 1 or phase_params[1] < 0 or phase_params[1] > 1
      or phase_params[2] < 0 or phase_params[2] > 1) {
    cerr << "ERROR: Phasing Parameters ZP:tag outside of [0,1] range in read " << alignment.Name << endl;
    return false;
  }
  phase_params[2] = 0.0f;   // ad-hoc corrector: zero droop

  // Retrieve start flow
  if (not alignment.GetTag("ZF", start_flow)) {
    cerr << "ERROR: Start Flow ZF:tag not found in read " << alignment.Name << endl;
    return false;
  }
  if (start_flow < 0 or start_flow >= num_flows) {
    cerr << "ERROR: Start flow outsize of [0,num_flows) range in read " << alignment.Name << endl;
    cerr << "Start flow: " << start_flow << " Number of flows: " << num_flows;
    return false;
  }
  // A start flow of zero indicated a read that did not pass basecaller filters
  if (start_flow == 0) {
    cerr << "WARNING: Start Flow ZF:tag has zero value in read " << alignment.Name << endl;
    return false;
  }
  return true;
}
bool ReadContainer::GetFloatBamTag(const BamTools::BamAlignment& aln,
		     const std::string& tag_name, float* destination) {
  if (!aln.GetTag(tag_name, *destination)) {
    return false;
  }
  return true;
}
Exemple #3
0
std::string getQuickStats(const std::string &bamFile, std::map< std::string, int > &keyLen, unsigned int &nFlowFZ, unsigned int &nFlowZM) {
	std::string errMsg = "";
	BamTools::BamReader bamReader;
	if(!bamReader.Open(bamFile)) {
		errMsg += "Failed to open bam " + bamFile + "\n";
		return(errMsg);
	}
	BamTools::SamHeader samHeader = bamReader.GetHeader();
	for (BamTools::SamReadGroupIterator itr = samHeader.ReadGroups.Begin(); itr != samHeader.ReadGroups.End(); ++itr ) {
		if(itr->HasID())
			keyLen[itr->ID] = itr->HasKeySequence() ? itr->KeySequence.length() : 0;
		if(itr->HasFlowOrder())
			nFlowZM = std::max(nFlowZM,(unsigned int) itr->FlowOrder.length());
	}
	BamTools::BamAlignment alignment;
	std::vector<uint16_t> flowIntFZ;
	while(bamReader.GetNextAlignment(alignment)) {
		if(alignment.GetTag("FZ", flowIntFZ))
			nFlowFZ = flowIntFZ.size();
		break;
	}
	bamReader.Close();
	if(nFlowFZ==0)
		std::cout << "NOTE: bam file has no flow signals in FZ tag: " + bamFile + "\n";
	if(nFlowZM==0)
		std::cout << "NOTE: bam file has no flow signals in ZM tag: " + bamFile + "\n";
	return(errMsg);
}
Exemple #4
0
// Calculate the error rate between the read and the reference
double getErrorRate(BamTools::BamAlignment& record)
{
    int nm = 0;
    bool hasNM = record.GetTag("NM", nm);
    if(hasNM)
        return (double)nm / record.Length;
    else
        return 0.0f;
}
Exemple #5
0
bool  MolecularTagTrimmer::GetTagsFromBamAlignment(const BamTools::BamAlignment& alignment, MolTag& Tags)
{
  // Don't bother if there is nothing to look at
  if (num_read_groups_with_tags_ == 0){
    Tags.Clear();
    return true;
  }

  // Load Tags from Bam Alignment
  if (not alignment.GetTag("ZT", Tags.prefix_mol_tag))
    Tags.prefix_mol_tag.clear();

  if (not alignment.GetTag("YT", Tags.suffix_mol_tag))
    Tags.suffix_mol_tag.clear();

  // Check if this read should have tags associated with it
  string read_group_name;
  if (not alignment.GetTag("RG",read_group_name))
    return false;

  std::map<string,int>::const_iterator idx_it = read_group_name_to_index_.find(read_group_name);
  if (idx_it == read_group_name_to_index_.end())
    return false;

  if (NeedPrefixTag(idx_it->second)) {
    if (Tags.prefix_mol_tag.empty())
      return false;
  }
  else
    Tags.prefix_mol_tag.clear();

  if (NeedSuffixTag(idx_it->second)) {
    if (Tags.suffix_mol_tag.empty())
      return false;
  }
  else
    Tags.suffix_mol_tag.clear();

  // We don't allow the joint analysis of tagged and untagged samples at the same time
  if (not Tags.HasTags())
    return false;

  return true;
}
Exemple #6
0
int SNPBamProcessor::get_haplotype(BamTools::BamAlignment& aln){
  if (!aln.HasTag(HAPLOTYPE_TAG))
    return -1;
  uint8_t haplotype;
  if (!aln.GetTag(HAPLOTYPE_TAG, haplotype)){
    char type;
    aln.GetTagType(HAPLOTYPE_TAG, type);
    printErrorAndDie("Failed to extract haplotype tag");
  }
  assert(haplotype == 1 || haplotype == 2);
  return (int)haplotype;
}
Exemple #7
0
std::string get_library(BamTools::BamAlignment& aln, std::map<std::string, std::string>& rg_to_library){
  std::string rg;
  std::string rg_tag = "RG";
  char tag_type = 'Z';
  if (!aln.GetTagType(rg_tag, tag_type))
    printErrorAndDie("Failed to retrieve BAM alignment's RG tag");
  aln.GetTag("RG", rg);
  auto iter = rg_to_library.find(rg);
  if (iter == rg_to_library.end())
    printErrorAndDie("No library found for read group " + rg + " in BAM file headers");
  return iter->second;
}
Exemple #8
0
bool getNextAlignment(BamTools::BamAlignment &alignment, BamTools::BamReader &bamReader, const std::map<std::string, int> &groupID, std::vector< BamTools::BamAlignment > &alignmentSample, std::map<std::string, int> &wellIndex, unsigned int nSample) {
	if(nSample > 0) {
		// We are randomly sampling, so next read should come from the sample that was already taken from the bam file
		if(alignmentSample.size() > 0) {
			alignment = alignmentSample.back();
			alignmentSample.pop_back();
			alignment.BuildCharData();
			return(true);
		} else {
			return(false);
		}
	} else {
		// No random sampling, so we're either returning everything or we're looking for specific read names
		bool storeRead = false;
		while(bamReader.GetNextAlignment(alignment)) {
			if(groupID.size() > 0) {
				std::string thisReadGroupID = "";
				if( !alignment.GetTag("RG", thisReadGroupID) || (groupID.find(thisReadGroupID)==groupID.end()) );
					continue;
			}
			storeRead=true;
			if(wellIndex.size() > 0) {
				// We are filtering by position, so check if we should skip or keep the read
				int thisCol,thisRow;
				if(1 != ion_readname_to_rowcol(alignment.Name.c_str(), &thisRow, &thisCol))
					std::cerr << "Error parsing read name: " << alignment.Name << "\n";
				std::stringstream wellIdStream;
				wellIdStream << thisCol << ":" << thisRow;
				std::map<std::string, int>::iterator wellIndexIter;
				wellIndexIter = wellIndex.find(wellIdStream.str());
				if(wellIndexIter != wellIndex.end()) {
					// If the read ID matches we should keep, unless its a duplicate
					if(wellIndexIter->second >= 0) {
						storeRead=true;
						wellIndexIter->second=-1;
					} else {
						storeRead=false;
						std::cerr << "WARNING: found extra instance of readID " << wellIdStream.str() << ", keeping only first\n";
					}
				} else {
					// read ID is not one we should keep
					storeRead=false;
				}
			}
			if(storeRead)
				break;
		}
		return(storeRead);
	}
}
bool ReadContainer::GetIntBamTag(const BamTools::BamAlignment& aln,
		  const std::string& tag_name, int* destination) {
  char tag_type;
  if (!aln.GetTagType(tag_name, tag_type)) {return false;}
  switch (tag_type) {
  case (BamTools::Constants::BAM_TAG_TYPE_INT32):
    return aln.GetTag(tag_name, *destination);
  case (BamTools::Constants::BAM_TAG_TYPE_INT8):
    int8_t d8;
    if (!aln.GetTag(tag_name, d8)) {
      return false;
    }
    *destination = static_cast<int>(d8);
    return true;
  case (BamTools::Constants::BAM_TAG_TYPE_UINT8):
    uint8_t ud8;
    if (!aln.GetTag(tag_name, ud8)) {
      return false;
    }
    *destination = static_cast<int>(ud8);
    return true;
  case (BamTools::Constants::BAM_TAG_TYPE_INT16):
    int16_t d16;
    if (!aln.GetTag(tag_name, d16)) {
      return false;
    }
    *destination = static_cast<int>(d16);
    return true;
  case (BamTools::Constants::BAM_TAG_TYPE_UINT16):
    uint16_t ud16;
    if (!aln.GetTag(tag_name, ud16)) {
      return false;
    }
    *destination = static_cast<int>(ud16);
    return true;
  case (BamTools::Constants::BAM_TAG_TYPE_UINT32):
    uint32_t ud32;
    if (!aln.GetTag(tag_name, ud32)) {
      return false;
    }
    *destination = static_cast<int>(ud32);
    return true;
  default:
    stringstream msg;
    msg << "Encountered unsupported tag type " << tag_type;
    PrintMessageDieOnError(msg.str(), ERROR);
  }
  return false;
}
Exemple #10
0
bool getTagParanoid(BamTools::BamAlignment &alignment, const std::string &tag, int64_t &value) {
	char tagType = ' ';
	if(alignment.GetTagType(tag, tagType)) {
		switch(tagType) {
			case BamTools::Constants::BAM_TAG_TYPE_INT8: {
				int8_t value_int8 = 0;
				alignment.GetTag(tag, value_int8);
				value = value_int8;
			} break;
			case BamTools::Constants::BAM_TAG_TYPE_UINT8: {
				uint8_t value_uint8 = 0;
				alignment.GetTag(tag, value_uint8);
				value = value_uint8;
			} break;
			case BamTools::Constants::BAM_TAG_TYPE_INT16: {
				int16_t value_int16 = 0;
				alignment.GetTag(tag, value_int16);
				value = value_int16;
			} break;
			case BamTools::Constants::BAM_TAG_TYPE_UINT16: {
				uint16_t value_uint16 = 0;
				alignment.GetTag(tag, value_uint16);
				value = value_uint16;
			} break;
			case BamTools::Constants::BAM_TAG_TYPE_INT32: {
				int32_t value_int32 = 0;
				alignment.GetTag(tag, value_int32);
				value = value_int32;
			} break;
			case BamTools::Constants::BAM_TAG_TYPE_UINT32: {
				uint32_t value_uint32 = 0;
				alignment.GetTag(tag, value_uint32);
				value = value_uint32;
			} break;
			default: {
				alignment.GetTag(tag, value);
			} break;
		}
		return(true);
	} else {
		return(false);
	}
}
	std::vector< IAlignment::SharedPtr > BamAlignmentReader::loadAlignmentsInRegion(Region::SharedPtr regionPtr, SampleManager::SharedPtr sampleManagerPtr, bool excludeDuplicateReads)
	{
		if (!m_is_open)
		{
			std::cout << "Bam file not opened" << std::endl;
			exit(0);
		}
		std::vector< IAlignment::SharedPtr > alignmentPtrs;

		int refID = this->m_bam_reader->GetReferenceID(regionPtr->getReferenceID());
		// add 1 to the start and end positions because this is 0 based
		this->m_bam_reader->SetRegion(refID, regionPtr->getStartPosition(), refID, regionPtr->getEndPosition());

		// std::cout << "BamAlignmentReader.cpp refID: " << refID << std::endl;
		BamTools::BamAlignment bamAlignment;
		while(this->m_bam_reader->GetNextAlignment(bamAlignment))
		{
            if (bamAlignment.IsDuplicate() && excludeDuplicateReads) { continue; }
			std::string sampleName;
			bamAlignment.GetTag("RG", sampleName);

			Sample::SharedPtr samplePtr = sampleManagerPtr->getSamplePtr(sampleName);
			if (samplePtr == nullptr)
			{
				throw "There was an error in the sample name for: " + sampleName;
			}
			alignmentPtrs.push_back(std::make_shared< BamAlignment >(bamAlignment, samplePtr));
		}
		// std::this_thread::sleep_for(std::chrono::milliseconds(10000));
		if (m_alignment_reader_manager_ptr != nullptr)
		{
			m_alignment_reader_manager_ptr->checkinReader(this->shared_from_this());
		}

		// std::cout << "got reads: " << regionPtr->getRegionString() << " " << alignmentPtrs.size() << std::endl;
		return alignmentPtrs;
	}
Exemple #12
0
void Config::InitializationClustering() {
    struct stat st;
    if(stat(Workspace.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Workspace directory already present");
    else if (mkdir(Workspace.c_str(), 0755) != 0) {
        Log("[Error] Could not create workspace directory: " + Workspace);
        exit(1);
    }
    RunningTasksFile = Workspace + "/" + FilePrefix + "running.tasks";
    StatsFile = Workspace + "/" + FilePrefix + "stats";
    BinClusterFile = Workspace + "/" + FilePrefix + "bpc";
    clusterFile = new ClusterFile(BinClusterFile);
    clusterDir = Workspace + "/clusters/";
    if(stat(clusterDir.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Cluster directory already present");
    else if (mkdir(clusterDir.c_str(), 0755) != 0) {
        Log("[Error] Could not create cluster directory: " + clusterDir);
        exit(1);
    }
    insertsizeDir = Workspace + "/insertsize/";
    if(stat(insertsizeDir.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Insertsize directory already present");
    else if (mkdir(insertsizeDir.c_str(), 0755) != 0) {
        Log("[Error] Could not create insertsize directory: " + insertsizeDir);
        exit(1);
    }
    coverageDir = Workspace + "/coverage/";
    if(stat(coverageDir.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Coverage directory already present");
    else if (mkdir(coverageDir.c_str(), 0755) != 0) {
        Log("[Error] Could not create coverage directory: " + coverageDir);
        exit(1);
    }
    
    if (!ForwardBam.empty() && !ReverseBam.empty() && PairedBam.empty()) {
        UsePairedBam = false;
    } else if (ForwardBam.empty() && ReverseBam.empty() && !PairedBam.empty()) {
        UsePairedBam = true;
    } else {
        Log("[Error] No correct bam file(s)");
        exit(1);
    }
    
    BamTools::BamAlignment alignment;
    BamTools::BamReader BamReader;
    
    if (UsePairedBam) {
        BamReader.Open(PairedBam);
        if (not BamReader.IsOpen()) {
            Log("[Error] Could not open paired bam");
            exit(1);
        }
        if (PairedIndex.empty()) {
            if (not BamReader.LocateIndex(BamTools::BamIndex::STANDARD)) {
                PairedIndex = PairedBam.substr(0,PairedBam.find_last_of(".bam")-3) + ".bai";
                BamReader.OpenIndex(PairedIndex);
            }
            if (not BamReader.HasIndex()) {
                Log("[Error] No index for bamfile");
                exit(1);
            }
        }
        BamTools::SamHeader header = BamReader.GetHeader();
        for (BamTools::SamReadGroupIterator it = header.ReadGroups.Begin(); it != header.ReadGroups.End(); it++) {
            BamTools::SamReadGroup* readgroup = &*it;
            readNameConverter.TrimName(readgroup->ID);
            readNameConverter.AddReadGroup(readgroup->ID);
        }
        long int count = 0;
        while (BamReader.GetNextAlignment(alignment)) {
            string RG;
            if (alignment.GetTag("RG", RG)) {
                if (not NameTrim.empty()) readNameConverter.TrimName(RG);
                if (readNameConverter.AddReadGroup(RG)) {
                    Log("[Warning] Readgroup '" + RG + "' found in reads but not in header");
                    count = 0;
                }
            }
            count++;
            if (count > 10000) break;
        }
        BamReader.Close();
    } else {
        BamReader.Open(ForwardBam);
        if (not BamReader.IsOpen()) {
            Log("[Error] Could not open first/forward bam");
            exit(1);
        }
        if (ForwardIndex.empty()) {
            if (not BamReader.LocateIndex(BamTools::BamIndex::STANDARD)) {
                ForwardIndex = ForwardBam.substr(0,ForwardBam.find_last_of(".bam")-3) + ".bai";
                BamReader.OpenIndex(ForwardIndex);
            }
            if (not BamReader.HasIndex()) {
                Log("[Error] No index for forward bamfile");
                exit(1);
            }
        }
        BamTools::SamHeader forwardheader = BamReader.GetHeader();
        for (BamTools::SamReadGroupIterator it = forwardheader.ReadGroups.Begin(); it != forwardheader.ReadGroups.End(); it++) {
            BamTools::SamReadGroup* readgroup = &*it;
            readNameConverter.TrimName(readgroup->ID);
            readNameConverter.AddReadGroup(readgroup->ID);
        }
        long int count = 0;
        while (BamReader.GetNextAlignment(alignment)) {
            string RG;
            if (alignment.GetTag("RG", RG)) {
                if (!NameTrim.empty()) readNameConverter.TrimName(RG);
                if (readNameConverter.AddReadGroup(RG)) {
                    Log("[Warning] Readgroup '" + RG + "' found in forward reads but not in header");
                    count = 0;
                }
            }
            count++;
            if (count > 10000) break;
        }
        BamReader.Close();
        BamReader.Open(ReverseBam);
        if (not BamReader.IsOpen()) {
            Log("[Error] Could not open second/reverse bam");
            exit(1);
        }
        if (ReverseIndex.empty()) {
            if (not BamReader.LocateIndex(BamTools::BamIndex::STANDARD)) {
                ReverseIndex = ReverseBam.substr(0,ReverseBam.find_last_of(".bam")-3) + ".bai";
                BamReader.OpenIndex(ReverseIndex);
            }
            if (not BamReader.HasIndex()) {
                Log("[Error] No index for reverse bamfile");
                exit(1);
            }
        }
        BamTools::SamHeader reverseheader = BamReader.GetHeader();
        for (BamTools::SamReadGroupIterator it = reverseheader.ReadGroups.Begin(); it != reverseheader.ReadGroups.End(); it++) {
            BamTools::SamReadGroup* readgroup = &*it;
            readNameConverter.TrimName(readgroup->ID);
            if (readNameConverter.AddReadGroup(readgroup->ID)) {
                Log("[Warning] Readgroup '" + readgroup->ID + "' found in reverse but not in forward");
            }
        }
        count = 0;
        while (BamReader.GetNextAlignment(alignment)) {
            string RG;
            if (alignment.GetTag("RG", RG)) {
                if (!NameTrim.empty()) readNameConverter.TrimName(RG);
                if (readNameConverter.AddReadGroup(RG)) {
                    Log("[Warning] Readgroup '" + RG + "' found in reverse reads but not in header");
                    count = 0;
                } 
            }
            count++;
            if (count > 10000) break;
        }
        BamReader.Close();
    }
    
    for(map<string, int>::iterator it = readNameConverter.ReadGroups.begin(); it!=readNameConverter.ReadGroups.end(); ++it) {
        ostringstream logBuffer;
        logBuffer << "Readgroup found: " << it->second << " - " << it->first;
        Log(logBuffer.str());
    }
    
    writeConfigFile(Workspace + FilePrefix + "config");
}