C++ (Cpp) BamAlignment Exemples, bamtools::BamAlignment C++ (Cpp) Exemples

Exemple #1

0

Afficher le fichier

Fichier : BamHelper.cpp Projet : hgy851018/TS

std::string getQuickStats(const std::string &bamFile, std::map< std::string, int > &keyLen, unsigned int &nFlowFZ, unsigned int &nFlowZM) {
	std::string errMsg = "";
	BamTools::BamReader bamReader;
	if(!bamReader.Open(bamFile)) {
		errMsg += "Failed to open bam " + bamFile + "\n";
		return(errMsg);
	}
	BamTools::SamHeader samHeader = bamReader.GetHeader();
	for (BamTools::SamReadGroupIterator itr = samHeader.ReadGroups.Begin(); itr != samHeader.ReadGroups.End(); ++itr ) {
		if(itr->HasID())
			keyLen[itr->ID] = itr->HasKeySequence() ? itr->KeySequence.length() : 0;
		if(itr->HasFlowOrder())
			nFlowZM = std::max(nFlowZM,(unsigned int) itr->FlowOrder.length());
	}
	BamTools::BamAlignment alignment;
	std::vector<uint16_t> flowIntFZ;
	while(bamReader.GetNextAlignment(alignment)) {
		if(alignment.GetTag("FZ", flowIntFZ))
			nFlowFZ = flowIntFZ.size();
		break;
	}
	bamReader.Close();
	if(nFlowFZ==0)
		std::cout << "NOTE: bam file has no flow signals in FZ tag: " + bamFile + "\n";
	if(nFlowZM==0)
		std::cout << "NOTE: bam file has no flow signals in ZM tag: " + bamFile + "\n";
	return(errMsg);
}

Exemple #2

0

Afficher le fichier

Fichier : BaseHypothesisEvaluator.cpp Projet : fw1121/Pandoras-Toolbox-for-Bioinformatics

// Extracts the flow-space tags ZM, ZP and ZF from one BAM record and validates them.
//
//   alignment     BAM record to read the tags from
//   num_flows     upper bound for the ZM length; exclusive upper bound for the start flow
//   measurements  receives the ZM values, each divided by 256
//   phase_params  receives the three ZP values; the third one is then forced to zero
//   start_flow    receives the ZF value
//
// Returns true only if all three tags are present and pass validation. A start flow
// of zero is rejected as well. The outputs are filled in tag order, so when a later
// check fails the outputs of earlier tags have already been overwritten.
bool GetBamTags(BamTools::BamAlignment &alignment,
			            const int              &num_flows,
	                    vector<float>          &measurements,
			            vector<float>          &phase_params,
	                    int                    &start_flow) {

  vector<int16_t>  quantized_measurements;
  // Retrieve normalized measurements from BAM file
  if (not alignment.GetTag("ZM", quantized_measurements)) {
    cerr << "ERROR: Normalized measurements ZM:tag is not present in read " << alignment.Name << endl;
    return false;
  }
  if ((int)quantized_measurements.size() > num_flows) {
    cerr << "ERROR: Normalized measurements ZM:tag length exceeds flow order length in read " << alignment.Name << endl;
    return false;
  }
  // ZM values are divided by 256 to obtain the float measurements
  measurements.assign(quantized_measurements.size(), 0.0);
  for (size_t counter = 0; counter < quantized_measurements.size(); ++counter)
    measurements.at(counter) = (float)quantized_measurements.at(counter)/256;

  // Retrieve phasing parameters from BAM file
  if (not alignment.GetTag("ZP", phase_params)) {
    cerr << "ERROR: Phasing Parameters ZP:tag is not present in read " << alignment.Name << endl;
    return false;
  }
  if (phase_params.size() != 3) {
    cerr << "ERROR: Phasing Parameters ZP:tag does not have 3 phase parameters in read " << alignment.Name << endl;
    return false;
  }
  if (phase_params[0] < 0 or phase_params[0] > 1 or phase_params[1] < 0 or phase_params[1] > 1
      or phase_params[2] < 0 or phase_params[2] > 1) {
    cerr << "ERROR: Phasing Parameters ZP:tag outside of [0,1] range in read " << alignment.Name << endl;
    return false;
  }
  phase_params[2] = 0.0f;   // ad-hoc corrector: zero droop

  // Retrieve start flow
  if (not alignment.GetTag("ZF", start_flow)) {
    cerr << "ERROR: Start Flow ZF:tag not found in read " << alignment.Name << endl;
    return false;
  }
  if (start_flow < 0 or start_flow >= num_flows) {
    cerr << "ERROR: Start flow outsize of [0,num_flows) range in read " << alignment.Name << endl;
    // Terminate this line as well so the next diagnostic does not get appended to it
    cerr << "Start flow: " << start_flow << " Number of flows: " << num_flows << endl;
    return false;
  }
  // A start flow of zero indicated a read that did not pass basecaller filters
  if (start_flow == 0) {
    cerr << "WARNING: Start Flow ZF:tag has zero value in read " << alignment.Name << endl;
    return false;
  }
  return true;
}

Exemple #3

0

Afficher le fichier

Fichier : snp_bam_processor.cpp Projet : mrG7/HipSTR

// Returns the value stored under HAPLOTYPE_TAG for this alignment, or -1 when the
// alignment does not carry that tag. A tag that is present but cannot be extracted
// is reported through printErrorAndDie. The value is asserted to be 1 or 2.
int SNPBamProcessor::get_haplotype(BamTools::BamAlignment& aln){
  if (aln.HasTag(HAPLOTYPE_TAG)){
    uint8_t hap_value;
    const bool extracted = aln.GetTag(HAPLOTYPE_TAG, hap_value);
    if (!extracted){
      char stored_type;
      aln.GetTagType(HAPLOTYPE_TAG, stored_type);
      printErrorAndDie("Failed to extract haplotype tag");
    }
    assert(hap_value == 1 || hap_value == 2);
    return (int)hap_value;
  }
  return -1;
}

Exemple #4

0

Afficher le fichier

Fichier : pcr_duplicates.cpp Projet : mrG7/HipSTR

// Looks up the library associated with the alignment's RG (read group) tag.
//
//   aln            alignment whose RG tag is read
//   rg_to_library  mapping from read group to library name
//
// Returns the mapped library name. A missing RG tag, or a read group without an
// entry in the map, is reported through printErrorAndDie.
std::string get_library(BamTools::BamAlignment& aln, std::map<std::string, std::string>& rg_to_library){
  std::string tag_name = "RG";
  char observed_type = 'Z';
  const bool has_read_group = aln.GetTagType(tag_name, observed_type);
  if (!has_read_group)
    printErrorAndDie("Failed to retrieve BAM alignment's RG tag");

  std::string read_group;
  aln.GetTag("RG", read_group);

  std::map<std::string, std::string>::iterator match = rg_to_library.find(read_group);
  if (rg_to_library.end() == match)
    printErrorAndDie("No library found for read group " + read_group + " in BAM file headers");
  return match->second;
}

Exemple #5

0

Afficher le fichier

Fichier : BamHelper.cpp Projet : hgy851018/TS

// Fetches the next alignment that should be processed.
//
//   alignment        receives the selected alignment
//   bamReader        reader to pull alignments from when not sampling
//   groupID          if non-empty, only reads whose RG tag is a key of this map are kept
//   alignmentSample  pre-collected random sample; consumed from the back when nSample > 0
//   wellIndex        if non-empty, only reads whose "col:row" id is a key of this map are
//                    kept; the entry is set to -1 once used so later duplicates are dropped
//   nSample          non-zero when random sampling is active
//
// Returns true if an alignment was stored in 'alignment', false when the sample or the
// reader is exhausted.
bool getNextAlignment(BamTools::BamAlignment &alignment, BamTools::BamReader &bamReader, const std::map<std::string, int> &groupID, std::vector< BamTools::BamAlignment > &alignmentSample, std::map<std::string, int> &wellIndex, unsigned int nSample) {
	if(nSample > 0) {
		// We are randomly sampling, so next read should come from the sample that was already taken from the bam file
		if(alignmentSample.empty())
			return(false);
		alignment = alignmentSample.back();
		alignmentSample.pop_back();
		alignment.BuildCharData();
		return(true);
	}

	// No random sampling, so we're either returning everything or we're looking for specific read names
	while(bamReader.GetNextAlignment(alignment)) {
		if(!groupID.empty()) {
			// Skip reads with no read group or with a read group that was not requested.
			// (A stray ';' after this condition used to make the skip unconditional.)
			std::string thisReadGroupID;
			if( !alignment.GetTag("RG", thisReadGroupID) || (groupID.find(thisReadGroupID)==groupID.end()) )
				continue;
		}
		if(wellIndex.empty())
			return(true);

		// We are filtering by position, so check if we should skip or keep the read
		int thisCol = 0, thisRow = 0;
		if(1 != ion_readname_to_rowcol(alignment.Name.c_str(), &thisRow, &thisCol)) {
			// Without coordinates the read cannot be matched against wellIndex, so skip it
			// rather than look up an id built from indeterminate values
			std::cerr << "Error parsing read name: " << alignment.Name << "\n";
			continue;
		}
		std::stringstream wellIdStream;
		wellIdStream << thisCol << ":" << thisRow;
		std::map<std::string, int>::iterator wellIndexIter = wellIndex.find(wellIdStream.str());
		if(wellIndexIter == wellIndex.end())
			continue; // read ID is not one we should keep
		if(wellIndexIter->second >= 0) {
			// First occurrence of a requested read: mark it as used and return it
			wellIndexIter->second = -1;
			return(true);
		}
		std::cerr << "WARNING: found extra instance of readID " << wellIdStream.str() << ", keeping only first\n";
	}
	return(false);
}

Exemple #6

0

Afficher le fichier

Fichier : RegionCoverage.cpp Projet : biocyberman/TS

// Updates the per-contig read counters for one alignment.
//
//   aread   alignment to account for
//   endPos  end position of the read; when 0 it is taken from aread.GetEndPosition()
//
// The read is counted in the strand-specific total of the current contig and, when
// bit 0 of the ReadOnRegion() result is set, in the strand-specific on-target total.
void RegionCoverage::TrackReadsOnRegion( const BamTools::BamAlignment &aread, uint32_t endPos )
{
	// track total and on-target reads
	uint32_t readEnd = endPos ? endPos : aread.GetEndPosition();
	uint32_t covType = ReadOnRegion( aread.RefID, aread.Position + 1, readEnd );
	TargetContig *contig = m_contigList[m_rcovContigIdx];
	// reverse-strand reads go to the rev* counters, all others to the fwd* counters
	if( aread.IsReverseStrand() ) {
		++contig->revReads;
		if( covType & 1 ) ++contig->revTrgReads;
	} else {
		++contig->fwdReads;
		if( covType & 1 ) ++contig->fwdTrgReads;
	}
}

Exemple #7

0

Afficher le fichier

Fichier : ReadContainer.cpp Projet : roland-ewald/lobstr-code

// Reads the tag named tag_name from aln into *destination as a float.
// Returns true when the tag could be extracted, false otherwise.
bool ReadContainer::GetFloatBamTag(const BamTools::BamAlignment& aln,
		     const std::string& tag_name, float* destination) {
  const bool extracted = aln.GetTag(tag_name, *destination);
  return extracted;
}

Exemple #8

0

Afficher le fichier

Fichier : filterBAM.cpp Projet : avilella/sga

// Error rate between the read and the reference: the value of the NM tag
// divided by record.Length. Returns 0 when the record has no NM tag.
double getErrorRate(BamTools::BamAlignment& record)
{
    int editDistance = 0;
    if(!record.GetTag("NM", editDistance))
        return 0.0;
    return static_cast<double>(editDistance) / record.Length;
}

Exemple #9

0

Afficher le fichier

// Loads the molecular tags of one alignment into Tags.
//
// The prefix tag is read from the ZT tag and the suffix tag from the YT tag; a tag
// missing from the alignment leaves the corresponding field empty. The read group
// (RG tag) then decides, through NeedPrefixTag / NeedSuffixTag, which of the two the
// read must carry: a required tag that is empty makes the call fail, a tag that is
// not required is cleared.
//
// Returns true when no read group uses tags at all (Tags is cleared in that case) or
// when the read ends up with the tags it needs; false if the read group is missing or
// unknown, a required tag is empty, or Tags.HasTags() is false at the end.
bool  MolecularTagTrimmer::GetTagsFromBamAlignment(const BamTools::BamAlignment& alignment, MolTag& Tags)
{
  // Nothing to inspect when no read group is configured with tags
  if (0 == num_read_groups_with_tags_) {
    Tags.Clear();
    return true;
  }

  // Pull both tags from the alignment, blanking whichever is missing
  const bool got_prefix = alignment.GetTag("ZT", Tags.prefix_mol_tag);
  if (!got_prefix)
    Tags.prefix_mol_tag.clear();

  const bool got_suffix = alignment.GetTag("YT", Tags.suffix_mol_tag);
  if (!got_suffix)
    Tags.suffix_mol_tag.clear();

  // Resolve the read group to its index
  string rg_name;
  if (!alignment.GetTag("RG", rg_name))
    return false;

  std::map<string,int>::const_iterator rg_entry = read_group_name_to_index_.find(rg_name);
  if (read_group_name_to_index_.end() == rg_entry)
    return false;

  // Enforce the requirements of this read group
  if (!NeedPrefixTag(rg_entry->second))
    Tags.prefix_mol_tag.clear();
  else if (Tags.prefix_mol_tag.empty())
    return false;

  if (!NeedSuffixTag(rg_entry->second))
    Tags.suffix_mol_tag.clear();
  else if (Tags.suffix_mol_tag.empty())
    return false;

  // We don't allow the joint analysis of tagged and untagged samples at the same time
  return Tags.HasTags() ? true : false;
}

Exemple #10

0

Afficher le fichier

Fichier : BamAlignmentReader.cpp Projet : WilliamRichards2017/graphite

	uint32_t BamAlignmentReader::GetReadLength(const std::string& bamPath)
	{
		uint32_t bamReadLength = 300;
		BamTools::BamReader bamReader;
		if (!bamReader.Open(bamPath))
		{
			throw "Unable to open bam file";
		}
		BamTools::BamAlignment bamAlignment;
		while(bamReader.GetNextAlignment(bamAlignment))
		{
			if (bamAlignment.IsPrimaryAlignment())
			{
				bamReadLength = bamAlignment.QueryBases.size();
				break;
			}
		}
		bamReader.Close();
		return bamReadLength;
	}

Exemple #11

0

Afficher le fichier

Fichier : filterBAM.cpp Projet : avilella/sga

// Reads the next two primary alignments from the reader into record1 and record2,
// discarding any non-primary alignments encountered on the way.
// Returns false as soon as the reader has no further alignment to give.
bool readAlignmentPair(BamTools::BamReader* pReader, 
                       BamTools::BamAlignment& record1,
                       BamTools::BamAlignment& record2)
{
    // First record: skips non-primary alignments left over from the previous pair
    for(;;)
    {
        if(!pReader->GetNextAlignment(record1))
            return false;
        if(record1.IsPrimaryAlignment())
            break;
    }

    // Second record
    for(;;)
    {
        if(!pReader->GetNextAlignment(record2))
            return false;
        if(record2.IsPrimaryAlignment())
            break;
    }
    return true;
}

Exemple #12

0

Afficher le fichier

Fichier : BamAlignmentReader.cpp Projet : WilliamRichards2017/graphite

	// Loads every alignment the reader yields for the given region and wraps each one,
	// together with its sample, in a shared BamAlignment object.
	//
	//   regionPtr              region to restrict the reader to; its start and end
	//                          positions are passed to SetRegion unchanged
	//   sampleManagerPtr       resolves the RG tag value of each read to a sample
	//   excludeDuplicateReads  when true, reads flagged as duplicates are skipped
	//
	// Exits the process if the BAM file is not open. Throws a std::string if a read's
	// RG value cannot be resolved to a sample. On normal completion the reader is
	// checked back in with its manager, if it has one.
	std::vector< IAlignment::SharedPtr > BamAlignmentReader::loadAlignmentsInRegion(Region::SharedPtr regionPtr, SampleManager::SharedPtr sampleManagerPtr, bool excludeDuplicateReads)
	{
		if (!m_is_open)
		{
			std::cout << "Bam file not opened" << std::endl;
			exit(0);
		}

		std::vector< IAlignment::SharedPtr > loadedAlignments;

		int regionRefID = this->m_bam_reader->GetReferenceID(regionPtr->getReferenceID());
		this->m_bam_reader->SetRegion(regionRefID, regionPtr->getStartPosition(), regionRefID, regionPtr->getEndPosition());

		BamTools::BamAlignment record;
		while (this->m_bam_reader->GetNextAlignment(record))
		{
			if (excludeDuplicateReads && record.IsDuplicate())
			{
				continue;
			}

			// the RG tag value is used as the sample name; it stays empty if the tag is absent
			std::string rgValue;
			record.GetTag("RG", rgValue);

			Sample::SharedPtr sample = sampleManagerPtr->getSamplePtr(rgValue);
			if (sample == nullptr)
			{
				throw "There was an error in the sample name for: " + rgValue;
			}
			loadedAlignments.push_back(std::make_shared< BamAlignment >(record, sample));
		}

		if (m_alignment_reader_manager_ptr != nullptr)
		{
			m_alignment_reader_manager_ptr->checkinReader(this->shared_from_this());
		}

		return loadedAlignments;
	}

Exemple #13

0

Afficher le fichier

Fichier : bamParser.cpp Projet : drestion/peakranger

// Stores the start position of a read under chromosome chr: in reads.neg_reads for
// reverse-strand reads, in reads.pos_reads otherwise. Reads whose Position is not
// strictly positive are ignored.
void bamParser::insertRead(const BamTools::BamAlignment& read, Reads& reads,
		string& chr) {
	const bool on_reverse_strand = read.IsReverseStrand();
	const int32_t start = read.Position;
	if (start <= 0) {
		return;
	}

	uint32_t position = (uint32_t) start;
	if (on_reverse_strand) {
		reads.neg_reads.insertRead(chr, position);
	} else {
		reads.pos_reads.insertRead(chr, position);
	}
}

Exemple #14

0

Afficher le fichier

Fichier : ReadContainer.cpp Projet : roland-ewald/lobstr-code

// Reads an integer tag into *destination, whatever integer width it is stored with.
//
// The stored tag type is queried first and the tag is then extracted into a variable
// of the matching width before being converted to int. Returns false if the tag type
// cannot be determined or the extraction fails. A tag of any other type is reported
// through PrintMessageDieOnError.
bool ReadContainer::GetIntBamTag(const BamTools::BamAlignment& aln,
		  const std::string& tag_name, int* destination) {
  char stored_type;
  if (!aln.GetTagType(tag_name, stored_type)) {
    return false;
  }

  switch (stored_type) {
  case BamTools::Constants::BAM_TAG_TYPE_INT32: {
    return aln.GetTag(tag_name, *destination);
  }
  case BamTools::Constants::BAM_TAG_TYPE_INT8: {
    int8_t raw;
    const bool ok = aln.GetTag(tag_name, raw);
    if (ok) {
      *destination = static_cast<int>(raw);
    }
    return ok;
  }
  case BamTools::Constants::BAM_TAG_TYPE_UINT8: {
    uint8_t raw;
    const bool ok = aln.GetTag(tag_name, raw);
    if (ok) {
      *destination = static_cast<int>(raw);
    }
    return ok;
  }
  case BamTools::Constants::BAM_TAG_TYPE_INT16: {
    int16_t raw;
    const bool ok = aln.GetTag(tag_name, raw);
    if (ok) {
      *destination = static_cast<int>(raw);
    }
    return ok;
  }
  case BamTools::Constants::BAM_TAG_TYPE_UINT16: {
    uint16_t raw;
    const bool ok = aln.GetTag(tag_name, raw);
    if (ok) {
      *destination = static_cast<int>(raw);
    }
    return ok;
  }
  case BamTools::Constants::BAM_TAG_TYPE_UINT32: {
    uint32_t raw;
    const bool ok = aln.GetTag(tag_name, raw);
    if (ok) {
      *destination = static_cast<int>(raw);
    }
    return ok;
  }
  default: {
    stringstream msg;
    msg << "Encountered unsupported tag type " << stored_type;
    PrintMessageDieOnError(msg.str(), ERROR);
    break;
  }
  }
  return false;
}

Exemple #15

0

Afficher le fichier

Fichier : BamHelper.cpp Projet : hgy851018/TS

bool getTagParanoid(BamTools::BamAlignment &alignment, const std::string &tag, int64_t &value) {
	char tagType = ' ';
	if(alignment.GetTagType(tag, tagType)) {
		switch(tagType) {
			case BamTools::Constants::BAM_TAG_TYPE_INT8: {
				int8_t value_int8 = 0;
				alignment.GetTag(tag, value_int8);
				value = value_int8;
			} break;
			case BamTools::Constants::BAM_TAG_TYPE_UINT8: {
				uint8_t value_uint8 = 0;
				alignment.GetTag(tag, value_uint8);
				value = value_uint8;
			} break;
			case BamTools::Constants::BAM_TAG_TYPE_INT16: {
				int16_t value_int16 = 0;
				alignment.GetTag(tag, value_int16);
				value = value_int16;
			} break;
			case BamTools::Constants::BAM_TAG_TYPE_UINT16: {
				uint16_t value_uint16 = 0;
				alignment.GetTag(tag, value_uint16);
				value = value_uint16;
			} break;
			case BamTools::Constants::BAM_TAG_TYPE_INT32: {
				int32_t value_int32 = 0;
				alignment.GetTag(tag, value_int32);
				value = value_int32;
			} break;
			case BamTools::Constants::BAM_TAG_TYPE_UINT32: {
				uint32_t value_uint32 = 0;
				alignment.GetTag(tag, value_uint32);
				value = value_uint32;
			} break;
			default: {
				alignment.GetTag(tag, value);
			} break;
		}
		return(true);
	} else {
		return(false);
	}
}

Exemple #16

0

Afficher le fichier

Fichier : Config.cpp Projet : ffinfo/Picl

void Config::InitializationClustering() {
    struct stat st;
    if(stat(Workspace.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Workspace directory already present");
    else if (mkdir(Workspace.c_str(), 0755) != 0) {
        Log("[Error] Could not create workspace directory: " + Workspace);
        exit(1);
    }
    RunningTasksFile = Workspace + "/" + FilePrefix + "running.tasks";
    StatsFile = Workspace + "/" + FilePrefix + "stats";
    BinClusterFile = Workspace + "/" + FilePrefix + "bpc";
    clusterFile = new ClusterFile(BinClusterFile);
    clusterDir = Workspace + "/clusters/";
    if(stat(clusterDir.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Cluster directory already present");
    else if (mkdir(clusterDir.c_str(), 0755) != 0) {
        Log("[Error] Could not create cluster directory: " + clusterDir);
        exit(1);
    }
    insertsizeDir = Workspace + "/insertsize/";
    if(stat(insertsizeDir.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Insertsize directory already present");
    else if (mkdir(insertsizeDir.c_str(), 0755) != 0) {
        Log("[Error] Could not create insertsize directory: " + insertsizeDir);
        exit(1);
    }
    coverageDir = Workspace + "/coverage/";
    if(stat(coverageDir.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Coverage directory already present");
    else if (mkdir(coverageDir.c_str(), 0755) != 0) {
        Log("[Error] Could not create coverage directory: " + coverageDir);
        exit(1);
    }
    
    if (!ForwardBam.empty() && !ReverseBam.empty() && PairedBam.empty()) {
        UsePairedBam = false;
    } else if (ForwardBam.empty() && ReverseBam.empty() && !PairedBam.empty()) {
        UsePairedBam = true;
    } else {
        Log("[Error] No correct bam file(s)");
        exit(1);
    }
    
    BamTools::BamAlignment alignment;
    BamTools::BamReader BamReader;
    
    if (UsePairedBam) {
        BamReader.Open(PairedBam);
        if (not BamReader.IsOpen()) {
            Log("[Error] Could not open paired bam");
            exit(1);
        }
        if (PairedIndex.empty()) {
            if (not BamReader.LocateIndex(BamTools::BamIndex::STANDARD)) {
                PairedIndex = PairedBam.substr(0,PairedBam.find_last_of(".bam")-3) + ".bai";
                BamReader.OpenIndex(PairedIndex);
            }
            if (not BamReader.HasIndex()) {
                Log("[Error] No index for bamfile");
                exit(1);
            }
        }
        BamTools::SamHeader header = BamReader.GetHeader();
        for (BamTools::SamReadGroupIterator it = header.ReadGroups.Begin(); it != header.ReadGroups.End(); it++) {
            BamTools::SamReadGroup* readgroup = &*it;
            readNameConverter.TrimName(readgroup->ID);
            readNameConverter.AddReadGroup(readgroup->ID);
        }
        long int count = 0;
        while (BamReader.GetNextAlignment(alignment)) {
            string RG;
            if (alignment.GetTag("RG", RG)) {
                if (not NameTrim.empty()) readNameConverter.TrimName(RG);
                if (readNameConverter.AddReadGroup(RG)) {
                    Log("[Warning] Readgroup '" + RG + "' found in reads but not in header");
                    count = 0;
                }
            }
            count++;
            if (count > 10000) break;
        }
        BamReader.Close();
    } else {
        BamReader.Open(ForwardBam);
        if (not BamReader.IsOpen()) {
            Log("[Error] Could not open first/forward bam");
            exit(1);
        }
        if (ForwardIndex.empty()) {
            if (not BamReader.LocateIndex(BamTools::BamIndex::STANDARD)) {
                ForwardIndex = ForwardBam.substr(0,ForwardBam.find_last_of(".bam")-3) + ".bai";
                BamReader.OpenIndex(ForwardIndex);
            }
            if (not BamReader.HasIndex()) {
                Log("[Error] No index for forward bamfile");
                exit(1);
            }
        }
        BamTools::SamHeader forwardheader = BamReader.GetHeader();
        for (BamTools::SamReadGroupIterator it = forwardheader.ReadGroups.Begin(); it != forwardheader.ReadGroups.End(); it++) {
            BamTools::SamReadGroup* readgroup = &*it;
            readNameConverter.TrimName(readgroup->ID);
            readNameConverter.AddReadGroup(readgroup->ID);
        }
        long int count = 0;
        while (BamReader.GetNextAlignment(alignment)) {
            string RG;
            if (alignment.GetTag("RG", RG)) {
                if (!NameTrim.empty()) readNameConverter.TrimName(RG);
                if (readNameConverter.AddReadGroup(RG)) {
                    Log("[Warning] Readgroup '" + RG + "' found in forward reads but not in header");
                    count = 0;
                }
            }
            count++;
            if (count > 10000) break;
        }
        BamReader.Close();
        BamReader.Open(ReverseBam);
        if (not BamReader.IsOpen()) {
            Log("[Error] Could not open second/reverse bam");
            exit(1);
        }
        if (ReverseIndex.empty()) {
            if (not BamReader.LocateIndex(BamTools::BamIndex::STANDARD)) {
                ReverseIndex = ReverseBam.substr(0,ReverseBam.find_last_of(".bam")-3) + ".bai";
                BamReader.OpenIndex(ReverseIndex);
            }
            if (not BamReader.HasIndex()) {
                Log("[Error] No index for reverse bamfile");
                exit(1);
            }
        }
        BamTools::SamHeader reverseheader = BamReader.GetHeader();
        for (BamTools::SamReadGroupIterator it = reverseheader.ReadGroups.Begin(); it != reverseheader.ReadGroups.End(); it++) {
            BamTools::SamReadGroup* readgroup = &*it;
            readNameConverter.TrimName(readgroup->ID);
            if (readNameConverter.AddReadGroup(readgroup->ID)) {
                Log("[Warning] Readgroup '" + readgroup->ID + "' found in reverse but not in forward");
            }
        }
        count = 0;
        while (BamReader.GetNextAlignment(alignment)) {
            string RG;
            if (alignment.GetTag("RG", RG)) {
                if (!NameTrim.empty()) readNameConverter.TrimName(RG);
                if (readNameConverter.AddReadGroup(RG)) {
                    Log("[Warning] Readgroup '" + RG + "' found in reverse reads but not in header");
                    count = 0;
                } 
            }
            count++;
            if (count > 10000) break;
        }
        BamReader.Close();
    }
    
    for(map<string, int>::iterator it = readNameConverter.ReadGroups.begin(); it!=readNameConverter.ReadGroups.end(); ++it) {
        ostringstream logBuffer;
        logBuffer << "Readgroup found: " << it->second << " - " << it->first;
        Log(logBuffer.str());
    }
    
    writeConfigFile(Workspace + FilePrefix + "config");
}

Exemple #17

0

Afficher le fichier

Fichier : somatic-variant-filters.cpp Projet : snewhouse/sga

// Gathers read-level coverage statistics for one variant call.
//
//   pReader   BAM reader; its region is set to the variant position by this function
//   record    the variant (reference name, position, reference and variant strings)
//   refTable  reference sequences, looked up by record.refName
//
// A reference haplotype is cut out of the reference around the variant and a variant
// haplotype is derived from it by substituting record.varStr for record.refStr.
// Alignments returned for the variant position with a mapping quality above zero are
// shuffled and at most opt::capAlignments of them are examined. A read counts towards
// n_total_reads when it has a variant segment and at least one flanking segment, and
// towards n_evidence_reads when it additionally differs from the reference at the
// variant, aligns to one of the haplotypes with at least minPercentIdentity, and
// scores higher against the variant haplotype than against the reference haplotype.
//
// Returns the statistics; they are returned as default-constructed when the
// reference name is unknown to the reader.
CoverageStats getVariantCoverage(BamTools::BamReader* pReader, const VCFRecord& record, const ReadTable* refTable)
{
    CoverageStats stats;
    
    static const int flankingSize = 100;
    static const double minPercentIdentity = 95.0f;

    bool is_snv = record.refStr.size() == 1 && record.varStr.size() == 1;

    // Grab the reference haplotype
    // The window is clamped to [0, length of the reference sequence]
    int eventLength = record.varStr.length();
    int zeroBasedPos = record.refPosition - 1;
    int start = zeroBasedPos - flankingSize - 1;
    if(start < 0)
        start = 0;

    int end = zeroBasedPos + eventLength + 2 * flankingSize;
    const SeqItem& chr = refTable->getRead(record.refName);
    if(end > (int)chr.seq.length())
        end = (int)chr.seq.length();

    std::string reference_haplotype = chr.seq.substr(start, end - start);
    // Position of the variant relative to the start of the haplotype window
    int translatedPos = zeroBasedPos - start;

    std::string variant_haplotype = reference_haplotype;
    
    // Ensure that the reference string at the variant matches the expected
    assert(variant_haplotype.substr(translatedPos, record.refStr.length()) == record.refStr);
    variant_haplotype.replace(translatedPos, record.refStr.length(), record.varStr);

    // Grab all reads in reference region
    int refID = pReader->GetReferenceID(record.refName);
    if(refID < 0)
        return stats;

    int refStart = record.refPosition;
    int refEnd = record.refPosition;
    pReader->SetRegion(refID, refStart, refID, refEnd);
    BamTools::BamAlignment aln;

    // Mapping qualities are collected for every alignment; only alignments with a
    // mapping quality above zero are kept for the evidence analysis below
    std::vector<double> mapping_quality;
    std::vector<BamTools::BamAlignment> alignments;
    while(pReader->GetNextAlignment(aln)) {
        if(aln.MapQuality > 0)
            alignments.push_back(aln);
        mapping_quality.push_back(aln.MapQuality);
    }

    // With no alignments at all the median mapping quality is set to 60
    if(!mapping_quality.empty())
        stats.median_mapping_quality = median(mapping_quality);
    else
        stats.median_mapping_quality = 60;

    // Shuffle, then examine at most opt::capAlignments alignments
    std::random_shuffle(alignments.begin(), alignments.end());

    for(size_t i = 0; i < alignments.size() && i < opt::capAlignments; ++i) {
        BamTools::BamAlignment alignment = alignments[i];

        VariantReadSegments segments = splitReadAtVariant(alignment, record);

        if(opt::verbose > 1)
        {
            fprintf(stderr, "var: %zu %s -> %s\n",  record.refPosition, record.refStr.c_str(), record.varStr.c_str());
            fprintf(stderr, "pos: %d\n",  alignment.Position);
            fprintf(stderr, "strand: %s\n", alignment.IsReverseStrand() ? "-" : "+");
            fprintf(stderr, "read: %s\n", alignment.QueryBases.c_str());
            fprintf(stderr, "qual: %s\n", alignment.Qualities.c_str());
            fprintf(stderr, "alnb: %s\n", alignment.AlignedBases.c_str());
            
            fprintf(stderr, "Pre: %s\n",  segments.preSegment.c_str());
            fprintf(stderr, "Var: %s\n",  segments.variantSegment.c_str());
            fprintf(stderr, "Pos: %s\n",  segments.postSegment.c_str());
            
            fprintf(stderr, "PreQual: %s\n",  segments.preQual.c_str());
            fprintf(stderr, "VarQual: %s\n",  segments.variantQual.c_str());
            fprintf(stderr, "PosQual: %s\n",  segments.postQual.c_str());
        }

        // The read must have a variant segment plus at least one flanking segment
        bool aligned_at_variant = segments.variantSegment.size() > 0 && 
                                  (segments.preSegment.size() > 0 || segments.postSegment.size() > 0);

        if(!aligned_at_variant)
            continue;
                                        
        stats.n_total_reads += 1;
        
        if(segments.variantSegment == record.refStr)
            continue; // not an evidence read

        // Align the read to the reference and variant haplotype
        SequenceOverlap ref_overlap = Overlapper::computeOverlapAffine(alignment.QueryBases, reference_haplotype);
        SequenceOverlap var_overlap = Overlapper::computeOverlapAffine(alignment.QueryBases, variant_haplotype);
        
        // Good enough if either of the two overlaps reaches the identity threshold
        bool quality_alignment = (ref_overlap.getPercentIdentity() >= minPercentIdentity || 
                                 var_overlap.getPercentIdentity() >= minPercentIdentity);

        bool is_evidence_read = quality_alignment && var_overlap.score > ref_overlap.score;
        if(is_evidence_read)
        {
            stats.n_evidence_reads += 1;
            // For SNVs also record the phred quality of the single variant base
            if(is_snv && segments.variantQual.size() == 1)
            {
                char qb = segments.variantQual[0];
                int q = Quality::char2phred(qb);
                stats.snv_evidence_quals.push_back(q);
            }
        }
    }

    return stats;
}

Exemple #18

0

Afficher le fichier

// Updates contig-level and per-region read statistics for one alignment.
//
//   aread   alignment to account for
//   endPos  end position of the read; when 0 it is taken from aread.GetEndPosition()
//
// Every read is added to the strand-specific total of the current contig. When bit 0
// of the ReadOnRegion() result is set, the regions starting at m_rcovRegion are walked:
// each region overlapping the read gets its 'overlaps' count incremented, and a single
// best region is chosen to receive the read (and, if applicable, an end-to-end /
// coverage hit). With m_ampliconReads set, the region whose 5' end distance is the
// closest negative value wins; ties, and the non-amplicon case, are decided by largest
// overlap, and remaining ties are broken with a counter advanced once per call.
void AmpliconRegionStatistics::TrackReadsOnRegion( const BamTools::BamAlignment &aread, uint32_t endPos )
{
	// pseudo-random number generator 'seed' for resolving equivalent read assignments
	static uint16_t clockSeed = 0;
	// check/set first region read overlaps
	uint32_t readSrt = aread.Position + 1;
	uint32_t readEnd = endPos ? endPos : aread.GetEndPosition();
	uint32_t covType = ReadOnRegion( aread.RefID, readSrt, readEnd );
	// maintain base method of tracking total reads
	TargetContig *contig = m_contigList[m_rcovContigIdx];
	bool isRev = aread.IsReverseStrand();
	if( isRev ) {
		++contig->revReads;
	} else {
		++contig->fwdReads;
	}
	// Tracking of reads on target
	if( covType & 1 ) {
		// iterate over all regions overlapping read...
		int32_t bestEndDist = -m_maxUpstreamPrimerStart;
		int32_t bestOverlap = 0;
		uint32_t numBestRegions = 0;
		bool haveBestEnd = false;
		for( TargetRegion *cur = m_rcovRegion; cur; cur = cur->next ) {
			// regions are presumably ordered by start, so nothing further can overlap - TODO confirm
			if( readEnd < cur->trgSrt ) break;
			// skip a region that ends before the read starts; this must test the region
			// being examined, not the head of the list
			if( readSrt > cur->trgEnd ) continue;
			// save stats for all overlapped reads
			++(GetStats(cur)->overlaps);
			// find most likely AmpliSeq primed region of those overlapped
			// NOTE: can still be wrong for regions starting very close together, given 5' digestion uncertainty,
			// coupled with read length and digestion uncertainty at 3'
			int32_t dSrt = readSrt - cur->trgSrt;
			int32_t dEnd = cur->trgEnd - readEnd;
			int32_t endDist5p = isRev ? dEnd : dSrt;
			// for non-amplicon reads, ends are ignored and only maximum overlap is employed to distinguish target region
			if( m_ampliconReads ) {
				// always select region that is closest start before 5p primer
				if( endDist5p < 0 && endDist5p > bestEndDist ) {
					haveBestEnd = true;
					bestEndDist = endDist5p;
					bestOverlap = 0; // force record best below
				} else if( haveBestEnd && endDist5p != bestEndDist ) {
					// region is not closer primed or same distance from false priming site
					continue;
				}
			}
			// save region based on max overlap for equivalent regions
			if( dSrt < 0 ) dSrt = 0;
			if( dEnd < 0 ) dEnd = 0;
			int32_t overlap = cur->trgEnd - cur->trgSrt - dSrt - dEnd; // +1
			if( overlap >= bestOverlap ) {
				// if overlaps also match then default to region starting most 3'
				// - cannot do better w/o knowing exact priming location, or possibly using ZA tag value
				if( overlap == bestOverlap ) {
					// stack multiple equivalent solutions
					if( numBestRegions >= m_regionStackSize ) {
						// safety code - only triggered if many targets overlapping read
						// NOTE(review): the realloc result is not checked for failure
						m_regionStackSize <<= 1;	// *2
						m_regionStack = (TargetRegion **)realloc(
							m_regionStack, m_regionStackSize * sizeof(TargetRegion *) );
					}
				} else {
					// save new best solution - these values are the same for all equivalent solutions
					bestOverlap = overlap;
					numBestRegions = 0;
				}
				m_regionStack[numBestRegions++] = cur;
			}
		}
		// contig-level on-target tally follows covType alone
		if( isRev ) {
			++contig->revTrgReads;
		} else {
			++contig->fwdTrgReads;
		}
		// per-region stats need a candidate; this also keeps the modulo below from dividing by zero
		if( numBestRegions > 0 ) {
			// pseudo-randomly choose best region of equivalent best regions
			TargetRegion *bestRegion = m_regionStack[ clockSeed % numBestRegions ];
			bool e2e_or_cov;
			if( m_sigFacCoverage ) {
				// coverage criterion: covered fraction of the target reaches m_sigFacCoverage
				int32_t trgLen = bestRegion->trgEnd - bestRegion->trgSrt + 1;
				e2e_or_cov = (double(bestOverlap+1)/trgLen >= m_sigFacCoverage);
			} else {
				// end-to-end criterion: uncovered stretch at each target end is at most m_maxE2eEndDist
				int32_t dSrt = readSrt - bestRegion->trgSrt;
				int32_t dEnd = bestRegion->trgEnd - readEnd;
				if( dSrt < 0 ) dSrt = 0;
				if( dEnd < 0 ) dEnd = 0;
				e2e_or_cov = ((dSrt > dEnd ? dSrt : dEnd) <= m_maxE2eEndDist);
			}
			StatsData *stats = GetStats(bestRegion);
			if( isRev ) {
				++stats->revReads;
				if( e2e_or_cov ) ++stats->rev_e2e;
			} else {
				++stats->fwdReads;
				if( e2e_or_cov ) ++stats->fwd_e2e;
			}
		}
	}
	++clockSeed;
}

Exemple #19

0

Afficher le fichier

Fichier : ReadContainer.cpp Projet : roland-ewald/lobstr-code

// Fill *aligned_read from one BAM record and run the per-read filters.
//
// aln                  - BAM alignment to convert.
// aligned_read         - output; fields are filled in as tags are parsed, so
//                        it may be partially populated when false is returned.
// ref_ext_nucleotides  - extended reference sequence keyed by (chrom, msStart);
//                        only consulted when the end-match filters are enabled.
//
// Returns true when the read passed every filter (FilterCounter::UNFILTERED is
// incremented), false when it was rejected. Reads rejected by a configurable
// filter bump the matching FilterCounter entry; second mates, reads flagged
// XP == 1 and reads without an XD tag are rejected without touching a counter.
// Missing mandatory tags (RG, XS, XE, XR, XC) are reported through
// PrintMessageDieOnError.
bool ReadContainer::ParseRead(const BamTools::BamAlignment& aln,
			      AlignedRead* aligned_read, 
			      map<pair<string,int>, string>& ref_ext_nucleotides) {
  // get read ID
  aligned_read->ID = aln.Name;
  // get nucleotides
  aligned_read->nucleotides = aln.QueryBases;
  // get qualities
  aligned_read->qualities = aln.Qualities;
  // get strand
  aligned_read->strand = aln.IsReverseStrand();
  // get chrom
  aligned_read->chrom = references.at(aln.RefID).RefName;
  // get read start
  aligned_read->read_start = aln.Position;
  // get cigar
  aligned_read->cigar_ops = aln.CigarData;
  // get if mate pair
  aligned_read->mate = aln.IsSecondMate() ? 1 : 0;
  // Second mates are never processed here
  if (aligned_read->mate) {
    return false;
  }
  // Get all the tag data
  // don't process if partially spanning (from old lobSTR)
  int partial = 0;
  if (GetIntBamTag(aln, "XP", &partial)) {
    if (partial == 1) return false;
  }
  // get read group
  if (!GetStringBamTag(aln, "RG", &aligned_read->read_group)) {
    stringstream msg;
    msg << aln.Name << " Could not get read group.";
    PrintMessageDieOnError(msg.str(), ERROR);
  }
  // get msStart
  if (!GetIntBamTag(aln, "XS", &aligned_read->msStart)) {
    stringstream msg;
    msg << aln.Name << " from group " << aligned_read->read_group << " Could not get STR start coordinate. Did this bam file come from lobSTR?";
    PrintMessageDieOnError(msg.str(), ERROR);
  }
  // get msEnd
  if (!GetIntBamTag(aln, "XE", &aligned_read->msEnd)) {
    stringstream msg;
    msg << aln.Name << " from group " << aligned_read->read_group << " Could not get STR end coordinate. Did this bam file come from lobSTR?";
    PrintMessageDieOnError(msg.str(), ERROR);
  }
  // get mapq; the tag is optional and defaults to 0
  if (!GetIntBamTag(aln, "XQ", &aligned_read->mapq)) {
    aligned_read->mapq = 0;
  }
  // get diff
  if (!GetIntBamTag(aln, "XD", &aligned_read->diffFromRef)) {
    return false;
  }
  // get mate dist; optional, defaults to 0
  if (!GetIntBamTag(aln, "XM", &aligned_read->matedist)) {
    aligned_read->matedist = 0;
  }
  // get STR seq
  if (!GetStringBamTag(aln, "XR", &aligned_read->repseq)) {
    stringstream msg;
    msg << aln.Name << " from group " << aligned_read->read_group << " Could not get repseq.";
    PrintMessageDieOnError(msg.str(), ERROR);
  }
  // get if stitched; optional, defaults to 0
  if (!GetIntBamTag(aln, "XX", &aligned_read->stitched)) {
    aligned_read->stitched = 0;
  }
  // get ref copy num
  if (!GetFloatBamTag(aln, "XC", &aligned_read->refCopyNum)) {
    stringstream msg;
    msg << aln.Name << " from group " << aligned_read->read_group << " Could not get reference copy number.";
    PrintMessageDieOnError(msg.str(), ERROR);
  }
  // get period (length of the repeat unit)
  aligned_read->period = aligned_read->repseq.length();
  if (include_flank) {  // diff is just sum of differences in cigar
    CIGAR_LIST cigar_list;
    for (vector<BamTools::CigarOp>::const_iterator
	   it = aligned_read->cigar_ops.begin();
	 it != aligned_read->cigar_ops.end(); ++it) {
      CIGAR cig;
      cig.num = it->Length;
      cig.cigar_type = it->Type;
      cigar_list.cigars.push_back(cig);
    }
    bool added_s;
    bool cigar_had_s;
    cigar_list.ResetString();
    GenerateCorrectCigar(&cigar_list, aln.QueryBases,
			 &added_s, &cigar_had_s);
    aligned_read->diffFromRef = GetSTRAllele(cigar_list);
  }
  // apply filters
  if (unit) {
    // An empty repeat sequence gives period == 0; the modulo would then be
    // undefined behaviour, and such a read cannot be a whole number of units.
    if (aligned_read->period == 0 ||
        aligned_read->diffFromRef % aligned_read->period != 0) {
      filter_counter.increment(FilterCounter::NOT_UNIT);
      return false;
    }
  }
  if (abs(aligned_read->diffFromRef) > max_diff_ref) {
    filter_counter.increment(FilterCounter::DIFF_FROM_REF);
    return false;
  }
  if (aligned_read->mapq > max_mapq) {
    filter_counter.increment(FilterCounter::MAPPING_QUALITY);
    return false;
  }
  if (aligned_read->matedist > max_matedist) {
    filter_counter.increment(FilterCounter::MATE_DIST);
    return false;
  }
  // Check if the allele length is valid
  if (aligned_read->diffFromRef + (aligned_read->refCopyNum*aligned_read->period) < MIN_ALLELE_SIZE) {
    filter_counter.increment(FilterCounter::ALLELE_SIZE);
    return false;
  }

  // check that read sufficiently spans STR
  int max_read_start = aligned_read->msStart - min_border;
  int min_read_stop  = aligned_read->msEnd   + min_border;
  if (aln.Position > max_read_start || aln.GetEndPosition() < min_read_stop){
    filter_counter.increment(FilterCounter::SPANNING_AMOUNT);
    return false; 
  }
  
  // check that both ends of the read contain sufficient perfect matches
  if (min_read_end_match > 0){
    map<pair<string,int>, string>::iterator loc_iter = ref_ext_nucleotides.find(pair<string,int>(aligned_read->chrom, aligned_read->msStart));
    // NOTE(review): the dereference below relies on PrintMessageDieOnError
    // not returning for ERROR - confirm.
    if (loc_iter == ref_ext_nucleotides.end())
      PrintMessageDieOnError("No extended reference sequence found for locus", ERROR);
    string ref_ext_seq = loc_iter->second;
    pair<int,int> num_end_matches = AlignmentFilters::GetNumEndMatches(aligned_read, ref_ext_seq, aligned_read->msStart-extend);
    if (num_end_matches.first < min_read_end_match || num_end_matches.second < min_read_end_match){
      filter_counter.increment(FilterCounter::NUM_END_MATCHES);
      return false;
    }
  }

  // check that the prefix and suffix of the read match maximally compared to proximal reference locations
  if (maximal_end_match_window > 0){
    map<pair<string,int>, string>::iterator loc_iter = ref_ext_nucleotides.find(pair<string,int>(aligned_read->chrom, aligned_read->msStart));
    if (loc_iter == ref_ext_nucleotides.end())
      PrintMessageDieOnError("No extended reference sequence found for locus", ERROR);
    string ref_ext_seq = loc_iter->second;
    bool maximum_end_matches = AlignmentFilters::HasLargestEndMatches(aligned_read, ref_ext_seq, aligned_read->msStart-extend, maximal_end_match_window, maximal_end_match_window);
    if (!maximum_end_matches){
      filter_counter.increment(FilterCounter::NOT_MAXIMAL_END);
      return false;
    }
  }

  // check that both ends of the aligned read have sufficient bases before the first indel
  // (a value of -1 at either end skips the check for that end)
  if (min_bp_before_indel > 0){
    pair<int, int> num_bps = AlignmentFilters::GetEndDistToIndel(aligned_read);
    if (num_bps.first != -1 && num_bps.first < min_bp_before_indel){
      filter_counter.increment(FilterCounter::BP_BEFORE_INDEL);
      return false;
    }
    if (num_bps.second != -1 && num_bps.second < min_bp_before_indel){
      filter_counter.increment(FilterCounter::BP_BEFORE_INDEL);
      return false;
    }
  }
  filter_counter.increment(FilterCounter::UNFILTERED);
  return true;
}

Exemple #20

0

Afficher le fichier

Fichier : filterBAM.cpp Projet : avilella/sga

// Decide whether two aligned mates form a short-insert pair.
//
// Each record's reference name is used as a vertex ID in pGraph. When both
// mates map to the same vertex the decision is made from record1.InsertSize;
// otherwise the graph is searched for walks from the first vertex to the
// second, and the pair is accepted as soon as one walk yields a fragment
// shorter than opt::maxDistance.
// Returns true if the paired reads are a short-insert pair.
bool filterByGraph(StringGraph* pGraph, 
                   const BamTools::RefVector& referenceVector, 
                   BamTools::BamAlignment& record1, 
                   BamTools::BamAlignment& record2)
{
    // Reference names of the two alignments identify the graph vertices
    std::string vertexID1 = referenceVector[record1.RefID].RefName;
    std::string vertexID2 = referenceVector[record2.RefID].RefName;

    Vertex* pX = pGraph->getVertex(vertexID1);
    Vertex* pY = pGraph->getVertex(vertexID2);

    // Both vertices must exist in the graph
    assert(pX != NULL && pY != NULL);

#ifdef DEBUG_CONNECT
    std::cout << "Finding path from " << vertexID1 << " to " << vertexID2 << "\n";
#endif

    // Walk out of X and into Y in the sense direction by default,
    // flipping for alignments on the reverse strand
    EdgeDir dirOutOfX = ED_SENSE;
    EdgeDir dirIntoY = ED_SENSE;

    if(record1.IsReverseStrand())
        dirOutOfX = !dirOutOfX;

    if(record2.IsReverseStrand())
        dirIntoY = !dirIntoY;

    // Coordinates on each contig where the fragment starts / ends
    int fromX;
    if(dirOutOfX == ED_SENSE)
        fromX = record1.Position;
    else
        fromX = record1.GetEndPosition();

    int toY;
    if(dirIntoY == ED_SENSE)
        toY = record2.Position;
    else
        toY = record2.GetEndPosition();

    // Amount of contig X already covered by the fragment; the remaining
    // distance budget bounds how far the graph search may go
    int coveredX;
    if(dirOutOfX == ED_SENSE)
        coveredX = pX->getSeqLen() - fromX;
    else
        coveredX = fromX;
    int maxWalkDistance = opt::maxDistance - coveredX;

    if(pX == pY)
    {
        // Same contig: rely on the insert size reported by the aligner
        return abs(record1.InsertSize) < opt::maxDistance;
    }

    SGWalkVector walks;
    SGSearch::findWalks(pX, pY, dirOutOfX, maxWalkDistance, 10000, true, walks);

    // Stop at the first walk whose fragment is short enough
    bool foundShortFragment = false;
    for(size_t idx = 0; !foundShortFragment && idx < walks.size(); ++idx)
    {
        std::string fragment = walks[idx].getFragmentString(pX, 
                                                            pY, 
                                                            fromX,
                                                            toY,
                                                            dirOutOfX,
                                                            dirIntoY);
        if((int)fragment.size() < opt::maxDistance)
            foundShortFragment = true;
    }

    return foundShortFragment;
}

Exemple #21

0

Afficher le fichier

// Read alignments from the BAM reader and store the ones that pass the
// filters in aligned_str_map_, keyed by (chrom, msStart).
//
// ref_str - reference STR locus. Unless its chrom is "NA", the reader is
//           first restricted to [start - extend, stop + extend] on that
//           chromosome; an unknown chromosome or a failed SetRegion is
//           reported through PrintMessageDieOnError.
//
// Second mates, reads flagged XP == 1, and reads failing the unit /
// max_diff_ref / max_mapq / max_matedist filters are skipped. Missing
// mandatory tags (RG, XS, XE, XD, XR, XC) are reported through
// PrintMessageDieOnError.
void ReadContainer::AddReadsFromFile(const ReferenceSTR& ref_str) {
  if (ref_str.chrom != "NA") {
    int refid = -1;
    if (chrom_to_refid.find(ref_str.chrom) !=
	chrom_to_refid.end()) {
      refid = chrom_to_refid.at(ref_str.chrom);
    }
    if (refid == -1) {
      PrintMessageDieOnError("Could not locate STR reference chromosome in bam file", ERROR);
    }
    BamTools::BamRegion bam_region(refid, ref_str.start-extend, refid, ref_str.stop+extend);
    if (!reader.SetRegion(bam_region)) {
      PrintMessageDieOnError("Could not set bam region", ERROR);
    }
  }
  BamTools::BamAlignment aln;
  while (reader.GetNextAlignment(aln)) {
    AlignedRead aligned_read;
    // get read ID
    aligned_read.ID = aln.Name;
    // get nucleotides
    aligned_read.nucleotides = aln.QueryBases;
    // get qualities
    aligned_read.qualities = aln.Qualities;
    // get strand
    aligned_read.strand = aln.IsReverseStrand();
    // get chrom
    aligned_read.chrom = references.at(aln.RefID).RefName;
    // get read start
    aligned_read.read_start = aln.Position;
    // get cigar
    aligned_read.cigar_ops = aln.CigarData;
    // get if mate pair
    aligned_read.mate = aln.IsSecondMate() ? 1 : 0;
    // Second mates are never processed here
    if (aligned_read.mate) {
      continue;
    }
    // Get all the tag data
    // don't process if partially spanning (from old lobSTR)
    int partial = 0;
    if (GetIntBamTag(aln, "XP", &partial)) {
      if (partial == 1) continue;
    }
    // get read group
    if (!GetStringBamTag(aln, "RG", &aligned_read.read_group)) {
      stringstream msg;
      msg << aln.Name << " Could not get read group.";
      PrintMessageDieOnError(msg.str(), ERROR);
    }
    // get msStart
    if (!GetIntBamTag(aln, "XS", &aligned_read.msStart)) {
      stringstream msg;
      msg << aln.Name << " from group " << aligned_read.read_group << " Could not get STR start coordinate. Did this bam file come from lobSTR?";
      PrintMessageDieOnError(msg.str(), ERROR);
    }
    // get msEnd
    if (!GetIntBamTag(aln, "XE", &aligned_read.msEnd)) {
      stringstream msg;
      msg << aln.Name << " from group " << aligned_read.read_group << " Could not get STR end coordinate. Did this bam file come from lobSTR?";
      PrintMessageDieOnError(msg.str(), ERROR);
    }
    // get mapq; the tag is optional and defaults to 0
    if (!GetIntBamTag(aln, "XQ", &aligned_read.mapq)) {
      aligned_read.mapq = 0;
    }
    // get diff; second mates were already skipped above, so a missing
    // genotype here is always reported
    if (!GetIntBamTag(aln, "XD", &aligned_read.diffFromRef)) {
      stringstream msg;
      msg << aln.Name << " from group " << aligned_read.read_group << " Could not get genotype.";
      PrintMessageDieOnError(msg.str(), ERROR);
      continue;
    }
    // get mate dist; optional, defaults to 0
    if (!GetIntBamTag(aln, "XM", &aligned_read.matedist)) {
      aligned_read.matedist = 0;
    }
    // get STR seq
    if (!GetStringBamTag(aln, "XR", &aligned_read.repseq)) {
      stringstream msg;
      msg << aln.Name << " from group " << aligned_read.read_group << " Could not get repseq.";
      PrintMessageDieOnError(msg.str(), ERROR);
    }
    // get if stitched; optional, defaults to 0
    if (!GetIntBamTag(aln, "XX", &aligned_read.stitched)) {
      aligned_read.stitched = 0;
    }
    // get ref copy num
    if (!GetFloatBamTag(aln, "XC", &aligned_read.refCopyNum)) {
      stringstream msg;
      msg << aln.Name << " from group " << aligned_read.read_group << " Could not get reference copy number.";
      PrintMessageDieOnError(msg.str(), ERROR);
    }
    // get period (length of the repeat unit)
    aligned_read.period = aligned_read.repseq.length();
    if (include_flank) {  // diff is just sum of differences in cigar
      CIGAR_LIST cigar_list;
      for (vector<BamTools::CigarOp>::const_iterator
	     it = aligned_read.cigar_ops.begin();
	   it != aligned_read.cigar_ops.end(); ++it) {
	CIGAR cig;
	cig.num = it->Length;
	cig.cigar_type = it->Type;
	cigar_list.cigars.push_back(cig);
      }
      bool added_s;
      bool cigar_had_s;
      cigar_list.ResetString();
      GenerateCorrectCigar(&cigar_list, aln.QueryBases,
			   &added_s, &cigar_had_s);
      aligned_read.diffFromRef = GetSTRAllele(cigar_list);
    }
    // apply filters
    if (unit) {
      // An empty repeat sequence gives period == 0; the modulo would then be
      // undefined behaviour, and such a read cannot be a whole number of units.
      if (aligned_read.period == 0 ||
          aligned_read.diffFromRef % aligned_read.period != 0) continue;
    }
    if (abs(aligned_read.diffFromRef) > max_diff_ref) {
      continue;
    }
    if (aligned_read.mapq > max_mapq) {
      continue;
    }
    if (aligned_read.matedist > max_matedist) {
      continue;
    }
    // Add to map; operator[] creates the empty list for a locus seen for the
    // first time, so a single lookup covers both cases
    pair<string, int> coord
      (aligned_read.chrom, aligned_read.msStart);
    aligned_str_map_[coord].push_back(aligned_read);
  }
}

Exemple #22

0

Afficher le fichier

Fichier : BaseHypothesisEvaluator.cpp Projet : fw1121/Pandoras-Toolbox-for-Bioinformatics

// Score an alternative base hypothesis against the bases as called.
//
// alignment       - read whose tags supply the measurements, phasing
//                   parameters and start flow (via GetBamTags).
// flow_order_str  - flow order string used to build the flow order.
// alt_base_hyp    - alternative base sequence to compare with QueryBases.
// delta_score     - out: squared distance of the alternative hypothesis minus
//                   that of the called bases.
// fit_score       - out: the smaller of the two squared distances.
// heavy_verbose   - > 1 always prints details; > 0 prints when delta_score < 0.
//
// Both scores are left at 1e5 when GetBamTags fails.
void BaseHypothesisEvaluator(BamTools::BamAlignment    &alignment,
                             const string              &flow_order_str,
                             const string              &alt_base_hyp,
                             float                     &delta_score,
                             float                     &fit_score,
                             int                       heavy_verbose) {

    // --- Step 1: set defaults, read the tags, build the solver objects

    delta_score = 1e5;
    fit_score   = 1e5;

    vector<string> Hypotheses(2);
    vector<float>  measurements;
    vector<float>  phase_params;
    int            start_flow;

    if (!GetBamTags(alignment, flow_order_str.length(), measurements, phase_params, start_flow))
        return;

    int num_flows = measurements.size();
    ion::FlowOrder flow_order(flow_order_str, num_flows);
    BasecallerRead master_read;
    master_read.SetData(measurements, flow_order.num_flows());
    TreephaserLite treephaser(flow_order);
    treephaser.SetModelParameters(phase_params[0], phase_params[1]);

    // --- Step 2: set up both hypotheses and solve the beginning of the read

    Hypotheses[0] = alignment.QueryBases;
    Hypotheses[1] = alt_base_hyp;
    // Mapped reverse-strand reads are reverse complemented first
    if (alignment.IsMapped() && alignment.IsReverseStrand()) {
        RevComplementInPlace(Hypotheses[0]);
        RevComplementInPlace(Hypotheses[1]);
    }

    int prefix_flow = GetMasterReadPrefix(treephaser, flow_order, start_flow, Hypotheses[0], master_read);
    unsigned int prefix_size = master_read.sequence.size();

    // --- Step 3: build a prediction for every hypothesis

    vector<BasecallerRead> hypothesesReads(Hypotheses.size());
    vector<float> squared_distances(Hypotheses.size(), 0.0);
    int max_last_flow = 0;

    for (unsigned int hyp = 0; hyp < hypothesesReads.size(); ++hyp) {

        // Start from the shared prefix, then append the hypothesis bases
        hypothesesReads[hyp] = master_read;
        int flow = prefix_flow;

        for (unsigned int base_idx = 0;
             base_idx < Hypotheses[hyp].length() &&
             base_idx < (2*(unsigned int)flow_order.num_flows()-prefix_size);
             ++base_idx) {
            // Advance to the next flow that incorporates this base
            while (flow < flow_order.num_flows() && flow_order.nuc_at(flow) != Hypotheses[hyp][base_idx])
                flow++;
            // Stop once the base no longer fits into the flow order
            if (flow >= flow_order.num_flows())
                break;
            if (flow > max_last_flow)
                max_last_flow = flow;
            hypothesesReads[hyp].sequence.push_back(Hypotheses[hyp][base_idx]);
        }
        flow = min(flow, flow_order.num_flows()-1);

        // Solver simulates the beginning of the read and then fills in the
        // remaining clipped bases for which we have flow information
        treephaser.Solve(hypothesesReads[hyp], num_flows, flow);
    }

    // Squared L2 distance between measurements and predictions, up to and
    // including the last flow reached by any hypothesis
    for (unsigned int hyp = 0; hyp < hypothesesReads.size(); ++hyp) {
        float &distance = squared_distances[hyp];
        for (int flow = 0; flow <= max_last_flow; flow++) {
            distance += (measurements.at(flow) - hypothesesReads[hyp].prediction.at(flow)) *
                        (measurements.at(flow) - hypothesesReads[hyp].prediction.at(flow));
        }
    }

    // Delta: distance of the alternative hypothesis minus distance of the bases as called
    delta_score = squared_distances.at(1) - squared_distances.at(0);
    fit_score   = min(squared_distances.at(1), squared_distances.at(0));

    // --- verbose ---
    const bool print_details = heavy_verbose > 1 || (delta_score < 0 && heavy_verbose > 0);
    if (print_details) {
        cout << "Processed read " << alignment.Name << endl;
        cout << "Delta Fit: " << delta_score << " Overall Fit: " << fit_score << endl;
        PredictionGenerationVerbose(Hypotheses, hypothesesReads, phase_params, flow_order, start_flow, prefix_size);
    }

}