C++ (Cpp) BamAlignment::IsReverseStrandの例、bamtools::BamAlignment::IsReverseStrand C++ (Cpp)の例

コード例 #1

0

ファイルを表示

ファイル: RegionCoverage.cpp プロジェクト: biocyberman/TS

// Count one read against the current contig's per-strand totals.
//   aread  - the alignment being counted
//   endPos - end coordinate of the read; pass 0 to use aread.GetEndPosition()
// Every read increments the total for its strand. When bit 0 of the value
// returned by ReadOnRegion() is set, the on-target counter for that strand is
// incremented as well.
void RegionCoverage::TrackReadsOnRegion( const BamTools::BamAlignment &aread, uint32_t endPos )
{
	uint32_t readEnd = endPos ? endPos : aread.GetEndPosition();
	uint32_t covType = ReadOnRegion( aread.RefID, aread.Position + 1, readEnd );
	TargetContig *contig = m_contigList[m_rcovContigIdx];
	const bool onTarget = (covType & 1) != 0;
	// reverse-strand reads belong in the rev* counters, all others in fwd*
	if( aread.IsReverseStrand() ) {
		++contig->revReads;
		if( onTarget ) ++contig->revTrgReads;
	} else {
		++contig->fwdReads;
		if( onTarget ) ++contig->fwdTrgReads;
	}
}

コード例 #2

0

ファイルを表示

ファイル: bamParser.cpp プロジェクト: drestion/peakranger

// Record the start position of an alignment in the strand-specific read store.
// Reverse-strand alignments are added to reads.neg_reads, all others to
// reads.pos_reads, under the chromosome name chr. Alignments whose Position is
// zero or negative are not recorded.
void bamParser::insertRead(const BamTools::BamAlignment& read, Reads& reads,
		string& chr) {
	const int32_t start = read.Position;
	const bool onReverse = read.IsReverseStrand();

	// only strictly positive positions are stored
	if (start <= 0) {
		return;
	}

	uint32_t pos = static_cast<uint32_t>(start);
	if (onReverse) {
		reads.neg_reads.insertRead(chr, pos);
	} else {
		reads.pos_reads.insertRead(chr, pos);
	}
}

コード例 #3

0

ファイルを表示

ファイル: somatic-variant-filters.cpp プロジェクト: snewhouse/sga

// Collect coverage statistics for one VCF record from the reads in a BAM file.
//
//   pReader  - BAM reader; its region is set to the variant position and all
//              alignments returned for that region are consumed
//   record   - the variant being evaluated (refName, refPosition, refStr and
//              varStr are used)
//   refTable - reference sequences, looked up by record.refName
//
// Returns a CoverageStats holding the median mapping quality of every alignment
// in the region, the number of sampled reads aligned across the variant
// (n_total_reads), the number of those supporting the variant allele
// (n_evidence_reads) and, for SNVs, the phred quality of the variant base of
// each evidence read. If record.refName is not known to the BAM reader the
// stats object is returned exactly as it was default-constructed.
CoverageStats getVariantCoverage(BamTools::BamReader* pReader, const VCFRecord& record, const ReadTable* refTable)
{
    CoverageStats stats;
    
    // Bases of reference context taken around the variant, and the percent
    // identity an overlap must reach to be trusted
    static const int flankingSize = 100;
    static const double minPercentIdentity = 95.0f;

    bool is_snv = record.refStr.size() == 1 && record.varStr.size() == 1;

    // Grab the reference haplotype: a window around the variant, clamped to
    // the bounds of the chromosome sequence
    int eventLength = record.varStr.length();
    int zeroBasedPos = record.refPosition - 1;
    int start = zeroBasedPos - flankingSize - 1;
    if(start < 0)
        start = 0;

    int end = zeroBasedPos + eventLength + 2 * flankingSize;
    const SeqItem& chr = refTable->getRead(record.refName);
    if(end > (int)chr.seq.length())
        end = (int)chr.seq.length();

    std::string reference_haplotype = chr.seq.substr(start, end - start);
    // Offset of the variant inside the extracted window
    int translatedPos = zeroBasedPos - start;

    // The variant haplotype is the reference window with the ref allele
    // replaced by the alt allele
    std::string variant_haplotype = reference_haplotype;
    
    // Ensure that the reference string at the variant matches the expected
    assert(variant_haplotype.substr(translatedPos, record.refStr.length()) == record.refStr);
    variant_haplotype.replace(translatedPos, record.refStr.length(), record.varStr);

    // Grab all reads in reference region
    int refID = pReader->GetReferenceID(record.refName);
    if(refID < 0)
        return stats;

    int refStart = record.refPosition;
    int refEnd = record.refPosition;
    pReader->SetRegion(refID, refStart, refID, refEnd);
    BamTools::BamAlignment aln;

    // Every alignment contributes to the mapping-quality median, but only
    // those with non-zero mapping quality are kept for evaluation
    std::vector<double> mapping_quality;
    std::vector<BamTools::BamAlignment> alignments;
    while(pReader->GetNextAlignment(aln)) {
        if(aln.MapQuality > 0)
            alignments.push_back(aln);
        mapping_quality.push_back(aln.MapQuality);
    }

    // With no alignments in the region the median falls back to 60
    if(!mapping_quality.empty())
        stats.median_mapping_quality = median(mapping_quality);
    else
        stats.median_mapping_quality = 60;

    // Shuffle and evaluate at most opt::capAlignments alignments
    std::random_shuffle(alignments.begin(), alignments.end());

    for(size_t i = 0; i < alignments.size() && i < opt::capAlignments; ++i) {
        BamTools::BamAlignment alignment = alignments[i];

        // Split the read into the parts before, at and after the variant
        VariantReadSegments segments = splitReadAtVariant(alignment, record);

        if(opt::verbose > 1)
        {
            fprintf(stderr, "var: %zu %s -> %s\n",  record.refPosition, record.refStr.c_str(), record.varStr.c_str());
            fprintf(stderr, "pos: %d\n",  alignment.Position);
            fprintf(stderr, "strand: %s\n", alignment.IsReverseStrand() ? "-" : "+");
            fprintf(stderr, "read: %s\n", alignment.QueryBases.c_str());
            fprintf(stderr, "qual: %s\n", alignment.Qualities.c_str());
            fprintf(stderr, "alnb: %s\n", alignment.AlignedBases.c_str());
            
            fprintf(stderr, "Pre: %s\n",  segments.preSegment.c_str());
            fprintf(stderr, "Var: %s\n",  segments.variantSegment.c_str());
            fprintf(stderr, "Pos: %s\n",  segments.postSegment.c_str());
            
            fprintf(stderr, "PreQual: %s\n",  segments.preQual.c_str());
            fprintf(stderr, "VarQual: %s\n",  segments.variantQual.c_str());
            fprintf(stderr, "PosQual: %s\n",  segments.postQual.c_str());
        }

        // A read counts as aligned at the variant when it has bases at the
        // variant and on at least one side of it
        bool aligned_at_variant = segments.variantSegment.size() > 0 && 
                                  (segments.preSegment.size() > 0 || segments.postSegment.size() > 0);

        if(!aligned_at_variant)
            continue;
                                        
        stats.n_total_reads += 1;
        
        if(segments.variantSegment == record.refStr)
            continue; // not an evidence read

        // Align the read to the reference and variant haplotype
        SequenceOverlap ref_overlap = Overlapper::computeOverlapAffine(alignment.QueryBases, reference_haplotype);
        SequenceOverlap var_overlap = Overlapper::computeOverlapAffine(alignment.QueryBases, variant_haplotype);
        
        // It is enough for either overlap to reach the identity threshold
        bool quality_alignment = (ref_overlap.getPercentIdentity() >= minPercentIdentity || 
                                 var_overlap.getPercentIdentity() >= minPercentIdentity);

        // Evidence reads must score strictly better against the variant haplotype
        bool is_evidence_read = quality_alignment && var_overlap.score > ref_overlap.score;
        if(is_evidence_read)
        {
            stats.n_evidence_reads += 1;
            // For SNVs also record the base quality at the variant position
            if(is_snv && segments.variantQual.size() == 1)
            {
                char qb = segments.variantQual[0];
                int q = Quality::char2phred(qb);
                stats.snv_evidence_quals.push_back(q);
            }
        }
    }

    return stats;
}

コード例 #4

0

ファイルを表示

// Count one read against its contig and, when it is on target, against the
// single target region judged most likely to have produced it.
//   aread  - the alignment being counted
//   endPos - end coordinate of the read; pass 0 to use aread.GetEndPosition()
// Every read increments the contig's total for its strand. For on-target reads
// (bit 0 of the ReadOnRegion() result) the 'overlaps' counter of each overlapped
// region is incremented, and one best region receives the per-strand read count
// and, when it qualifies, the per-strand end-to-end/coverage count.
void AmpliconRegionStatistics::TrackReadsOnRegion( const BamTools::BamAlignment &aread, uint32_t endPos )
{
	// pseudo-random number generator 'seed' for resolving equivalent read assignments
	// (static: the counter keeps advancing across calls)
	static uint16_t clockSeed = 0;
	// check/set first region read overlaps
	uint32_t readSrt = aread.Position + 1;
	uint32_t readEnd = endPos ? endPos : aread.GetEndPosition();
	uint32_t covType = ReadOnRegion( aread.RefID, readSrt, readEnd );
	// maintain base method of tracking total reads
	TargetContig *contig = m_contigList[m_rcovContigIdx];
	bool isRev = aread.IsReverseStrand();
	if( isRev ) {
		++contig->revReads;
	} else {
		++contig->fwdReads;
	}
	// Tracking of reads on target
	if( covType & 1 ) {
		// iterate over all regions overlapping read...
		int32_t bestEndDist = -m_maxUpstreamPrimerStart;
		int32_t bestOverlap = 0;
		uint32_t numBestRegions = 0;
		bool haveBestEnd = false;
		for( TargetRegion *cur = m_rcovRegion; cur; cur = cur->next ) {
			// stop once a region starts after the read ends
			if( readEnd < cur->trgSrt ) break;
			// skip a region that ends before the read starts - the region being
			// visited must be tested here, not the head of the list, otherwise
			// regions the read does not touch are counted as overlapped
			if( readSrt > cur->trgEnd ) continue;
			// save stats for all overlapped reads
			++(GetStats(cur)->overlaps);
			// find most likely AmpliSeq primed region of those overlapped
			// NOTE: can still be wrong for regions starting very close together, given 5' digestion uncertainty,
			// coupled with read length and digestion uncertainty at 3'
			int32_t dSrt = readSrt - cur->trgSrt;
			int32_t dEnd = cur->trgEnd - readEnd;
			int32_t endDist5p = isRev ? dEnd : dSrt;
			// for non-amplicon reads, ends are ignored and only maximum overlap is employed to distinguish target region
			if( m_ampliconReads ) {
				// always select region that is closest start before 5p primer
				if( endDist5p < 0 && endDist5p > bestEndDist ) {
					haveBestEnd = true;
					bestEndDist = endDist5p;
					bestOverlap = 0; // force record best below
				} else if( haveBestEnd && endDist5p != bestEndDist ) {
					// region is not closer primed or same distance from false priming site
					continue;
				}
			}
			// save region based on max overlap for equivalent regions
			if( dSrt < 0 ) dSrt = 0;
			if( dEnd < 0 ) dEnd = 0;
			int32_t overlap = cur->trgEnd - cur->trgSrt - dSrt - dEnd; // +1
			if( overlap >= bestOverlap ) {
				// if overlaps also match then default to region starting most 3'
				// - cannot do better w/o knowing exact priming location, or possibly using ZA tag value
				if( overlap == bestOverlap ) {
					// stack multiple equivalent solutions
					if( numBestRegions >= m_regionStackSize ) {
						// safety code - only triggered if many targets overlapping read
						m_regionStackSize <<= 1;	// *2
						m_regionStack = (TargetRegion **)realloc(
							m_regionStack, m_regionStackSize * sizeof(TargetRegion *) );
					}
				} else {
					// save new best solution - these values are the same for all equivalent solutions
					bestOverlap = overlap;
					numBestRegions = 0;
				}
				m_regionStack[numBestRegions++] = cur;
			}
		}
		// nothing to assign when no region qualified; this also keeps the
		// modulo below from dividing by zero
		if( numBestRegions > 0 ) {
			// pseudo-randomly choose best region of equivalent best regions
			TargetRegion *bestRegion = m_regionStack[ clockSeed % numBestRegions ];
			bool e2e_or_cov;
			if( m_sigFacCoverage ) {
				// coverage mode: fraction of the target covered by the read
				int32_t trgLen = bestRegion->trgEnd - bestRegion->trgSrt + 1;
				e2e_or_cov = (double(bestOverlap+1)/trgLen >= m_sigFacCoverage);
			} else {
				// end-to-end mode: both read ends within m_maxE2eEndDist of the target ends
				int32_t dSrt = readSrt - bestRegion->trgSrt;
				int32_t dEnd = bestRegion->trgEnd - readEnd;
				if( dSrt < 0 ) dSrt = 0;
				if( dEnd < 0 ) dEnd = 0;
				e2e_or_cov = ((dSrt > dEnd ? dSrt : dEnd) <= m_maxE2eEndDist);
			}
			StatsData *stats = GetStats(bestRegion);
			if( isRev ) {
				++contig->revTrgReads;
				++stats->revReads;
				if( e2e_or_cov ) ++stats->rev_e2e;
			} else {
				++contig->fwdTrgReads;
				++stats->fwdReads;
				if( e2e_or_cov ) ++stats->fwd_e2e;
			}
		}
	}
	++clockSeed;
}

コード例 #5

0

ファイルを表示

ファイル: ReadContainer.cpp プロジェクト: roland-ewald/lobstr-code

// Fill *aligned_read from a BAM alignment and apply the configured read filters.
//
//   aln                 - the alignment to parse
//   aligned_read        - output; populated field by field as tags are read
//   ref_ext_nucleotides - extended reference sequences keyed by (chrom, msStart);
//                         only consulted when the end-match filters are enabled
//
// Returns true when the read passes every filter (it is then counted as
// UNFILTERED). Returns false when the read is a second mate, is flagged as
// partially spanning, lacks the XD tag, or is rejected by a filter. A missing
// RG, XS, XE, XR or XC tag is reported through PrintMessageDieOnError.
bool ReadContainer::ParseRead(const BamTools::BamAlignment& aln,
			      AlignedRead* aligned_read, 
			      map<pair<string,int>, string>& ref_ext_nucleotides) {
  // get read ID
  aligned_read->ID = aln.Name;
  // get nucleotides
  aligned_read->nucleotides = aln.QueryBases;
  // get qualities
  aligned_read->qualities = aln.Qualities;
  // get strand (stored as the reverse-strand flag)
  aligned_read->strand = aln.IsReverseStrand();
  // get chrom
  aligned_read->chrom = references.at(aln.RefID).RefName;
  // get read start
  aligned_read->read_start = aln.Position;
  // get cigar
  aligned_read->cigar_ops = aln.CigarData;
  // get if mate pair
  if (aln.IsSecondMate()) {
    aligned_read->mate = 1;
  } else {
    aligned_read->mate = 0;
  }
  // Second mates are not processed any further
  if (aligned_read->mate) {
    return false;
  }
  // Get all the tag data
  // don't process if partially spanning (from old lobSTR)
  int partial = 0;
  if (GetIntBamTag(aln, "XP", &partial)) {
    if (partial == 1) return false;
  }
  // get read group
  if (!GetStringBamTag(aln, "RG", &aligned_read->read_group)) {
    stringstream msg;
    msg << aln.Name << " Could not get read group.";
    PrintMessageDieOnError(msg.str(), ERROR);
  }
  // get msStart
  if (!GetIntBamTag(aln, "XS", &aligned_read->msStart)) {
    stringstream msg;
    msg << aln.Name << " from group " << aligned_read->read_group << " Could not get STR start coordinate. Did this bam file come from lobSTR?";
    PrintMessageDieOnError(msg.str(), ERROR);
  }
  // get msEnd
  if (!GetIntBamTag(aln, "XE", &aligned_read->msEnd)) {
    stringstream msg;
    msg << aln.Name << " from group " << aligned_read->read_group << " Could not get STR end coordinate. Did this bam file come from lobSTR?";
    PrintMessageDieOnError(msg.str(), ERROR);
  }
  // get mapq; a missing XQ tag is not an error, the value defaults to 0
  // (msg below is never used)
  if (!GetIntBamTag(aln, "XQ", &aligned_read->mapq)) {
    stringstream msg;
    aligned_read->mapq = 0;
  }
  // get diff; reads without the XD tag are silently skipped
  if (!GetIntBamTag(aln, "XD", &aligned_read->diffFromRef)) {
    return false;
  }
  // get mate dist (defaults to 0 when the XM tag is absent)
  if (!GetIntBamTag(aln, "XM", &aligned_read->matedist)) {
    aligned_read->matedist = 0;
  }
  // get STR seq
  if (!GetStringBamTag(aln, "XR", &aligned_read->repseq)) {
    stringstream msg;
    msg << aln.Name << " from group " << aligned_read->read_group << " Could not get repseq.";
    PrintMessageDieOnError(msg.str(), ERROR);
  }
  // get if stitched (defaults to 0 when the XX tag is absent)
  if (!GetIntBamTag(aln, "XX", &aligned_read->stitched)) {
    aligned_read->stitched = 0;
  }
  // get ref copy num
  if (!GetFloatBamTag(aln, "XC", &aligned_read->refCopyNum)) {
    stringstream msg;
    msg << aln.Name << " from group " << aligned_read->read_group << " Could not get reference copy number.";
    PrintMessageDieOnError(msg.str(), ERROR);
  }
  // get period (length of the repeat unit sequence)
  aligned_read->period = aligned_read->repseq.length();
  // With include_flank set, the XD value is replaced by an allele difference
  // recomputed from the read's CIGAR operations
  if (include_flank) {  // diff is just sum of differences in cigar
    CIGAR_LIST cigar_list;
    for (vector<BamTools::CigarOp>::const_iterator
	   it = aligned_read->cigar_ops.begin();
	 it != aligned_read->cigar_ops.end(); it++) {
      CIGAR cig;
      cig.num = (*it).Length;
      cig.cigar_type = (*it).Type;
      cigar_list.cigars.push_back(cig);
    }
    // out-parameters of GenerateCorrectCigar; their values are not used here
    bool added_s;
    bool cigar_had_s;
    cigar_list.ResetString();
    GenerateCorrectCigar(&cigar_list, aln.QueryBases,
			 &added_s, &cigar_had_s);
    aligned_read->diffFromRef = GetSTRAllele(cigar_list);
  }
  // apply filters; each rejection is recorded in filter_counter
  // unit: the difference from the reference must be a whole number of repeat units
  if (unit) {
    if (aligned_read->diffFromRef % aligned_read->period != 0){ 
      filter_counter.increment(FilterCounter::NOT_UNIT);
      return false;
    }
  }
  if (abs(aligned_read->diffFromRef) > max_diff_ref) {
    filter_counter.increment(FilterCounter::DIFF_FROM_REF);
    return false;
  }
  // note: reads are rejected when mapq is ABOVE max_mapq
  if (aligned_read->mapq > max_mapq) {
    filter_counter.increment(FilterCounter::MAPPING_QUALITY);
    return false;
  }
  if (aligned_read->matedist > max_matedist) {
    filter_counter.increment(FilterCounter::MATE_DIST);
    return false;
  }
  // Check if the allele length is valid
  if (aligned_read->diffFromRef + (aligned_read->refCopyNum*aligned_read->period) < MIN_ALLELE_SIZE) {
    filter_counter.increment(FilterCounter::ALLELE_SIZE);
    return false;
  }

  // check that read sufficiently spans STR: it must start at least min_border
  // before msStart and end at least min_border after msEnd
  int max_read_start = aligned_read->msStart - min_border;
  int min_read_stop  = aligned_read->msEnd   + min_border;
  if (aln.Position > max_read_start || aln.GetEndPosition() < min_read_stop){
    filter_counter.increment(FilterCounter::SPANNING_AMOUNT);
    return false; 
  }
  
  // check that both ends of the read contain sufficient perfect matches
  if (min_read_end_match > 0){
    map<pair<string,int>, string>::iterator loc_iter = ref_ext_nucleotides.find(pair<string,int>(aligned_read->chrom, aligned_read->msStart));
    // NOTE(review): loc_iter is dereferenced below, so this relies on
    // PrintMessageDieOnError not returning for ERROR - confirm
    if (loc_iter == ref_ext_nucleotides.end())
      PrintMessageDieOnError("No extended reference sequence found for locus", ERROR);
    string ref_ext_seq = loc_iter->second;
    pair<int,int> num_end_matches = AlignmentFilters::GetNumEndMatches(aligned_read, ref_ext_seq, aligned_read->msStart-extend);
    if (num_end_matches.first < min_read_end_match || num_end_matches.second < min_read_end_match){
      filter_counter.increment(FilterCounter::NUM_END_MATCHES);
      return false;
    }
  }

  // check that the prefix and suffix of the read match maximally compared to proximal reference locations
  if (maximal_end_match_window > 0){
    map<pair<string,int>, string>::iterator loc_iter = ref_ext_nucleotides.find(pair<string,int>(aligned_read->chrom, aligned_read->msStart));
    if (loc_iter == ref_ext_nucleotides.end())
      PrintMessageDieOnError("No extended reference sequence found for locus", ERROR);
    string ref_ext_seq = loc_iter->second;
    bool maximum_end_matches = AlignmentFilters::HasLargestEndMatches(aligned_read, ref_ext_seq, aligned_read->msStart-extend, maximal_end_match_window, maximal_end_match_window);
    if (!maximum_end_matches){
      filter_counter.increment(FilterCounter::NOT_MAXIMAL_END);
      return false;
    }
  }

  // check that both ends of the aligned read have sufficient bases before the first indel
  // (a distance of -1 is exempt from the check)
  if (min_bp_before_indel > 0){
    pair<int, int> num_bps = AlignmentFilters::GetEndDistToIndel(aligned_read);
    if (num_bps.first != -1 && num_bps.first < min_bp_before_indel){
      filter_counter.increment(FilterCounter::BP_BEFORE_INDEL);
      return false;
    }
    if (num_bps.second != -1 && num_bps.second < min_bp_before_indel){
      filter_counter.increment(FilterCounter::BP_BEFORE_INDEL);
      return false;
    }
  }
  // the read survived every filter
  filter_counter.increment(FilterCounter::UNFILTERED);
  return true;
}

コード例 #6

0

ファイルを表示

ファイル: filterBAM.cpp プロジェクト: avilella/sga

// Returns true if the paired reads are a short-insert pair.
//
// The two alignments are mapped to graph vertices through the names of the
// contigs they align to. When both reads fall on the same vertex the pair is
// short if the absolute insert size of record1 is below opt::maxDistance.
// Otherwise the graph is searched for walks from the first vertex to the
// second, and the pair is short if any walk yields a fragment shorter than
// opt::maxDistance.
bool filterByGraph(StringGraph* pGraph, 
                   const BamTools::RefVector& referenceVector, 
                   BamTools::BamAlignment& record1, 
                   BamTools::BamAlignment& record2)
{
    // Contig names serve as the vertex IDs
    std::string idX = referenceVector[record1.RefID].RefName;
    std::string idY = referenceVector[record2.RefID].RefName;

    Vertex* pX = pGraph->getVertex(idX);
    Vertex* pY = pGraph->getVertex(idY);

    // Both vertices must exist in the graph
    assert(pX != NULL && pY != NULL);

#ifdef DEBUG_CONNECT
    std::cout << "Finding path from " << idX << " to " << idY << "\n";
#endif

    // Walks leave X and enter Y in the sense direction unless the
    // corresponding read aligned to the reverse strand
    EdgeDir dirOutOfX = ED_SENSE;
    if(record1.IsReverseStrand())
        dirOutOfX = !dirOutOfX;

    EdgeDir dirIntoY = ED_SENSE;
    if(record2.IsReverseStrand())
        dirIntoY = !dirIntoY;

    // Coordinate on each contig where the fragment begins and ends
    int fromX;
    if(dirOutOfX == ED_SENSE)
        fromX = record1.Position;
    else
        fromX = record1.GetEndPosition();

    int toY;
    if(dirIntoY == ED_SENSE)
        toY = record2.Position;
    else
        toY = record2.GetEndPosition();

    // The part of contig X already spanned by the fragment reduces how far
    // the search has to go
    int coveredX;
    if(dirOutOfX == ED_SENSE)
        coveredX = pX->getSeqLen() - fromX;
    else
        coveredX = fromX;
    int maxWalkDistance = opt::maxDistance - coveredX;

    // Same contig: the recorded insert size decides
    if(pX == pY)
        return abs(record1.InsertSize) < opt::maxDistance;

    // Different contigs: look for a connecting walk that is short enough
    SGWalkVector walks;
    SGSearch::findWalks(pX, pY, dirOutOfX, maxWalkDistance, 10000, true, walks);

    for(size_t w = 0; w < walks.size(); ++w)
    {
        std::string fragment = walks[w].getFragmentString(pX, pY, fromX, toY, dirOutOfX, dirIntoY);
        if((int)fragment.size() < opt::maxDistance)
            return true;
    }

    return false;
}

コード例 #7

0

ファイルを表示

// Read alignments from the BAM reader and store those that pass the filters in
// aligned_str_map_, grouped by (chromosome, STR start).
//
//   ref_str - locus to load. Unless its chrom is "NA", the reader is restricted
//             to [start - extend, stop + extend] on that chromosome; with "NA"
//             no region is set and the reader is consumed from where it stands.
//
// Second mates, reads flagged as partially spanning (XP == 1) and reads failing
// the unit / max_diff_ref / max_mapq / max_matedist filters are skipped. A
// missing RG, XS, XE, XD, XR or XC tag, an unknown chromosome or a failure to
// set the region is reported through PrintMessageDieOnError.
void ReadContainer::AddReadsFromFile(const ReferenceSTR& ref_str) {
  if (ref_str.chrom != "NA") {
    int refid = -1;
    if (chrom_to_refid.find(ref_str.chrom) !=
        chrom_to_refid.end()) {
      refid = chrom_to_refid.at(ref_str.chrom);
    }
    if (refid == -1) {
      PrintMessageDieOnError("Could not locate STR reference chromosome in bam file", ERROR);
    }
    BamTools::BamRegion bam_region(refid, ref_str.start-extend, refid, ref_str.stop+extend);
    if (!reader.SetRegion(bam_region)) {
      PrintMessageDieOnError("Could not set bam region", ERROR);
    }
  }
  BamTools::BamAlignment aln;
  while (reader.GetNextAlignment(aln)) {
    AlignedRead aligned_read;
    // get read ID
    aligned_read.ID = aln.Name;
    // get nucleotides
    aligned_read.nucleotides = aln.QueryBases;
    // get qualities
    aligned_read.qualities = aln.Qualities;
    // get strand (stored as the reverse-strand flag)
    aligned_read.strand = aln.IsReverseStrand();
    // get chrom
    aligned_read.chrom = references.at(aln.RefID).RefName;
    // get read start
    aligned_read.read_start = aln.Position;
    // get cigar
    aligned_read.cigar_ops = aln.CigarData;
    // get if mate pair
    if (aln.IsSecondMate()) {
      aligned_read.mate = 1;
    } else {
      aligned_read.mate = 0;
    }
    // Second mates are not processed any further
    if (aligned_read.mate) {
      continue;
    }
    // Get all the tag data
    // don't process if partially spanning (from old lobSTR)
    int partial = 0;
    if (GetIntBamTag(aln, "XP", &partial)) {
      if (partial == 1) continue;
    }
    // get read group
    if (!GetStringBamTag(aln, "RG", &aligned_read.read_group)) {
      stringstream msg;
      msg << aln.Name << " Could not get read group.";
      PrintMessageDieOnError(msg.str(), ERROR);
    }
    // get msStart
    if (!GetIntBamTag(aln, "XS", &aligned_read.msStart)) {
      stringstream msg;
      msg << aln.Name << " from group " << aligned_read.read_group << " Could not get STR start coordinate. Did this bam file come from lobSTR?";
      PrintMessageDieOnError(msg.str(), ERROR);
    }
    // get msEnd
    if (!GetIntBamTag(aln, "XE", &aligned_read.msEnd)) {
      stringstream msg;
      msg << aln.Name << " from group " << aligned_read.read_group << " Could not get STR end coordinate. Did this bam file come from lobSTR?";
      PrintMessageDieOnError(msg.str(), ERROR);
    }
    // get mapq; a missing XQ tag is not an error, the value defaults to 0
    if (!GetIntBamTag(aln, "XQ", &aligned_read.mapq)) {
      aligned_read.mapq = 0;
    }
    // get diff; second mates were skipped above, so a read reaching this
    // point without the XD tag is always reported
    if (!GetIntBamTag(aln, "XD", &aligned_read.diffFromRef)) {
      stringstream msg;
      msg << aln.Name << " from group " << aligned_read.read_group << " Could not get genotype.";
      PrintMessageDieOnError(msg.str(), ERROR);
      continue;
    }
    // get mate dist (defaults to 0 when the XM tag is absent)
    if (!GetIntBamTag(aln, "XM", &aligned_read.matedist)) {
      aligned_read.matedist = 0;
    }
    // get STR seq
    if (!GetStringBamTag(aln, "XR", &aligned_read.repseq)) {
      stringstream msg;
      msg << aln.Name << " from group " << aligned_read.read_group << " Could not get repseq.";
      PrintMessageDieOnError(msg.str(), ERROR);
    }
    // get if stitched (defaults to 0 when the XX tag is absent)
    if (!GetIntBamTag(aln, "XX", &aligned_read.stitched)) {
      aligned_read.stitched = 0;
    }
    // get ref copy num
    if (!GetFloatBamTag(aln, "XC", &aligned_read.refCopyNum)) {
      stringstream msg;
      msg << aln.Name << " from group " << aligned_read.read_group << " Could not get reference copy number.";
      PrintMessageDieOnError(msg.str(), ERROR);
    }
    // get period (length of the repeat unit sequence)
    aligned_read.period = aligned_read.repseq.length();
    // With include_flank set, the XD value is replaced by an allele difference
    // recomputed from the read's CIGAR operations
    if (include_flank) {  // diff is just sum of differences in cigar
      CIGAR_LIST cigar_list;
      for (vector<BamTools::CigarOp>::const_iterator
             it = aligned_read.cigar_ops.begin();
           it != aligned_read.cigar_ops.end(); it++) {
        CIGAR cig;
        cig.num = (*it).Length;
        cig.cigar_type = (*it).Type;
        cigar_list.cigars.push_back(cig);
      }
      // out-parameters of GenerateCorrectCigar; their values are not used here
      bool added_s;
      bool cigar_had_s;
      cigar_list.ResetString();
      GenerateCorrectCigar(&cigar_list, aln.QueryBases,
                           &added_s, &cigar_had_s);
      aligned_read.diffFromRef = GetSTRAllele(cigar_list);
    }
    // apply filters
    if (unit) {
      if (aligned_read.diffFromRef % aligned_read.period  != 0) continue;
    }
    if (abs(aligned_read.diffFromRef) > max_diff_ref) {
      continue;
    }
    // note: reads are skipped when mapq is ABOVE max_mapq
    if (aligned_read.mapq > max_mapq) {
      continue;
    }
    if (aligned_read.matedist > max_matedist) {
      continue;
    }
    // Add to map: operator[] creates the empty list for a locus seen for the
    // first time, so a single lookup covers both the new and existing case
    pair<string, int> coord
      (aligned_read.chrom, aligned_read.msStart);
    aligned_str_map_[coord].push_back(aligned_read);
  }
}

コード例 #8

0

ファイルを表示

ファイル: BaseHypothesisEvaluator.cpp プロジェクト: fw1121/Pandoras-Toolbox-for-Bioinformatics

// Compare how well two base-sequence hypotheses for a read explain its measured
// flow signal.
//
//   alignment      - read whose called bases (QueryBases) form hypothesis 0
//   flow_order_str - flow order string used to build the ion::FlowOrder
//   alt_base_hyp   - alternative base sequence, hypothesis 1
//   delta_score    - out: squared distance of hypothesis 1 minus that of hypothesis 0
//   fit_score      - out: the smaller of the two squared distances
//   heavy_verbose  - >1 prints details for every read, >0 only when delta_score < 0
//
// Both scores are set to 1e5 on entry and keep that value if GetBamTags fails.
void BaseHypothesisEvaluator(BamTools::BamAlignment    &alignment,
                             const string              &flow_order_str,
                             const string              &alt_base_hyp,
                             float                     &delta_score,
                             float                     &fit_score,
                             int                       heavy_verbose) {

    // --- Step 1: Initialize Objects and retrieve relevant tags

	delta_score = 1e5;
	fit_score   = 1e5;
	vector<string>   Hypotheses(2);
    vector<float>    measurements, phase_params;
    int              start_flow, num_flows, prefix_flow=0;

    // Without the required tags nothing can be evaluated; scores stay at 1e5
    if (not GetBamTags(alignment, flow_order_str.length(), measurements, phase_params, start_flow))
      return;
	num_flows = measurements.size();
	ion::FlowOrder flow_order(flow_order_str, num_flows);
	BasecallerRead master_read;
	master_read.SetData(measurements, flow_order.num_flows());
	TreephaserLite   treephaser(flow_order);
    // NOTE(review): assumes GetBamTags delivers at least two phase parameters - confirm
    treephaser.SetModelParameters(phase_params[0], phase_params[1]);

    // --- Step 2: Solve beginning of the read
    // Look at mapped vs. unmapped reads in BAM
    Hypotheses[0] = alignment.QueryBases;
    Hypotheses[1] = alt_base_hyp;
    // Safety: reverse complement reverse strand reads in mapped bam
    if (alignment.IsMapped() and alignment.IsReverseStrand()) {
      RevComplementInPlace(Hypotheses[0]);
      RevComplementInPlace(Hypotheses[1]);
    }

    // The prefix is derived from hypothesis 0 only and shared by both hypotheses
    prefix_flow = GetMasterReadPrefix(treephaser, flow_order, start_flow, Hypotheses[0], master_read);
    unsigned int prefix_size = master_read.sequence.size();

    // --- Step 3: creating predictions for the individual hypotheses

    vector<BasecallerRead> hypothesesReads(Hypotheses.size());
    vector<float> squared_distances(Hypotheses.size(), 0.0);
    // Highest flow index reached by any hypothesis base; bounds the distance sum below
    int max_last_flow = 0;

    for (unsigned int i_hyp=0; i_hyp<hypothesesReads.size(); ++i_hyp) {

      hypothesesReads[i_hyp] = master_read;
      // --- add hypothesis sequence to clipped prefix
      unsigned int i_base = 0;
      int i_flow = prefix_flow;

      while (i_base<Hypotheses[i_hyp].length() and i_base<(2*(unsigned int)flow_order.num_flows()-prefix_size)) {
        // advance to the next flow whose nucleotide matches the current base
        while (i_flow < flow_order.num_flows() and flow_order.nuc_at(i_flow) != Hypotheses[i_hyp][i_base])
          i_flow++;
        if (i_flow < flow_order.num_flows() and i_flow > max_last_flow)
          max_last_flow = i_flow;
        // ran out of flows: the remaining bases cannot be placed
        if (i_flow >= flow_order.num_flows())
          break;
        // Add base to sequence only if it fits into flow order
        hypothesesReads[i_hyp].sequence.push_back(Hypotheses[i_hyp][i_base]);
        i_base++;
      }
      // clamp to the last valid flow index before handing it to the solver
      i_flow = min(i_flow, flow_order.num_flows()-1);

      // Solver simulates beginning of the read and then fills in the remaining clipped bases for which we have flow information
      treephaser.Solve(hypothesesReads[i_hyp], num_flows, i_flow);
    }
    // Sum the squared differences between measurements and predictions over
    // flows 0..max_last_flow for each hypothesis
    for (unsigned int i_hyp=0; i_hyp<hypothesesReads.size(); ++i_hyp) {
      for (int iFlow=0; iFlow<=max_last_flow; iFlow++)
        squared_distances[i_hyp] += (measurements.at(iFlow) - hypothesesReads[i_hyp].prediction.at(iFlow)) *
                                    (measurements.at(iFlow) - hypothesesReads[i_hyp].prediction.at(iFlow));
    }

    // Delta: squared distance of alternative base Hypothesis - squared distance of bases as called
    // (negative when the alternative fits the measurements better)
    delta_score = squared_distances.at(1) - squared_distances.at(0);
    fit_score   = min(squared_distances.at(1), squared_distances.at(0));


    // --- verbose ---
    if (heavy_verbose > 1 or (delta_score < 0 and heavy_verbose > 0)) {
      cout << "Processed read " << alignment.Name << endl;
      cout << "Delta Fit: " << delta_score << " Overall Fit: " << fit_score << endl;
      PredictionGenerationVerbose(Hypotheses, hypothesesReads, phase_params, flow_order, start_flow, prefix_size);
    }

}