C++ (Cpp) T_Sequence::SubreadEnd Examples

Programming Language: C++ (Cpp)

Class/Type: T_Sequence

Method/Function: SubreadEnd

Examples at hotexamples.com: 4

C++ (Cpp) T_Sequence::SubreadEnd - 4 examples found. These are the top-rated real-world C++ (Cpp) examples of T_Sequence::SubreadEnd extracted from open-source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

SubreadStart(5)

SubreadEnd(4)

SubreadLength(3)

Free(2)

CopyTitle(1)

HoleNumber(1)

MakeRC(1)

PrintAsciiQual(1)

ReferenceSubstring(1)

Example #1

Show file

File: MapBySuffixArrayImpl.hpp Project: bnbowman/blasr_libcpp

//
// For every start position p in [SubreadStart, SubreadEnd -
// minPrefixMatchLength + 1) of 'read', query the suffix array 'sa'
// through StoreLCPBounds and record the chosen suffix array interval
// in matchLow[m] / matchHigh[m] and its length in matchLength[m],
// where m = p - SubreadStart.  A position without a usable match is
// left as 0/0/0.
//
// The three output vectors are overwritten and sized to the number of
// searched positions.
//
// Returns 0 without touching the output vectors when
// minPrefixMatchLength > 0 and the subread is shorter than it;
// returns 1 otherwise.
//
// NOTE(review): entry k of lowMatchBound/highMatchBound is treated as
// the half-open suffix array interval for a match of length
// minPrefixMatchLength + k + 1 -- inferred from how the values are
// used below; confirm against StoreLCPBounds.
//
int LocateAnchorBoundsInSuffixArray(T_RefSequence &reference,
    T_SuffixArray &sa, T_Sequence &read, unsigned int minPrefixMatchLength,
    std::vector<DNALength> &matchLow, std::vector<DNALength> &matchHigh,
    std::vector<DNALength> &matchLength, AnchorParameters &params) {

    //
    // Make sure there is enough of this read to map.  Since searches
    // are keyed off of 'minPrefixMatchLength' matches, don't search
    // anything shorter than that.
    //
    if (minPrefixMatchLength > 0 and
        read.SubreadLength() < minPrefixMatchLength) {
        return 0;
    }

    DNALength p, m;
    DNALength matchEnd = read.SubreadEnd() - minPrefixMatchLength + 1;
    DNALength numSearchedPositions = matchEnd - read.SubreadStart();

    //
    // One zero-initialized slot per searched position.  Positions that
    // are skipped by the advance step at the bottom of the loop keep
    // these zeros.
    //
    matchLength.assign(numSearchedPositions, 0);
    matchLow.assign(numSearchedPositions, 0);
    matchHigh.assign(numSearchedPositions, 0);
    vector<SAIndex> lowMatchBound, highMatchBound;

    for (m = 0, p = read.SubreadStart(); p < matchEnd; p++, m++) {
        lowMatchBound.clear();
        highMatchBound.clear();
        DNALength lcpLength = sa.StoreLCPBounds(reference.seq, reference.length,
            &read.seq[p], matchEnd - p,
            params.useLookupTable,
            params.maxLCPLength,
            //
            // Store the positions in the SA
            // that are searched.
            //
            lowMatchBound, highMatchBound,
            params.stopMappingOnceUnique);

        //
        // Possibly print the lcp bounds for debugging: the width of
        // every stored interval, space separated, one line per
        // position.
        //
        if (params.lcpBoundsOutPtr != NULL) {
            for (size_t i = 0; i < lowMatchBound.size(); i++) {
                *params.lcpBoundsOutPtr <<
                    (highMatchBound[i] - lowMatchBound[i]);
                if (i + 1 < lowMatchBound.size()) {
                    *params.lcpBoundsOutPtr << " ";
                }
            }
            *params.lcpBoundsOutPtr << endl;
        }

        //
        // Default to no match.  This also covers the case where the
        // suffix array search stored no bounds at all.
        //
        matchLow[m] = matchHigh[m] = matchLength[m] = 0;

        //
        // Back up past trailing empty intervals (low == high) until an
        // interval containing at least one match is found.
        //
        int lcpSearchLength = lowMatchBound.size();
        while (lcpSearchLength > 0 and
                lowMatchBound[lcpSearchLength - 1] ==
                highMatchBound[lcpSearchLength - 1]) {
            lcpSearchLength--;
            lcpLength--;
        }

        //
        // Only continue when a non-empty interval remains.  If every
        // stored interval was empty, lcpSearchLength is 0 here and
        // indexing the bounds with lcpSearchLength - 1 would read
        // before the start of the vectors, so the position is left as
        // "no match" instead.
        //
        if (lcpSearchLength > 0) {
            matchLow[m]  = lowMatchBound[lcpSearchLength - 1];
            matchHigh[m] = highMatchBound[lcpSearchLength - 1];
            matchLength[m] = minPrefixMatchLength + lcpSearchLength;

            //
            // Next, apply some heuristics to the anchor generation.
            //
            // 1.1 If the suffix array match is unique, try and extend that
            // match as long as possible to ease global chaining later on.
            //
            // 1.2 If the suffix array match is unique, but cannot be
            // extended, it probably ends in an error.  Back the search up
            // by 1.
            //
            // 2.1 If the suffix array match is not unique, return the
            // default matches, or expand the search to include more
            // matches.
            //
            if (matchLow[m] + 1 == matchHigh[m]) {
                //
                // The match is unique: extend base by base while the
                // reference and the read agree, staying inside both
                // sequences and under maxLCPLength (0 means no cap).
                //
                lcpLength = minPrefixMatchLength + lcpSearchLength;
                long refPos    = sa.index[matchLow[m]] + lcpLength;
                long queryPos  = p + lcpLength;
                bool extensionWasPossible = false;

                while (refPos + 1 < reference.length and
                       queryPos + 1 < read.length and
                       reference.seq[refPos + 1] == read.seq[queryPos + 1] and
                       (params.maxLCPLength == 0 or
                        lcpLength < static_cast<DNALength>(params.maxLCPLength))) {
                    refPos++;
                    queryPos++;
                    lcpLength++;
                    extensionWasPossible = true;
                }

                if (extensionWasPossible) {
                    //
                    // Was able to extend match far into the genome, store that.
                    //
                    matchLength[m] = lcpLength;
                }
                else {
                    //
                    // No extension was possible, indicating that this match
                    // ends at an error.  To be safe, expand search by up to
                    // 1.
                    //
                    if (lcpSearchLength > 1) {
                        lcpSearchLength = lcpSearchLength - 1;
                    }
                    matchLow[m]  = lowMatchBound[lcpSearchLength - 1];
                    matchHigh[m] = highMatchBound[lcpSearchLength - 1];
                    matchLength[m] = minPrefixMatchLength + lcpSearchLength;
                }
            }
            else {
                //
                // The match is not unique.  Store a possibly expanded
                // search: back up by params.expand entries, but never
                // before the first stored interval.
                //
                if (lcpSearchLength > params.expand) {
                    lcpSearchLength -= params.expand;
                }
                else {
                    assert(lowMatchBound.size() > 0);
                    lcpSearchLength = 1;
                }

                //
                // There are multiple matches for this position.
                //
                matchLow[m]    = lowMatchBound[lcpSearchLength - 1];
                matchHigh[m]   = highMatchBound[lcpSearchLength - 1];
                matchLength[m] = minPrefixMatchLength + lcpSearchLength;
            }
        }

        //
        // Possibly advance a bunch of steps: skip
        // lcpLength - expand - advanceExactMatches positions when that
        // is positive.  p and m move together so they stay aligned.
        //
        if (params.advanceExactMatches) {
            int tmp = (int)lcpLength - (int)params.expand
                      - params.advanceExactMatches;
            int advance = MAX(tmp, 0);
            p += advance;
            m += advance;
        }
    }
    return 1;
}

Example #2

Show file

File: SAMPrinterImpl.hpp Project: bnbowman/blasr_libcpp

//
// Build the CIGAR string for 'alignment' into 'cigarString'.
//
// The unclipped operations come from CreateNoClippingCigarOps; clip
// operations are then wrapped around them according to 'clipping':
//   hard           - H operations from SetHardClip; both soft clip
//                    outputs are set to 0.
//   soft / subread - hard clip lengths are taken from the read's low
//                    quality prefix/suffix (and, for subread, the
//                    subread boundaries), soft clip lengths come from
//                    SetSoftClip, and prefix/suffix values are swapped
//                    when alignment.tStrand == 1.
//   anything else  - no clip operations are added and the four clip
//                    outputs are left untouched.
//
// The four clip references are outputs and hold the lengths that were
// used to build the string.
//
void SAMOutput::CreateCIGARString(T_AlignmentCandidate &alignment,
        T_Sequence &read,
        std::string &cigarString,
        Clipping clipping,
        DNALength & prefixSoftClip, DNALength & suffixSoftClip, 
        DNALength & prefixHardClip, DNALength & suffixHardClip,
        bool cigarUseSeqMatch, const bool allowAdjacentIndels) {

    cigarString.clear();

    // Every clipping mode starts from the same unclipped operation list.
    std::vector<int> lengths;
    std::vector<char> codes;
    CreateNoClippingCigarOps(alignment, lengths, codes, cigarUseSeqMatch, allowAdjacentIndels);

    if (clipping == hard) {
        SetHardClip(alignment, read, prefixHardClip, suffixHardClip);

        if (prefixHardClip > 0) {
            lengths.insert(lengths.begin(), prefixHardClip);
            codes.insert(codes.begin(), 'H');
        }
        if (suffixHardClip > 0) {
            lengths.push_back(suffixHardClip);
            codes.push_back('H');
        }

        // Hard clipping never reports soft clipped bases.
        prefixSoftClip = 0;
        suffixSoftClip = 0;
    }
    else if (clipping == soft or clipping == subread) {
        //
        // Low quality regions are hard clipped even in soft clipping
        // mode; subread mode additionally clips everything outside of
        // the subread.
        //
        if (clipping == subread) {
            prefixHardClip = std::max(static_cast<DNALength>(read.SubreadStart()),
                                      read.lowQualityPrefix);
            suffixHardClip = std::max(static_cast<DNALength>(read.length - read.SubreadEnd()),
                                      read.lowQualitySuffix);
        }
        else {
            prefixHardClip = read.lowQualityPrefix;
            suffixHardClip = read.lowQualitySuffix;
        }

        SetSoftClip(alignment, read, prefixHardClip, suffixHardClip, prefixSoftClip, suffixSoftClip);

        // For tStrand == 1 the prefix and suffix clips trade places.
        if (alignment.tStrand == 1) {
            std::swap(prefixHardClip, suffixHardClip);
            std::swap(prefixSoftClip, suffixSoftClip);
        }

        //
        // Front of the list: S is inserted first and H in front of
        // it, which yields the order H then S when both exist.
        //
        if (prefixSoftClip > 0) {
            lengths.insert(lengths.begin(), prefixSoftClip);
            codes.insert(codes.begin(), 'S');
        }
        if (prefixHardClip > 0) {
            lengths.insert(lengths.begin(), prefixHardClip);
            codes.insert(codes.begin(), 'H');
        }

        //
        // Back of the list: S then H.
        //
        if (suffixSoftClip > 0) {
            lengths.push_back(suffixSoftClip);
            codes.push_back('S');
        }
        if (suffixHardClip > 0) {
            lengths.push_back(suffixHardClip);
            codes.push_back('H');
        }
    }

    CigarOpsToString(lengths, codes, cigarString);
}

Example #3

Show file

File: MapBySuffixArrayImpl.hpp Project: bnbowman/blasr_libcpp

//
// Find anchors between the subread of 'read' and 'reference' using
// the suffix array 'sa', and append one match position per suffix
// array hit to 'matchPosList'.
//
// If the subread is shorter than anchorParameters.minMatchLength,
// matchPosList is cleared and 0 is returned.  Otherwise the return
// value is matchPosList.size() after appending, which includes any
// entries the caller had already placed in the list.
//
int MapReadToGenome(T_RefSequence &reference,
    T_SuffixArray &sa, T_Sequence &read, 
    unsigned int minPrefixMatchLength,
    vector<T_MatchPos> &matchPosList,
    AnchorParameters &anchorParameters) {

    vector<DNALength> matchLow, matchHigh, matchLength;

    DNALength minMatchLen = anchorParameters.minMatchLength;
    if (read.SubreadLength() < minMatchLen) {
        matchPosList.clear();
        return 0;
    }

    //
    // A return value of 0 means that no position was searched and the
    // three match arrays were left empty.  Nothing can be indexed in
    // that case, so stop here without adding anchors.
    //
    if (LocateAnchorBoundsInSuffixArray(reference, sa, read,
            minPrefixMatchLength, matchLow, matchHigh, matchLength,
            anchorParameters) == 0) {
        return matchPosList.size();
    }

    assert(matchLow.size() == matchHigh.size());

    //
    // Do some filtering on the matches looking for overlapping matches
    // if there are any: a match that is exactly one base shorter than
    // the match starting one position earlier is dropped.
    //
    // The match arrays hold one entry per searched position, which
    // can be fewer than read.length, so both passes are bounded by
    // the array size rather than by the read length.  All flags are
    // computed before anything is zeroed so that every comparison
    // sees the unfiltered lengths.
    //
    if (anchorParameters.removeEncompassedMatches) {
        const size_t numPositions = matchLength.size();
        vector<bool> removed(numPositions, false);
        for (size_t i = 0; i + 1 < numPositions; i++) {
            if (matchLength[i] == matchLength[i+1]+1) {
                removed[i+1] = true;
            }
        }
        for (size_t i = 1; i < numPositions; i++) {
            if (removed[i]) {
                matchLength[i] = matchLow[i] = matchHigh[i] = 0;
            }
        }
    }

    //
    // Anchors are only added for positions at least 'trim' bases
    // before the end of the subread; clamp at 0 so the unsigned
    // subtraction cannot wrap.
    //
    DNALength endOfMapping;
    DNALength trim = MAX(minMatchLen + 1, sa.lookupPrefixLength + 1);
    if (read.SubreadEnd() < trim) {
        endOfMapping = 0;
    }
    else {
        endOfMapping = read.SubreadEnd() - trim;
    }

    for (DNALength pos = read.SubreadStart(); pos < endOfMapping; pos++) {
        // The match arrays are indexed relative to the subread start.
        size_t matchIndex = pos - read.SubreadStart();
        assert(matchIndex < matchHigh.size());

        // Skip positions that hit the suffix array too many times.
        if (matchHigh[matchIndex] - matchLow[matchIndex] <= 
            anchorParameters.maxAnchorsPerPosition) {
            DNALength mp;
            for (mp = matchLow[matchIndex]; mp < matchHigh[matchIndex]; mp++) {
                //
                // Re-checked for every hit because the trimming below
                // may shorten the stored length.
                //
                if (matchLength[matchIndex] < minMatchLen) {
                    continue;
                }

                //
                // By default, add all anchors.
                //
                if (matchLength[matchIndex] + pos > read.length) {
                    //
                    // When doing branching, it's possible that a deletion
                    // branch finds an anchor that goes past the end of a
                    // read.  When that is the case, trim back the anchor
                    // match since this confuses downstream assertions.
                    //
                    matchLength[matchIndex] = read.length - pos;
                }
                assert(sa.index[mp] + matchLength[matchIndex] 
                    <= reference.length);

                matchPosList.push_back(ChainedMatchPos(sa.index[mp], pos,
                    matchLength[matchIndex], 
                    matchHigh[matchIndex] - matchLow[matchIndex]));
            }
        }
    }

    return matchPosList.size();
}

Example #4

Show file

File: SAMPrinterImpl.hpp Project: bnbowman/blasr_libcpp

//
// Write 'alignment' of 'read' to 'samFile' as one tab separated SAM
// record terminated by std::endl: the eleven mandatory columns, the
// optional tags listed below, and whatever 'qvList' prints.
//
// Optional tags, in output order:
//   "RG" read group Id
//   "AS" alignment score
//   "XS" read alignment start position without counting previous soft clips (1 based)
//   "XE" read alignment end position without counting previous soft clips (1 based)
//   "YS" read.SubreadStart()
//   "YE" read.SubreadEnd()
//   "ZM" read.HoleNumber()
//   "XL" aligned read length
//   "XT" # of continuous reads, always 1 for blasr
//   "NM" edit distance
//   "FI" read alignment start position (1 based)
//   "XQ" query sequence length
//
void SAMOutput::PrintAlignment(T_AlignmentCandidate &alignment,
        T_Sequence &read,
        std::ostream &samFile,
        AlignmentContext &context,
        SupplementalQVList & qvList,
        Clipping clipping,
        bool cigarUseSeqMatch,
        const bool allowAdjacentIndels) {

    std::string cigar;
    uint16_t samFlag;
    T_Sequence clippedRead;
    DNALength prefixSoftClip = 0, suffixSoftClip = 0;
    DNALength prefixHardClip = 0, suffixHardClip = 0;

    //
    // Gather everything that is computed rather than copied: the CIGAR
    // (which also reports the clip lengths), the clipped sequence and
    // the flag.
    //
    CreateCIGARString(alignment, read, cigar, clipping,
                      prefixSoftClip, suffixSoftClip,
                      prefixHardClip, suffixHardClip,
                      cigarUseSeqMatch, allowAdjacentIndels);
    SetAlignedSequence(alignment, read, clippedRead, clipping);
    BuildFlag(alignment, context, samFlag);

    samFile << alignment.qName << "\t";  // QNAME
    samFile << samFlag << "\t";          // FLAG
    samFile << alignment.tName << "\t";  // RNAME

    //
    // POS, one based.
    //
    if (alignment.tStrand != 0) {
        // includes - 1 for rev-comp,  +1 for one-based
        samFile << alignment.tLength - (alignment.TAlignStart() + alignment.TEnd()) + 1 << "\t";
    }
    else {
        // add 1 to get 1 based coordinate system
        samFile << alignment.TAlignStart() + 1 << "\t";
    }

    samFile << static_cast<int>(alignment.mapQV) << "\t";  // MAPQ
    samFile << cigar << "\t";                               // CIGAR

    //
    // RNEXT and PNEXT: no information about a next subread is
    // written, so these are always "*" and 0.
    //
    const std::string rNext("*");
    samFile << rNext << "\t";           // RNEXT
    DNALength nextSubreadPos = 0;
    samFile << nextSubreadPos << "\t";  // PNEXT

    // SAM v1.5, tLen is set as 0 for single-segment template
    samFile << 0 << "\t";  // TLEN

    //
    // SEQ.  NOTE(review): the 0 passed as second argument presumably
    // keeps the sequence on a single line without a trailing newline;
    // confirm against DNASequence::PrintSeq.
    //
    static_cast<DNASequence&>(clippedRead).PrintSeq(samFile, 0);
    samFile << "\t";

    //
    // QUAL, or "*" when there are no quality values or qvList.useqv
    // is non-zero.
    //
    if (clippedRead.qual.data != NULL && qvList.useqv == 0) {
        clippedRead.PrintAsciiQual(samFile, 0);
    }
    else {
        samFile <<"*";
    }
    samFile << "\t";

    //
    // Optional fields.
    //
    samFile << "RG:Z:" << context.readGroupId << "\t";
    samFile << "AS:i:" << alignment.score << "\t";

    DNALength qAlignStart = alignment.QAlignStart();
    DNALength qAlignEnd = alignment.QAlignEnd();

    if (clipping == none) {
        samFile << "XS:i:" << qAlignStart + 1 << "\t"
                << "XE:i:" << qAlignEnd + 1 << "\t";
    }
    else if (clipping == hard or clipping == soft or clipping == subread) {
        //
        // With clipping, XS/XE are derived from the hard clip lengths;
        // the prefix and suffix roles are exchanged for tStrand == 1.
        //
        DNALength xs, xe;
        if (alignment.tStrand == 1) {
            xs = suffixHardClip;
            xe = read.length - prefixHardClip;
        }
        else {
            xs = prefixHardClip;
            xe = read.length - suffixHardClip;
        }
        samFile << "XS:i:" << xs + 1 << "\t"; // add 1 for 1-based indexing in sam
        assert(read.length - suffixHardClip == prefixHardClip + clippedRead.length);
        samFile << "XE:i:" << xe + 1 << "\t";
    }

    samFile << "YS:i:" << read.SubreadStart() << "\t"
            << "YE:i:" << read.SubreadEnd() << "\t"
            << "ZM:i:" << read.HoleNumber() << "\t"
            << "XL:i:" << alignment.qAlignedSeq.length << "\t";
    // Reads are always continuous reads, not reference based circular
    // consensus, when output by blasr.
    samFile << "XT:i:1\t";
    samFile << "NM:i:" << context.editDist << "\t"
            << "FI:i:" << alignment.qAlignedSeqPos + 1;
    // Query sequence length.
    samFile << "\t" << "XQ:i:" << alignment.qLength;

    //
    // Write out optional quality values.  If qvlist does not
    // have any qv's signaled to print, this is a no-op.
    //
    // First transform characters that are too large to printable ones.
    //
    qvList.FormatQVOptionalFields(clippedRead);
    qvList.PrintQVOptionalFields(clippedRead, samFile);

    samFile << std::endl;
}