Exemple #1
0
// Re-aligns the query/target strings stored in `aln` and writes the gapped
// result back into `aln`.
//
// Steps: SDPAlign produces a seed alignment, GuidedAlign refines it, and
// CreateAlignmentStrings renders the refined alignment as gapped strings.
// aln.start / aln.end are then adjusted with the refined alignment's
// GenomicTBegin() / GenomicTEnd() values.  For records whose strand is '-',
// the start is recomputed from aln.tlen and both strings go through
// revComp().  Finally aln.start is incremented by one (presumably a switch
// to 1-based coordinates -- confirm against callers).
void SimpleAligner::align(dagcon::Alignment& aln) {
    // blasr::Alignment is declared in the blasr code base.
    blasr::Alignment seedAln, polishedAln;

    // The sequence wrappers point straight at the std::string buffers held
    // by `aln`; nothing is copied here.
    FASTQSequence query;
    query.length = aln.qstr.length();
    query.seq = reinterpret_cast<Nucleotide*>(const_cast<char*>(aln.qstr.c_str()));

    DNASequence target;
    target.length = aln.tstr.length();
    target.seq = reinterpret_cast<Nucleotide*>(const_cast<char*>(aln.tstr.c_str()));

    // Seed alignment, then banded refinement guided by that seed.
    SDPAlign(query, target, distScoreFn_, tupleMetrics_.tupleSize,
             config_.sdpIndel, config_.sdpIndel, config_.indelRate*2,
             seedAln, Local);
    GuidedAlign(query, target, seedAln, distScoreFn_,
                config_.bandSize, polishedAln);

    // Debugging aids:
    //StickPrintAlignment(seedAln, query, target, std::cout);
    //StickPrintAlignment(polishedAln, query, target, std::cout);

    std::string gappedQuery, matchLine, gappedTarget;
    CreateAlignmentStrings(polishedAln, query.seq, target.seq,
                           gappedTarget, matchLine, gappedQuery,
                           query.length, target.length);

    // Alignment coordinates may have changed; update the alignment object.
    // NOTE(review): aln.end is computed from the already-shifted aln.start --
    // confirm that adding the begin offset twice is intended.
    aln.start += polishedAln.GenomicTBegin();
    aln.end = aln.start + polishedAln.GenomicTEnd();

    if (aln.strand != '-') {
        aln.qstr = gappedQuery;
        aln.tstr = gappedTarget;
    } else {
        // '-' strand: recompute the start from the target length and store
        // the strings after passing them through revComp().
        aln.start = aln.tlen - aln.end;
        aln.qstr = revComp(gappedQuery);
        aln.tstr = revComp(gappedTarget);
    }
    ++aln.start;
}
Exemple #2
0
// Re-aligns the query/target strings stored in `aln` and writes the gapped
// result back into `aln`.
//
// Steps: SDPAlign produces a seed alignment, GuidedAlign refines it, and
// CreateAlignmentStrings renders the refined alignment as gapped strings.
// aln.start is then adjusted with the refined alignment's tPos; for records
// whose strand is '-' it is instead recomputed from aln.len and aln.end, and
// both strings go through revComp().  Finally aln.start is incremented by one
// (presumably a switch to 1-based coordinates -- confirm against callers).
void SimpleAligner::align(dagcon::Alignment& aln) {
    // blasr::Alignment is declared in the blasr code base.
    blasr::Alignment seedAln, polishedAln;

    // The query wrapper points straight at aln.qstr's buffer (no copy), and
    // gets quality storage sized to the query length.
    // NOTE(review): nothing in this function releases the storage allocated
    // by the two calls below -- confirm who owns and frees it.
    FASTQSequence query;
    query.length = aln.qstr.length();
    query.seq = reinterpret_cast<Nucleotide*>(const_cast<char*>(aln.qstr.c_str()));
    query.AllocateRichQualityValues(query.length);
    query.qual.Allocate(query.length);

    // The target wrapper likewise borrows aln.tstr's buffer.
    DNASequence target;
    target.length = aln.tstr.length();
    target.seq = reinterpret_cast<Nucleotide*>(const_cast<char*>(aln.tstr.c_str()));

    // Seed alignment, then banded refinement guided by that seed.
    SDPAlign(query, target, distScoreFn_, tupleMetrics_.tupleSize,
             config_.sdpIndel, config_.sdpIndel, config_.indelRate*2,
             seedAln, Local);
    GuidedAlign(query, target, seedAln, distScoreFn_,
                config_.bandSize, polishedAln);

    // Debugging aids:
    //StickPrintAlignment(seedAln, query, target, std::cout);
    //StickPrintAlignment(polishedAln, query, target, std::cout);

    std::string gappedQuery, matchLine, gappedTarget;
    CreateAlignmentStrings(polishedAln, query.seq, target.seq,
                           gappedTarget, matchLine, gappedQuery,
                           query.length, target.length);

    if (aln.strand != '-') {
        // Shift the start by the refined alignment's target offset.
        aln.start += polishedAln.tPos;
        aln.qstr = gappedQuery;
        aln.tstr = gappedTarget;
    } else {
        // '-' strand: recompute the start from aln.len and store the
        // strings after passing them through revComp().
        aln.start = aln.len - (aln.end + polishedAln.tPos);
        aln.qstr = revComp(gappedQuery);
        aln.tstr = revComp(gappedTarget);
    }
    ++aln.start;
}
// Writes `alignment` to `out` as a markup record: a "hit" entry carrying the
// query/target coordinates and strands, one entry each for zScore, nInsert,
// nDelete, nMismatch and nCorrect, and the gapped query and target strings
// produced by CreateAlignmentStrings.
//
// qPrintStart, tPrintStart and maxPrintLength are accepted but unused.
//
// Side effect: when the gapped target string is empty, the nIns / nDel /
// nMismatch / nMatch counters of `alignment` are reset to zero.
//
// NOTE(review): each "percent" attribute is computed as count * 0.5 / length
// of the gapped target string.  That does not reproduce the figures in the
// sample record below (e.g. value 1 -> percent 2.86) -- confirm the intended
// formula before relying on these numbers.
void Print(T_Alignment &alignment, T_Sequence &query, T_Sequence &text, std::ostream &out,
           int qPrintStart = 0, int tPrintStart = 0, int maxPrintLength = 50)
{
    (void)(qPrintStart);
    (void)(tPrintStart);
    (void)(maxPrintLength);
    /*
     * Sample alignment:
     *
     <hit name="x15_y33_1220208-0008_m081205_152444_Uni_p2_b15" unalignedLength="1301" start="1051" end="1016" strand="-" targetStart="1" targetEnd="44" targetStrand="+">
     <zScore value="-6.091"/>
     <nInsert value="1" percent="2.86" />
     <nDelete value="9" percent="25.71" />
     <nMismatch value="1" percent="2.86" />
     <nCorrect value="24" percent="68.57" />
     <alignment><query>
     AG--CGTTCC-TATGG-TG-GGGTCGTTA-ACT---GTCGCCAG
     </query><target>
     AGCCCG-TCCTTATGGTTGAGGGTTGTTACACTTCGGTCGCCAG
     </target></alignment>
     </hit>
     */
    const char strandSymbol[2] = {'+', '-'};

    std::string targetRow, matchRow, queryRow;
    CreateAlignmentStrings(alignment, query.seq, text.seq, targetRow, matchRow, queryRow);

    // Denominator for the per-category ratios; forced to 1 for an empty
    // alignment so the divisions below are always defined.
    int denominator = targetRow.size();
    if (denominator == 0) {
        denominator = 1;
        alignment.nIns = 0;
        alignment.nDel = 0;
        alignment.nMismatch = 0;
        alignment.nMatch = 0;
    }

    // Attributes of the opening "hit" entry, concatenated in output order.
    std::string hitAttributes = CreateKeywordValuePair(std::string("name"), alignment.qName);
    hitAttributes += CreateKeywordValuePair(std::string("unalignedLength"), alignment.qLength);
    hitAttributes += CreateKeywordValuePair(std::string("start"), alignment.qPos);
    hitAttributes += CreateKeywordValuePair(std::string("end"),
                                            alignment.qPos + alignment.qAlignLength);
    hitAttributes += CreateKeywordValuePair(std::string("strand"),
                                            strandSymbol[alignment.qStrand]);
    hitAttributes += CreateKeywordValuePair(std::string("targetStart"), alignment.tPos);
    hitAttributes += CreateKeywordValuePair(std::string("targetEnd"),
                                            alignment.tPos + alignment.tAlignLength);
    hitAttributes += CreateKeywordValuePair(std::string("targetStrand"),
                                            strandSymbol[alignment.tStrand]);
    out << BeginDataEntry(std::string("hit"), hitAttributes) << std::endl;

    out << CreateDataEntry(std::string("zScore"),
                           CreateKeywordValuePair(std::string("value"), alignment.zScore))
        << std::endl;

    // One entry per edit category: raw count plus its ratio.
    const std::string valueKey("value");
    const std::string percentKey("percent");
    std::string statAttributes;

    statAttributes = CreateKeywordValuePair(valueKey, alignment.nIns);
    statAttributes += " ";
    statAttributes += CreateKeywordValuePair(percentKey, alignment.nIns * 0.5 / denominator);
    out << CreateDataEntry(std::string("nInsert"), statAttributes) << std::endl;

    statAttributes = CreateKeywordValuePair(valueKey, alignment.nDel);
    statAttributes += " ";
    statAttributes += CreateKeywordValuePair(percentKey, alignment.nDel * 0.5 / denominator);
    out << CreateDataEntry(std::string("nDelete"), statAttributes) << std::endl;

    statAttributes = CreateKeywordValuePair(valueKey, alignment.nMismatch);
    statAttributes += " ";
    statAttributes += CreateKeywordValuePair(percentKey, alignment.nMismatch * 0.5 / denominator);
    out << CreateDataEntry(std::string("nMismatch"), statAttributes) << std::endl;

    statAttributes = CreateKeywordValuePair(valueKey, alignment.nMatch);
    statAttributes += " ";
    statAttributes += CreateKeywordValuePair(percentKey, alignment.nMatch * 0.5 / denominator);
    out << CreateDataEntry(std::string("nCorrect"), statAttributes) << std::endl;

    // Gapped sequences: query first, then target, each on its own line.
    const std::string noAttributes("");
    out << CreateStartEntry(std::string("alignment"), noAttributes)
        << CreateStartEntry(std::string("query"), noAttributes) << std::endl;
    out << queryRow << std::endl;
    out << CreateEndEntry(std::string("query")) << std::endl;
    out << CreateStartEntry(std::string("target"), noAttributes) << std::endl;
    out << targetRow << std::endl;
    out << CreateEndEntry(std::string("target")) << std::endl;
    out << CreateEndEntry(std::string("alignment")) << std::endl;
    out << CreateEndEntry(std::string("hit")) << std::endl;
}
// Writes one alignment record to `out`: the statistics emitted by
// PrintCompareSequencesAlignmentStats followed by the gapped query string,
// the match pattern and the gapped target string, separated by single spaces.
//
// alignment   alignment to print; MODIFIED in place when refForward is false
//             and the alignment has qStrand == 1 / tStrand == 0 (qPos,
//             qAlignedSeqPos, qStrand and tStrand are rewritten).
// qseq, tseq  sequences handed to CreateAlignmentStrings.
// out         destination stream.
// refForward  when false, an alignment whose tStrand is 1 (after the rewrite
//             above) has its three strings reversed, with the sequence
//             strings complemented through ReverseComplementNuc.
//
// Spaces in the match pattern are replaced by '*' for easy parsing.
void CompareSequencesOutput::Print(T_Alignment &alignment, 
    T_QuerySequence &qseq, T_TargetSequence &tseq, 
    std::ostream &out, bool refForward) {

	std::string queryStr, alignStr, textStr;
	CreateAlignmentStrings(alignment, qseq, tseq, textStr, alignStr, queryStr);

	if (!refForward and alignment.qStrand == 1 and alignment.tStrand == 0) {
		// Flip the strand assignment from (query 1, target 0) to
		// (query 0, target 1), re-expressing the query offsets to match.
		DNALength alignedSeqToEnd = 0;
		if (alignment.blocks.size() > 0) {
			// First compute the offset of the reverse of the substring that was aligned.
			alignedSeqToEnd = alignment.qLength - (alignment.qAlignedSeqPos + alignment.qAlignedSeq.length);
			// Then the distance from the end of the last block to the end of that substring.
			DNALength alignEndToSubstrEnd = alignment.qAlignedSeq.length - (alignment.qPos + alignment.blocks[alignment.blocks.size()-1].qPos + alignment.blocks[alignment.blocks.size()-1].length);
			alignment.qPos = alignEndToSubstrEnd;
		}
		alignment.qAlignedSeqPos = alignedSeqToEnd;
		alignment.qStrand = 0;
		alignment.tStrand = 1;
	}
	
	PrintCompareSequencesAlignmentStats(alignment, out);

	// change the spaces in the align string to *s for easy parsing of alignment
	VectorIndex i;
	for (i = 0; i < alignStr.size(); i++ ) { 
		if (alignStr[i] == ' ') alignStr[i] = '*';
	}

	if (!refForward and alignment.tStrand == 1) {
		//
		// Build reverse complement strings.
		//
		std::string queryStrRC, alignStrRC, textStrRC;
		queryStrRC.resize(queryStr.size());
		alignStrRC.resize(alignStr.size());
		textStrRC.resize(alignStr.size());
		
		const DNALength alignStringLength = alignStr.size();
		for (DNALength pos = 0; pos < alignStringLength; pos++ ) {
			// Column `pos` of the input lands at the mirrored column of the output.
			const DNALength mirrored = alignStringLength - pos - 1;

			// Gap characters are copied as-is; bases are complemented.  The
			// table is indexed through unsigned char so that a byte >= 0x80
			// cannot turn into a negative subscript where char is signed.
			if (queryStr[pos] != '-') {
				queryStrRC[mirrored] = ReverseComplementNuc[static_cast<unsigned char>(queryStr[pos])];
			}
			else {
				queryStrRC[mirrored] = '-';
			}

			// The match pattern is only reversed.
			alignStrRC[mirrored] = alignStr[pos];
			
			if (textStr[pos] != '-') {
				textStrRC[mirrored] = ReverseComplementNuc[static_cast<unsigned char>(textStr[pos])];
			}
			else {
				textStrRC[mirrored] = '-';
			}
		}
		queryStr = queryStrRC;
		alignStr = alignStrRC;
		textStr  = textStrRC;
	}
					
	// Headers of m5 format are: 
	// qName qSeqLength qStart qEnd qStrand 
	// tName tSeqLength tStart tEnd tStrand
	// score numMatch numMismatch numIns numDel
	// mapQV qAlignedSeq matchPattern tAlignedSeq
	// Only the last three fields are written here; the leading ones are
	// presumably produced by PrintCompareSequencesAlignmentStats above.
	out << queryStr << " " << alignStr << " " << textStr << std::endl;
}