Exemplo n.º 1
0
// Collapse a set of alignments into distinct, non-overlapping locations.
//
// The vector is sorted with HapgenAlignment's operator< and then walked from
// the last element to the first. Each alignment is compared only with the
// most recently emitted one: if both share a referenceID and their
// [position, position + length] spans intersect (per Interval::isIntersecting),
// the emitted alignment is widened to cover both; otherwise the alignment is
// emitted as a new location. On return, `alignments` holds the coalesced set.
// An empty input is left untouched.
//
// NOTE(review): the exact sort key is defined by HapgenAlignment::operator<,
// which is not visible here. Earlier comments described it both as
// "reference id, then position" and as "increasing score" - confirm.
void HapgenUtil::coalesceAlignments(HapgenAlignmentVector& alignments)
{
    if(alignments.empty())
        return;

    std::sort(alignments.begin(), alignments.end());

    // The last element of the sorted input seeds the output unconditionally.
    HapgenAlignmentVector coalesced;
    coalesced.push_back(alignments[alignments.size() - 1]);

    // Visit the remaining elements back to front (indices size-2 down to 0).
    for(size_t remaining = alignments.size() - 1; remaining > 0; --remaining)
    {
        const HapgenAlignment& candidate = alignments[remaining - 1];

        // Re-fetched every iteration: a push_back below may reallocate.
        HapgenAlignment& lastKept = coalesced.back();

        int keptStart = lastKept.position;
        int keptEnd = keptStart + lastKept.length;

        int candStart = candidate.position;
        int candEnd = candStart + candidate.length;

        bool overlaps = Interval::isIntersecting(keptStart, keptEnd, candStart, candEnd);
        bool sameWindow = (lastKept.referenceID == candidate.referenceID) && overlaps;

        if(sameWindow)
        {
            // Grow the kept alignment into a window spanning both alignments.
            lastKept.position = std::min(keptStart, candStart);
            lastKept.length = std::max(keptEnd, candEnd) - lastKept.position;
            continue;
        }

        // Different reference or disjoint span: this is a new location.
        coalesced.push_back(candidate);
    }

    alignments = coalesced;
}
Exemplo n.º 2
0
// Compute the best alignment of the haplotype collection to the reference.
//
// Each input haplotype is aligned with
// HapgenUtil::alignHaplotypeToReferenceBWASW, the resulting candidates are
// coalesced, and every remaining candidate is scored by locally aligning the
// variant reads' mates (forward and reverse-complement sets) against the
// first flanking haplotype built for that candidate. A candidate's score is
// the mean of (alignment score / aligned query length) over all mate
// alignments; the candidate with the highest strictly positive mean wins.
//
// Parameters:
//   inHaplotypes   - haplotype sequences to place on the reference
//   variantMates   - mates of the variant reads
//   variantRCMates - mates to be aligned as the reverse-complement set
//   parameters     - supplies referenceIndex and pRefTable
//   bestAlignment  - output; written only when DRC_OK is returned
//
// Returns:
//   DRC_OVER_DEPTH   - more than MAX_DEPTH mates were supplied in total
//   DRC_NO_ALIGNMENT - no candidate alignment exists, or none obtained a
//                      positive mate score
//   DRC_OK           - bestAlignment holds the winning candidate
DindelReturnCode DindelUtil::computeBestAlignment(const StringVector& inHaplotypes, 
                                                  const SeqItemVector& variantMates,
                                                  const SeqItemVector& variantRCMates,
                                                  const GraphCompareParameters& parameters,
                                                  HapgenAlignment& bestAlignment)
{
    const size_t MAX_DEPTH = 2000;
    if(variantMates.size() + variantRCMates.size() > MAX_DEPTH)
        return DRC_OVER_DEPTH;

    //
    // Align the haplotypes to the reference genome to generate candidate alignments
    //
    HapgenAlignmentVector candidateAlignments;
    for(size_t i = 0; i < inHaplotypes.size(); ++i)
        HapgenUtil::alignHaplotypeToReferenceBWASW(inHaplotypes[i], parameters.referenceIndex, candidateAlignments);

    // Remove duplicate or bad alignment pairs
    HapgenUtil::coalesceAlignments(candidateAlignments);

    if(candidateAlignments.empty())
        return DRC_NO_ALIGNMENT;

    //
    // Score each candidate alignment against the mates of all the variant reads
    //
    // An explicit flag plus a size_t index replaces the former int/-1 sentinel,
    // which narrowed the size_t loop index.
    bool haveBestCandidate = false;
    size_t bestCandidate = 0;
    double bestAverageScoreFrac = 0.0;
    double secondBest = 0.0; // only consumed by the disabled ambiguity check below
    for(size_t i = 0; i < candidateAlignments.size(); ++i)
    {
        // Compute the average score of the reads' mates to the flanking sequence
        StringVector referenceFlanking;
        StringVector referenceHaplotypes;
        HapgenUtil::makeFlankingHaplotypes(candidateAlignments[i], parameters.pRefTable, 
                                           1000, inHaplotypes, referenceFlanking, referenceHaplotypes);

        // If valid flanking haplotypes could not be made, skip this alignment
        if(referenceFlanking.empty())
            continue;

        // Realign the mates
        LocalAlignmentResultVector localAlignments = HapgenUtil::alignReadsLocally(referenceFlanking[0], variantMates);
        LocalAlignmentResultVector localAlignmentsRC = HapgenUtil::alignReadsLocally(referenceFlanking[0], variantRCMates);

        // Merge alignments
        localAlignments.insert(localAlignments.end(), localAlignmentsRC.begin(), localAlignmentsRC.end());

        // With no mate alignments the mean would be 0/0 (NaN). Skip the
        // candidate explicitly instead of relying on NaN comparing false.
        if(localAlignments.empty())
            continue;

        double sum = 0.0;
        for(size_t j = 0; j < localAlignments.size(); ++j)
        {
            // Fraction of the aligned query span that the score accounts for
            double max_score = localAlignments[j].queryEndIndex - localAlignments[j].queryStartIndex + 1;
            double frac = static_cast<double>(localAlignments[j].score) / max_score;
            //printf("Score: %d frac: %lf\n", localAlignments[j].score, frac);
            sum += frac;
        }

        double score = sum / static_cast<double>(localAlignments.size());
        if(score > bestAverageScoreFrac)
        {
            secondBest = bestAverageScoreFrac;
            bestAverageScoreFrac = score;
            bestCandidate = i;
            haveBestCandidate = true;
        }
        else if(score > secondBest)
        {
            secondBest = score;
        }

        //printf("Alignment %zu mate-score: %lf\n", i, score);
    }

    if(!haveBestCandidate)
        return DRC_NO_ALIGNMENT;

    /*
    if(bestAverageScoreFrac < 0.9f)
        return DRC_POOR_ALIGNMENT;

    if(bestAverageScoreFrac - secondBest < 0.05f)
        return DRC_AMBIGUOUS_ALIGNMENT;
    */
    bestAlignment = candidateAlignments[bestCandidate];
    return DRC_OK;
}