Exemplo n.º 1
0
// Check that all the strings in the vector align to the same coordinates
// of the passed in sequence
bool HapgenUtil::checkAlignmentsAreConsistent(const std::string& refString, const StringVector& queries)
{
    if(queries.empty())
        return true;

    // Perform local alignments of each query to the refString
    LocalAlignmentResultVector alignments;
    for(size_t i = 0; i < queries.size(); ++i)
        alignments.push_back(StdAlnTools::localAlignment(refString, queries[i]));

    size_t i = 0;
    for(size_t j = 1; j < alignments.size(); ++j)
    {
        if(alignments[i].targetStartIndex != alignments[j].targetStartIndex ||
           alignments[j].targetEndIndex != alignments[j].targetEndIndex)
        {
            std::cerr << "Warning: inconsistent alignments found for haplotype realignment\n";
            std::cerr << "A[" << i << "]: " << alignments[i] << "\n";
            std::cerr << "A[" << j << "]: " << alignments[j] << "\n";
            return false;
        }
    }

    return true;
}
Exemplo n.º 2
0
// Align a bunch of reads locally to a sequence
LocalAlignmentResultVector HapgenUtil::alignReadsLocally(const std::string& target, const SeqItemVector& reads)
{
    LocalAlignmentResultVector results;
    for(size_t i = 0; i < reads.size(); ++i)
    {
        LocalAlignmentResult fwdAR = StdAlnTools::localAlignment(target, reads[i].seq.toString());
        LocalAlignmentResult rcAR = StdAlnTools::localAlignment(target, reverseComplement(reads[i].seq.toString()));
        results.push_back(fwdAR.score > rcAR.score ? fwdAR : rcAR);
    }
    return results;
}
Exemplo n.º 3
0
// Compute the best alignment of the haplotype collection to the reference
DindelReturnCode DindelUtil::computeBestAlignment(const StringVector& inHaplotypes, 
                                                  const SeqItemVector& variantMates,
                                                  const SeqItemVector& variantRCMates,
                                                  const GraphCompareParameters& parameters,
                                                  HapgenAlignment& bestAlignment)
{
    size_t MAX_DEPTH = 2000;
    if(variantMates.size() + variantRCMates.size() > MAX_DEPTH)
        return DRC_OVER_DEPTH;

    //
    // Align the haplotypes to the reference genome to generate candidate alignments
    //
    HapgenAlignmentVector candidateAlignments;
    for(size_t i = 0; i < inHaplotypes.size(); ++i)
        HapgenUtil::alignHaplotypeToReferenceBWASW(inHaplotypes[i], parameters.referenceIndex, candidateAlignments);

    // Remove duplicate or bad alignment pairs
    HapgenUtil::coalesceAlignments(candidateAlignments);

    if(candidateAlignments.empty())
        return DRC_NO_ALIGNMENT;

    //
    // Score each candidate alignment against the mates of all the variant reads
    //
    int bestCandidate = -1;
    double bestAverageScoreFrac = 0.0f;
    double secondBest = 0.0f;
    for(size_t i = 0; i < candidateAlignments.size(); ++i)
    {
        // Compute the average score of the reads' mates to the flanking sequence
        StringVector referenceFlanking;
        StringVector referenceHaplotypes;
        HapgenUtil::makeFlankingHaplotypes(candidateAlignments[i], parameters.pRefTable, 
                                           1000, inHaplotypes, referenceFlanking, referenceHaplotypes);

        // If valid flanking haplotypes could not be made, skip this alignment
        if(referenceFlanking.empty())
            continue;

        // Realign the mates
        LocalAlignmentResultVector localAlignments = HapgenUtil::alignReadsLocally(referenceFlanking[0], variantMates);
        LocalAlignmentResultVector localAlignmentsRC = HapgenUtil::alignReadsLocally(referenceFlanking[0], variantRCMates);

        // Merge alignments
        localAlignments.insert(localAlignments.end(), localAlignmentsRC.begin(), localAlignmentsRC.end());

        double sum = 0.0f;
        double count = 0.0f;

        for(size_t j = 0; j < localAlignments.size(); ++j)
        {
            double max_score = localAlignments[j].queryEndIndex - localAlignments[j].queryStartIndex + 1;
            double frac = (double)localAlignments[j].score / max_score;
            //printf("Score: %d frac: %lf\n", localAlignments[j].score, frac);
            sum += frac;
            count += 1;
        }

        double score = sum / count;
        if(score > bestAverageScoreFrac)
        {
            secondBest = bestAverageScoreFrac;
            bestAverageScoreFrac = score;
            bestCandidate = i;
        }
        else if(score > secondBest)
        {
            secondBest = score;
        }

        //printf("Alignment %zu mate-score: %lf\n", i, score);
    }

    if(bestCandidate == -1)
        return DRC_NO_ALIGNMENT;

    /*
    if(bestAverageScoreFrac < 0.9f)
        return DRC_POOR_ALIGNMENT;

    if(bestAverageScoreFrac - secondBest < 0.05f)
        return DRC_AMBIGUOUS_ALIGNMENT;
    */
    bestAlignment = candidateAlignments[bestCandidate];
    return DRC_OK;
}