// Coalesce a set of alignments into distinct locations void HapgenUtil::coalesceAlignments(HapgenAlignmentVector& alignments) { if(alignments.empty()) return; // Sort the alignments by reference id, then position std::sort(alignments.begin(), alignments.end()); HapgenAlignmentVector outAlignments; // Iterate over the alignments in sorted order // If an alignment is distinct (=does not overlap) from the // previous alignment, add it to the output collection. // First alignment is always ok outAlignments.push_back(alignments[alignments.size()-1]); // Kees: start from back because alignments are sorted in order of increasing score for(size_t i = alignments.size()-1; i-- > 0;) { // Check this alignment against the last alignment added to the output set HapgenAlignment& prevAlign = outAlignments.back(); const HapgenAlignment& currAlign = alignments[i]; int s1 = prevAlign.position; int e1 = s1 + prevAlign.length; int s2 = currAlign.position; int e2 = s2 + currAlign.length; bool intersecting = Interval::isIntersecting(s1, e1, s2, e2); if(prevAlign.referenceID != currAlign.referenceID || !intersecting) { outAlignments.push_back(currAlign); } else { // merge the intersecting alignment into a window that covers both prevAlign.position = std::min(s1, s2); prevAlign.length = std::max(e1, e2) - prevAlign.position; } } alignments = outAlignments; }
// Compute the best alignment of the haplotype collection to the reference DindelReturnCode DindelUtil::computeBestAlignment(const StringVector& inHaplotypes, const SeqItemVector& variantMates, const SeqItemVector& variantRCMates, const GraphCompareParameters& parameters, HapgenAlignment& bestAlignment) { size_t MAX_DEPTH = 2000; if(variantMates.size() + variantRCMates.size() > MAX_DEPTH) return DRC_OVER_DEPTH; // // Align the haplotypes to the reference genome to generate candidate alignments // HapgenAlignmentVector candidateAlignments; for(size_t i = 0; i < inHaplotypes.size(); ++i) HapgenUtil::alignHaplotypeToReferenceBWASW(inHaplotypes[i], parameters.referenceIndex, candidateAlignments); // Remove duplicate or bad alignment pairs HapgenUtil::coalesceAlignments(candidateAlignments); if(candidateAlignments.empty()) return DRC_NO_ALIGNMENT; // // Score each candidate alignment against the mates of all the variant reads // int bestCandidate = -1; double bestAverageScoreFrac = 0.0f; double secondBest = 0.0f; for(size_t i = 0; i < candidateAlignments.size(); ++i) { // Compute the average score of the reads' mates to the flanking sequence StringVector referenceFlanking; StringVector referenceHaplotypes; HapgenUtil::makeFlankingHaplotypes(candidateAlignments[i], parameters.pRefTable, 1000, inHaplotypes, referenceFlanking, referenceHaplotypes); // If valid flanking haplotypes could not be made, skip this alignment if(referenceFlanking.empty()) continue; // Realign the mates LocalAlignmentResultVector localAlignments = HapgenUtil::alignReadsLocally(referenceFlanking[0], variantMates); LocalAlignmentResultVector localAlignmentsRC = HapgenUtil::alignReadsLocally(referenceFlanking[0], variantRCMates); // Merge alignments localAlignments.insert(localAlignments.end(), localAlignmentsRC.begin(), localAlignmentsRC.end()); double sum = 0.0f; double count = 0.0f; for(size_t j = 0; j < localAlignments.size(); ++j) { double max_score = localAlignments[j].queryEndIndex - localAlignments[j].queryStartIndex + 1; double frac = (double)localAlignments[j].score / max_score; //printf("Score: %d frac: %lf\n", localAlignments[j].score, frac); sum += frac; count += 1; } double score = sum / count; if(score > bestAverageScoreFrac) { secondBest = bestAverageScoreFrac; bestAverageScoreFrac = score; bestCandidate = i; } else if(score > secondBest) { secondBest = score; } //printf("Alignment %zu mate-score: %lf\n", i, score); } if(bestCandidate == -1) return DRC_NO_ALIGNMENT; /* if(bestAverageScoreFrac < 0.9f) return DRC_POOR_ALIGNMENT; if(bestAverageScoreFrac - secondBest < 0.05f) return DRC_AMBIGUOUS_ALIGNMENT; */ bestAlignment = candidateAlignments[bestCandidate]; return DRC_OK; }