// Check that all the strings in the vector align to the same coordinates // of the passed in sequence bool HapgenUtil::checkAlignmentsAreConsistent(const std::string& refString, const StringVector& queries) { if(queries.empty()) return true; // Perform local alignments of each query to the refString LocalAlignmentResultVector alignments; for(size_t i = 0; i < queries.size(); ++i) alignments.push_back(StdAlnTools::localAlignment(refString, queries[i])); size_t i = 0; for(size_t j = 1; j < alignments.size(); ++j) { if(alignments[i].targetStartIndex != alignments[j].targetStartIndex || alignments[j].targetEndIndex != alignments[j].targetEndIndex) { std::cerr << "Warning: inconsistent alignments found for haplotype realignment\n"; std::cerr << "A[" << i << "]: " << alignments[i] << "\n"; std::cerr << "A[" << j << "]: " << alignments[j] << "\n"; return false; } } return true; }
// Compute the best alignment of the haplotype collection to the reference DindelReturnCode DindelUtil::computeBestAlignment(const StringVector& inHaplotypes, const SeqItemVector& variantMates, const SeqItemVector& variantRCMates, const GraphCompareParameters& parameters, HapgenAlignment& bestAlignment) { size_t MAX_DEPTH = 2000; if(variantMates.size() + variantRCMates.size() > MAX_DEPTH) return DRC_OVER_DEPTH; // // Align the haplotypes to the reference genome to generate candidate alignments // HapgenAlignmentVector candidateAlignments; for(size_t i = 0; i < inHaplotypes.size(); ++i) HapgenUtil::alignHaplotypeToReferenceBWASW(inHaplotypes[i], parameters.referenceIndex, candidateAlignments); // Remove duplicate or bad alignment pairs HapgenUtil::coalesceAlignments(candidateAlignments); if(candidateAlignments.empty()) return DRC_NO_ALIGNMENT; // // Score each candidate alignment against the mates of all the variant reads // int bestCandidate = -1; double bestAverageScoreFrac = 0.0f; double secondBest = 0.0f; for(size_t i = 0; i < candidateAlignments.size(); ++i) { // Compute the average score of the reads' mates to the flanking sequence StringVector referenceFlanking; StringVector referenceHaplotypes; HapgenUtil::makeFlankingHaplotypes(candidateAlignments[i], parameters.pRefTable, 1000, inHaplotypes, referenceFlanking, referenceHaplotypes); // If valid flanking haplotypes could not be made, skip this alignment if(referenceFlanking.empty()) continue; // Realign the mates LocalAlignmentResultVector localAlignments = HapgenUtil::alignReadsLocally(referenceFlanking[0], variantMates); LocalAlignmentResultVector localAlignmentsRC = HapgenUtil::alignReadsLocally(referenceFlanking[0], variantRCMates); // Merge alignments localAlignments.insert(localAlignments.end(), localAlignmentsRC.begin(), localAlignmentsRC.end()); double sum = 0.0f; double count = 0.0f; for(size_t j = 0; j < localAlignments.size(); ++j) { double max_score = localAlignments[j].queryEndIndex - localAlignments[j].queryStartIndex + 1; double frac = (double)localAlignments[j].score / max_score; //printf("Score: %d frac: %lf\n", localAlignments[j].score, frac); sum += frac; count += 1; } double score = sum / count; if(score > bestAverageScoreFrac) { secondBest = bestAverageScoreFrac; bestAverageScoreFrac = score; bestCandidate = i; } else if(score > secondBest) { secondBest = score; } //printf("Alignment %zu mate-score: %lf\n", i, score); } if(bestCandidate == -1) return DRC_NO_ALIGNMENT; /* if(bestAverageScoreFrac < 0.9f) return DRC_POOR_ALIGNMENT; if(bestAverageScoreFrac - secondBest < 0.05f) return DRC_AMBIGUOUS_ALIGNMENT; */ bestAlignment = candidateAlignments[bestCandidate]; return DRC_OK; }