void DiagList::Copy(const DiagList &DL) { Clear(); unsigned uCount = DL.GetCount(); for (unsigned i = 0; i < uCount; ++i) Add(DL.Get(i)); }
void DiagListToDPRegionList(const DiagList &DL, DPRegionList &RL, unsigned uLengthA, unsigned uLengthB) { if (g_uDiagMargin > g_uMinDiagLength/2) Quit("Invalid parameters, diagmargin=%d must be <= 2*diaglength=%d", g_uDiagMargin, g_uMinDiagLength); unsigned uStartPosA = 0; unsigned uStartPosB = 0; const unsigned uDiagCount = DL.GetCount(); DPRegion r; for (unsigned uDiagIndex = 0; uDiagIndex < uDiagCount; ++uDiagIndex) { const Diag &d = DL.Get(uDiagIndex); assert(d.m_uLength >= g_uMinDiagLength); const unsigned uStartVertexA = d.m_uStartPosA + g_uDiagMargin - 1; const unsigned uStartVertexB = d.m_uStartPosB + g_uDiagMargin - 1; const unsigned uEndVertexA = d.m_uStartPosA + d.m_uLength - g_uDiagMargin; const unsigned uEndVertexB = d.m_uStartPosB + d.m_uLength - g_uDiagMargin; r.m_Type = DPREGIONTYPE_Rect; r.m_Rect.m_uStartPosA = uStartPosA; r.m_Rect.m_uStartPosB = uStartPosB; assert(uStartVertexA + 1 >= uStartPosA); assert(uStartVertexB + 1 >= uStartPosB); r.m_Rect.m_uLengthA = uStartVertexA + 1 - uStartPosA; r.m_Rect.m_uLengthB = uStartVertexB + 1 - uStartPosB; RL.Add(r); if (uEndVertexA > uStartVertexA + 1) { const unsigned uDiagLengthMinusCaps = uEndVertexA - uStartVertexA - 1; r.m_Type = DPREGIONTYPE_Diag; r.m_Diag.m_uStartPosA = uStartVertexA + 1; r.m_Diag.m_uStartPosB = uStartVertexB + 1; assert(uEndVertexA - uStartVertexA == uEndVertexB - uStartVertexB); r.m_Diag.m_uLength = uEndVertexA - uStartVertexA - 1; RL.Add(r); } uStartPosA = uEndVertexA; uStartPosB = uEndVertexB; } assert((int) uLengthA - (int) uStartPosA >= (int) g_uDiagMargin); assert((int) uLengthB - (int) uStartPosB >= (int) g_uDiagMargin); r.m_Type = DPREGIONTYPE_Rect; r.m_Rect.m_uStartPosA = uStartPosA; r.m_Rect.m_uStartPosB = uStartPosB; assert(uLengthA >= uStartPosA); assert(uLengthB >= uStartPosB); r.m_Rect.m_uLengthA = uLengthA - uStartPosA; r.m_Rect.m_uLengthB = uLengthB - uStartPosB; RL.Add(r); }
/// CompareDiagLists - Compare two diagnostic lists and return the difference /// between them. /// static bool CompareDiagLists(SourceManager &SourceMgr, const_diag_iterator d1_begin, const_diag_iterator d1_end, const_diag_iterator d2_begin, const_diag_iterator d2_end, const char *MsgLeftOnly, const char *MsgRightOnly) { DiagList LeftOnly; DiagList Left(d1_begin, d1_end); DiagList Right(d2_begin, d2_end); for (const_diag_iterator I = Left.begin(), E = Left.end(); I != E; ++I) { unsigned LineNo1 = SourceMgr.getInstantiationLineNumber(I->first); const std::string &Diag1 = I->second; DiagList::iterator II, IE; for (II = Right.begin(), IE = Right.end(); II != IE; ++II) { unsigned LineNo2 = SourceMgr.getInstantiationLineNumber(II->first); if (LineNo1 != LineNo2) continue; const std::string &Diag2 = II->second; if (Diag2.find(Diag1) != std::string::npos || Diag1.find(Diag2) != std::string::npos) { break; } } if (II == IE) { // Not found. LeftOnly.push_back(*I); } else { // Found. The same cannot be found twice. Right.erase(II); } } // Now all that's left in Right are those that were not matched. return PrintProblem(SourceMgr, LeftOnly.begin(), LeftOnly.end(), MsgLeftOnly) | PrintProblem(SourceMgr, Right.begin(), Right.end(), MsgRightOnly); }
// Merge diagonals that are continuations of each other with // short breaks of up to length g_uMaxDiagBreak. // In a sorted list of diagonals, we only have to check // consecutive entries. void MergeDiags(DiagList &DL) { unsigned &g_uMaxDiagBreak = getMuscleContext()->params.g_uMaxDiagBreak; return; #if DEBUG if (!DL.IsSorted()) Quit("MergeDiags: !IsSorted"); #endif // TODO: Fix this! // Breaks must be with no offset (no gaps) const unsigned uCount = DL.GetCount(); if (uCount <= 1) return; DiagList NewList; Diag MergedDiag; const Diag *ptrPrev = &DL.Get(0); for (unsigned i = 1; i < uCount; ++i) { const Diag *ptrDiag = &DL.Get(i); unsigned uBreakLength = DiagBreak(*ptrPrev, *ptrDiag); if (uBreakLength <= g_uMaxDiagBreak) { MergedDiag.m_uStartPosA = ptrPrev->m_uStartPosA; MergedDiag.m_uStartPosB = ptrPrev->m_uStartPosB; MergedDiag.m_uLength = ptrPrev->m_uLength + ptrDiag->m_uLength + uBreakLength; ptrPrev = &MergedDiag; } else { NewList.Add(*ptrPrev); ptrPrev = ptrDiag; } } NewList.Add(*ptrPrev); DL.Copy(NewList); }
/// CheckResults - This compares the expected results to those that /// were actually reported. It emits any discrepencies. Return "true" if there /// were problems. Return "false" otherwise. /// static bool CheckResults(Preprocessor &PP, const DiagList &ExpectedErrors, const DiagList &ExpectedWarnings, const DiagList &ExpectedNotes) { const DiagnosticClient *DiagClient = PP.getDiagnostics().getClient(); assert(DiagClient != 0 && "DiagChecker requires a valid TextDiagnosticBuffer"); const TextDiagnosticBuffer &Diags = static_cast<const TextDiagnosticBuffer&>(*DiagClient); SourceManager &SourceMgr = PP.getSourceManager(); // We want to capture the delta between what was expected and what was // seen. // // Expected \ Seen - set expected but not seen // Seen \ Expected - set seen but not expected bool HadProblem = false; // See if there are error mismatches. HadProblem |= CompareDiagLists(SourceMgr, ExpectedErrors.begin(), ExpectedErrors.end(), Diags.err_begin(), Diags.err_end(), "Errors expected but not seen:", "Errors seen but not expected:"); // See if there are warning mismatches. HadProblem |= CompareDiagLists(SourceMgr, ExpectedWarnings.begin(), ExpectedWarnings.end(), Diags.warn_begin(), Diags.warn_end(), "Warnings expected but not seen:", "Warnings seen but not expected:"); // See if there are note mismatches. HadProblem |= CompareDiagLists(SourceMgr, ExpectedNotes.begin(), ExpectedNotes.end(), Diags.note_begin(), Diags.note_end(), "Notes expected but not seen:", "Notes seen but not expected:"); return HadProblem; }
void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY, unsigned uLengthY, DiagList &DL) { MuscleContext *ctx = getMuscleContext(); ALPHA &g_Alpha = ctx->alpha.g_Alpha; unsigned* TuplePos = ctx->finddiagsn.TuplePos; unsigned &g_uMinDiagLength = ctx->params.g_uMinDiagLength; if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha) Quit("FindDiagsNuc: requires nucleo alphabet"); DL.Clear(); // 16 is arbitrary slop, no principled reason for this. if (uLengthX < K + 16 || uLengthY < K + 16) return; // Set A to shorter profile, B to longer const ProfPos *PA; const ProfPos *PB; unsigned uLengthA; unsigned uLengthB; bool bSwap; if (uLengthX < uLengthY) { bSwap = false; PA = PX; PB = PY; uLengthA = uLengthX; uLengthB = uLengthY; } else { bSwap = true; PA = PY; PB = PX; uLengthA = uLengthY; uLengthB = uLengthX; } #if TRACE Log("FindDiagsNuc(LengthA=%d LengthB=%d\n", uLengthA, uLengthB); #endif // Build tuple map for the longer profile, B if (uLengthB < K) Quit("FindDiags: profile too short"); memset(TuplePos, EMPTY, sizeof(TuplePos)); for (unsigned uPos = 0; uPos < uLengthB - K; ++uPos) { const unsigned uTuple = GetTuple(PB, uPos); if (EMPTY == uTuple) continue; TuplePos[uTuple] = uPos; } // Find matches for (unsigned uPosA = 0; uPosA < uLengthA - K; ++uPosA) { const unsigned uTuple = GetTuple(PA, uPosA); if (EMPTY == uTuple) continue; const unsigned uPosB = TuplePos[uTuple]; if (EMPTY == uPosB) continue; // This tuple is found in both profiles unsigned uStartPosA = uPosA; unsigned uStartPosB = uPosB; // Try to extend the match forwards unsigned uEndPosA = uPosA + K - 1; unsigned uEndPosB = uPosB + K - 1; for (;;) { if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB) break; const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup; if (RESIDUE_GROUP_MULTIPLE == uAAGroupA) break; const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup; if (RESIDUE_GROUP_MULTIPLE == uAAGroupB) break; if (uAAGroupA != uAAGroupB) break; ++uEndPosA; ++uEndPosB; } uPosA = uEndPosA; #if TRACE { Log("Match: A %4u-%4u ", uStartPosA, uEndPosA); for (unsigned n = uStartPosA; n <= uEndPosA; ++n) Log("%c", LetterToChar(PA[n].m_uResidueGroup)); Log("\n"); Log(" B %4u-%4u ", uStartPosB, uEndPosB); for (unsigned n = uStartPosB; n <= uEndPosB; ++n) Log("%c", LetterToChar(PB[n].m_uResidueGroup)); Log("\n"); } #endif const unsigned uLength = uEndPosA - uStartPosA + 1; assert(uEndPosB - uStartPosB + 1 == uLength); if (uLength >= g_uMinDiagLength) { if (bSwap) DL.Add(uStartPosB, uStartPosA, uLength); else DL.Add(uStartPosA, uStartPosB, uLength); } } }
void FindDiags(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY, unsigned uLengthY, DiagList &DL) { if (ALPHA_Amino != g_Alpha) Quit("FindDiags: requires amino acid alphabet"); DL.Clear(); if (uLengthX < 12 || uLengthY < 12) return; // Set A to shorter profile, B to longer const ProfPos *PA; const ProfPos *PB; unsigned uLengthA; unsigned uLengthB; bool bSwap; if (uLengthX < uLengthY) { bSwap = false; PA = PX; PB = PY; uLengthA = uLengthX; uLengthB = uLengthY; } else { bSwap = true; PA = PY; PB = PX; uLengthA = uLengthY; uLengthB = uLengthX; } // Build tuple map for the longer profile, B if (uLengthB < KTUP) Quit("FindDiags: profile too short"); memset(TuplePos, EMPTY, sizeof(TuplePos)); for (unsigned uPos = 0; uPos < uLengthB - KTUP; ++uPos) { const unsigned uTuple = GetTuple(PB, uPos); if (EMPTY == uTuple) continue; TuplePos[uTuple] = uPos; } // Find matches for (unsigned uPosA = 0; uPosA < uLengthA - KTUP; ++uPosA) { const unsigned uTuple = GetTuple(PA, uPosA); if (EMPTY == uTuple) continue; const unsigned uPosB = TuplePos[uTuple]; if (EMPTY == uPosB) continue; // This tuple is found in both profiles unsigned uStartPosA = uPosA; unsigned uStartPosB = uPosB; // Try to extend the match forwards unsigned uEndPosA = uPosA + KTUP - 1; unsigned uEndPosB = uPosB + KTUP - 1; for (;;) { if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB) break; const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup; if (RESIDUE_GROUP_MULTIPLE == uAAGroupA) break; const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup; if (RESIDUE_GROUP_MULTIPLE == uAAGroupB) break; if (uAAGroupA != uAAGroupB) break; ++uEndPosA; ++uEndPosB; } uPosA = uEndPosA; #if TRACE { Log("Match: A %4u-%4u ", uStartPosA, uEndPosA); for (unsigned n = uStartPosA; n <= uEndPosA; ++n) Log("%c", 'A' + PA[n].m_uResidueGroup); Log("\n"); Log(" B %4u-%4u ", uStartPosB, uEndPosB); for (unsigned n = uStartPosB; n <= uEndPosB; ++n) Log("%c", 'A' + PB[n].m_uResidueGroup); Log("\n"); } #endif const unsigned uLength = uEndPosA - uStartPosA + 1; assert(uEndPosB - uStartPosB + 1 == uLength); if (uLength >= g_uMinDiagLength) { if (bSwap) DL.Add(uStartPosB, uStartPosA, uLength); else DL.Add(uStartPosA, uStartPosB, uLength); } } }
/// FindDiagnostics - Go through the comment and see if it indicates expected /// diagnostics. If so, then put them in a diagnostic list. /// static void FindDiagnostics(const char *CommentStart, unsigned CommentLen, DiagList &ExpectedDiags, Preprocessor &PP, SourceLocation Pos, const char *ExpectedStr) { const char *CommentEnd = CommentStart+CommentLen; unsigned ExpectedStrLen = strlen(ExpectedStr); // Find all expected-foo diagnostics in the string and add them to // ExpectedDiags. while (CommentStart != CommentEnd) { CommentStart = std::find(CommentStart, CommentEnd, 'e'); if (unsigned(CommentEnd-CommentStart) < ExpectedStrLen) return; // If this isn't expected-foo, ignore it. if (memcmp(CommentStart, ExpectedStr, ExpectedStrLen)) { ++CommentStart; continue; } CommentStart += ExpectedStrLen; // Skip whitespace. while (CommentStart != CommentEnd && isspace(CommentStart[0])) ++CommentStart; // Default, if we find the '{' now, is 1 time. int Times = 1; int Temp = 0; // In extended syntax, there could be a digit now. while (CommentStart != CommentEnd && CommentStart[0] >= '0' && CommentStart[0] <= '9') { Temp *= 10; Temp += CommentStart[0] - '0'; ++CommentStart; } if (Temp > 0) Times = Temp; // Skip whitespace again. while (CommentStart != CommentEnd && isspace(CommentStart[0])) ++CommentStart; // We should have a {{ now. if (CommentEnd-CommentStart < 2 || CommentStart[0] != '{' || CommentStart[1] != '{') { if (std::find(CommentStart, CommentEnd, '{') != CommentEnd) EmitError(PP, Pos, "bogus characters before '{{' in expected string"); else EmitError(PP, Pos, "cannot find start ('{{') of expected string"); return; } CommentStart += 2; // Find the }}. const char *ExpectedEnd = CommentStart; while (1) { ExpectedEnd = std::find(ExpectedEnd, CommentEnd, '}'); if (CommentEnd-ExpectedEnd < 2) { EmitError(PP, Pos, "cannot find end ('}}') of expected string"); return; } if (ExpectedEnd[1] == '}') break; ++ExpectedEnd; // Skip over singular }'s } std::string Msg(CommentStart, ExpectedEnd); std::string::size_type FindPos; while ((FindPos = Msg.find("\\n")) != std::string::npos) Msg.replace(FindPos, 2, "\n"); // Add is possibly multiple times. for (int i = 0; i < Times; ++i) ExpectedDiags.push_back(std::make_pair(Pos, Msg)); CommentStart = ExpectedEnd; } }
SCORE GlobalAlignDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB, unsigned uLengthB, PWPath &Path) { #if LIST_DIAGS TICKS t1 = GetClockTicks(); #endif DiagList DL; if (ALPHA_Amino == g_Alpha) FindDiags(PA, uLengthA, PB, uLengthB, DL); else if (ALPHA_DNA == g_Alpha || ALPHA_RNA == g_Alpha) FindDiagsNuc(PA, uLengthA, PB, uLengthB, DL); else Quit("GlobalAlignDiags: bad alpha"); #if TRACE Log("GlobalAlignDiags, diag list:\n"); DL.LogMe(); #endif DL.Sort(); DL.DeleteIncompatible(); #if TRACE Log("After DeleteIncompatible:\n"); DL.LogMe(); #endif MergeDiags(DL); #if TRACE Log("After MergeDiags:\n"); DL.LogMe(); #endif DPRegionList RL; DiagListToDPRegionList(DL, RL, uLengthA, uLengthB); #if TRACE Log("RegionList:\n"); RL.LogMe(); #endif #if LIST_DIAGS { TICKS t2 = GetClockTicks(); unsigned uArea = RL.GetDPArea(); Log("ticks=%ld\n", (long) (t2 - t1)); Log("area=%u\n", uArea); } #endif g_dDPAreaWithoutDiags += uLengthA*uLengthB; double dDPAreaWithDiags = 0.0; const unsigned uRegionCount = RL.GetCount(); for (unsigned uRegionIndex = 0; uRegionIndex < uRegionCount; ++uRegionIndex) { const DPRegion &r = RL.Get(uRegionIndex); PWPath RegPath; if (DPREGIONTYPE_Diag == r.m_Type) { DiagToPath(r.m_Diag, RegPath); #if TRACE_PATH Log("DiagToPath, path=\n"); RegPath.LogMe(); #endif } else if (DPREGIONTYPE_Rect == r.m_Type) { const unsigned uRegStartPosA = r.m_Rect.m_uStartPosA; const unsigned uRegStartPosB = r.m_Rect.m_uStartPosB; const unsigned uRegLengthA = r.m_Rect.m_uLengthA; const unsigned uRegLengthB = r.m_Rect.m_uLengthB; const ProfPos *RegPA = PA + uRegStartPosA; const ProfPos *RegPB = PB + uRegStartPosB; dDPAreaWithDiags += uRegLengthA*uRegLengthB; GlobalAlignNoDiags(RegPA, uRegLengthA, RegPB, uRegLengthB, RegPath); #if TRACE_PATH Log("GlobalAlignNoDiags RegPath=\n"); RegPath.LogMe(); #endif OffsetPath(RegPath, uRegStartPosA, uRegStartPosB); #if TRACE_PATH Log("After offset path, RegPath=\n"); RegPath.LogMe(); #endif } else Quit("GlobalAlignDiags, Invalid region type %u", r.m_Type); AppendRegPath(Path, RegPath); #if TRACE_PATH Log("After AppendPath, path="); Path.LogMe(); #endif } #if TRACE { double dDPAreaWithoutDiags = uLengthA*uLengthB; Log("DP area with diags %.3g without %.3g pct saved %.3g %%\n", dDPAreaWithDiags, dDPAreaWithoutDiags, (1.0 - dDPAreaWithDiags/dDPAreaWithoutDiags)*100.0); } #endif g_dDPAreaWithDiags += dDPAreaWithDiags; return 0; }