char ConsensusChar(const ProfPos &PP) { unsigned uMostCommonLetter = 0; FCOUNT fcMostCommon = PP.m_fcCounts[0]; bool bMoreThanOneLetter = false; bool bAnyLetter = false; for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter) { const FCOUNT fc = PP.m_fcCounts[uLetter]; if (fc > 0) { if (bAnyLetter) bMoreThanOneLetter = true; bAnyLetter = true; } if (fc > fcMostCommon) { uMostCommonLetter = uLetter; fcMostCommon = fc; } } if (!bAnyLetter) return '-'; char c = LetterToChar(uMostCommonLetter); if (bMoreThanOneLetter) return UnalignChar(c); return c; }
static char *TupleToStr(int t) { static char s[K]; for (int i = 0; i < K; ++i) { unsigned Letter = (t/(pow4(i)))%4; assert(Letter >= 0 && Letter < 4); s[K-i-1] = LetterToChar(Letter); } return s; }
void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY, unsigned uLengthY, DiagList &DL) { MuscleContext *ctx = getMuscleContext(); ALPHA &g_Alpha = ctx->alpha.g_Alpha; unsigned* TuplePos = ctx->finddiagsn.TuplePos; unsigned &g_uMinDiagLength = ctx->params.g_uMinDiagLength; if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha) Quit("FindDiagsNuc: requires nucleo alphabet"); DL.Clear(); // 16 is arbitrary slop, no principled reason for this. if (uLengthX < K + 16 || uLengthY < K + 16) return; // Set A to shorter profile, B to longer const ProfPos *PA; const ProfPos *PB; unsigned uLengthA; unsigned uLengthB; bool bSwap; if (uLengthX < uLengthY) { bSwap = false; PA = PX; PB = PY; uLengthA = uLengthX; uLengthB = uLengthY; } else { bSwap = true; PA = PY; PB = PX; uLengthA = uLengthY; uLengthB = uLengthX; } #if TRACE Log("FindDiagsNuc(LengthA=%d LengthB=%d\n", uLengthA, uLengthB); #endif // Build tuple map for the longer profile, B if (uLengthB < K) Quit("FindDiags: profile too short"); memset(TuplePos, EMPTY, sizeof(TuplePos)); for (unsigned uPos = 0; uPos < uLengthB - K; ++uPos) { const unsigned uTuple = GetTuple(PB, uPos); if (EMPTY == uTuple) continue; TuplePos[uTuple] = uPos; } // Find matches for (unsigned uPosA = 0; uPosA < uLengthA - K; ++uPosA) { const unsigned uTuple = GetTuple(PA, uPosA); if (EMPTY == uTuple) continue; const unsigned uPosB = TuplePos[uTuple]; if (EMPTY == uPosB) continue; // This tuple is found in both profiles unsigned uStartPosA = uPosA; unsigned uStartPosB = uPosB; // Try to extend the match forwards unsigned uEndPosA = uPosA + K - 1; unsigned uEndPosB = uPosB + K - 1; for (;;) { if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB) break; const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup; if (RESIDUE_GROUP_MULTIPLE == uAAGroupA) break; const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup; if (RESIDUE_GROUP_MULTIPLE == uAAGroupB) break; if (uAAGroupA != uAAGroupB) break; ++uEndPosA; ++uEndPosB; } uPosA = uEndPosA; #if TRACE { Log("Match: A %4u-%4u ", uStartPosA, uEndPosA); for (unsigned n = uStartPosA; n <= uEndPosA; ++n) Log("%c", LetterToChar(PA[n].m_uResidueGroup)); Log("\n"); Log(" B %4u-%4u ", uStartPosB, uEndPosB); for (unsigned n = uStartPosB; n <= uEndPosB; ++n) Log("%c", LetterToChar(PB[n].m_uResidueGroup)); Log("\n"); } #endif const unsigned uLength = uEndPosA - uStartPosA + 1; assert(uEndPosB - uStartPosB + 1 == uLength); if (uLength >= g_uMinDiagLength) { if (bSwap) DL.Add(uStartPosB, uStartPosA, uLength); else DL.Add(uStartPosA, uStartPosB, uLength); } } }