// The XP score is the sum of the score of each pair of // sequences between two profiles which are aligned to // each other. Notice that for two given profiles aligned // in different ways, the difference in XP score must be // the same as the difference in SP score because the // score of a pair of sequences in one profile doesn't // depend on the alignment. SCORE ObjScoreXP(const MSA &msa1, const MSA &msa2) { const unsigned uColCount1 = msa1.GetColCount(); const unsigned uColCount2 = msa2.GetColCount(); if (uColCount1 != uColCount2) Quit("ObjScoreXP, alignment lengths differ %u %u", uColCount1, uColCount2); const unsigned uSeqCount1 = msa1.GetSeqCount(); const unsigned uSeqCount2 = msa2.GetSeqCount(); #if TRACE Log(" Score Weight Weight Total\n"); Log("---------- ------ ------ ----------\n"); #endif SCORE scoreTotal = 0; unsigned uPairCount = 0; for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1) { const WEIGHT w1 = msa1.GetSeqWeight(uSeqIndex1); for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2) { const WEIGHT w2 = msa2.GetSeqWeight(uSeqIndex2); const WEIGHT w = w1*w2; SCORE scoreLetters = ScoreSeqPairLetters(msa1, uSeqIndex1, msa2, uSeqIndex2); SCORE scoreGaps = ScoreSeqPairGaps(msa1, uSeqIndex1, msa2, uSeqIndex2); SCORE scorePair = scoreLetters + scoreGaps; scoreTotal += w1*w2*scorePair; ++uPairCount; #if TRACE Log("%10.2f %6.3f %6.3f %10.2f >%s >%s\n", scorePair, w1, w2, scorePair*w1*w2, msa1.GetSeqName(uSeqIndex1), msa2.GetSeqName(uSeqIndex2)); #endif } } if (0 == uPairCount) Quit("0 == uPairCount"); #if TRACE Log("msa1=\n"); msa1.LogMe(); Log("msa2=\n"); msa2.LogMe(); Log("XP=%g\n", scoreTotal); #endif // return scoreTotal / uPairCount; return scoreTotal; }
// The usual sum-of-pairs objective score: sum the score // of the alignment of each pair of sequences. SCORE ObjScoreDA(const MSA &msa, SCORE *ptrLetters, SCORE *ptrGaps) { const unsigned uSeqCount = msa.GetSeqCount(); SCORE scoreTotal = 0; unsigned uPairCount = 0; #if TRACE msa.LogMe(); Log(" Score Weight Weight Total\n"); Log("---------- ------ ------ ----------\n"); #endif SCORE TotalLetters = 0; SCORE TotalGaps = 0; for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1) { const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1); for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2) { const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2); const WEIGHT w = w1*w2; SCORE Letters; SCORE Gaps; SCORE scorePair = ScoreSeqPair(msa, uSeqIndex1, msa, uSeqIndex2, &Letters, &Gaps); scoreTotal += w1*w2*scorePair; TotalLetters += w1*w2*Letters; TotalGaps += w1*w2*Gaps; ++uPairCount; #if TRACE Log("%10.2f %6.3f %6.3f %10.2f %d=%s %d=%s\n", scorePair, w1, w2, scorePair*w1*w2, uSeqIndex1, msa.GetSeqName(uSeqIndex1), uSeqIndex2, msa.GetSeqName(uSeqIndex2)); #endif } } *ptrLetters = TotalLetters; *ptrGaps = TotalGaps; return scoreTotal; }
// Objective score defined as the sum of profile-sequence // scores for each sequence in the alignment. The profile // is computed from the entire alignment, so this includes // the score of each sequence against itself. This is to // avoid recomputing the profile each time, so we reduce // complexity but introduce a questionable approximation. // The goal is to see if we can exploit the apparent // improvement in performance of log-expectation score // over the usual sum-of-pairs by optimizing this // objective score in the iterative refinement stage. SCORE ObjScorePS(const MSA &msa, SCORE MatchScore[]) { if (g_PPScore != PPSCORE_LE) Quit("FastScoreMSA_LASimple: LA"); const unsigned uSeqCount = msa.GetSeqCount(); const unsigned uColCount = msa.GetColCount(); const ProfPos *Prof = ProfileFromMSA(msa); if (0 != MatchScore) for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) MatchScore[uColIndex] = 0; SCORE scoreTotal = 0; for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) { const WEIGHT weightSeq = msa.GetSeqWeight(uSeqIndex); SCORE scoreSeq = 0; for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) { const ProfPos &PP = Prof[uColIndex]; if (msa.IsGap(uSeqIndex, uColIndex)) { bool bOpen = (0 == uColIndex || !msa.IsGap(uSeqIndex, uColIndex - 1)); bool bClose = (uColCount - 1 == uColIndex || !msa.IsGap(uSeqIndex, uColIndex + 1)); if (bOpen) scoreSeq += PP.m_scoreGapOpen; if (bClose) scoreSeq += PP.m_scoreGapClose; //if (!bOpen && !bClose) // scoreSeq += PP.m_scoreGapExtend; } else if (msa.IsWildcard(uSeqIndex, uColIndex)) continue; else { unsigned uLetter = msa.GetLetter(uSeqIndex, uColIndex); const SCORE scoreMatch = PP.m_AAScores[uLetter]; if (0 != MatchScore) MatchScore[uColIndex] += weightSeq*scoreMatch; scoreSeq += scoreMatch; } } scoreTotal += weightSeq*scoreSeq; } delete[] Prof; return scoreTotal; }
// The usual sum-of-pairs objective score: sum the score // of the alignment of each pair of sequences. SCORE ObjScoreSP(const MSA &msa, SCORE MatchScore[]) { #if TRACE Log("==================ObjScoreSP==============\n"); Log("msa=\n"); msa.LogMe(); #endif g_SPScoreLetters = 0; g_SPScoreGaps = 0; if (0 != MatchScore) { const unsigned uColCount = msa.GetColCount(); for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) MatchScore[uColIndex] = 0; } const unsigned uSeqCount = msa.GetSeqCount(); SCORE scoreTotal = 0; unsigned uPairCount = 0; #if TRACE Log("Seq1 Seq2 wt1 wt2 Letters Gaps Unwt.Score Wt.Score Total\n"); Log("---- ---- ------ ------ ---------- ---------- ---------- ---------- ----------\n"); #endif for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1) { const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1); for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2) { const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2); const WEIGHT w = w1*w2; SCORE scoreLetters = ScoreSeqPairLetters(msa, uSeqIndex1, msa, uSeqIndex2); SCORE scoreGaps = ScoreSeqPairGaps(msa, uSeqIndex1, msa, uSeqIndex2); SCORE scorePair = scoreLetters + scoreGaps; ++uPairCount; scoreTotal += w*scorePair; g_SPScoreLetters += w*scoreLetters; g_SPScoreGaps += w*scoreGaps; #if TRACE Log("%4d %4d %6.3f %6.3f %10.2f %10.2f %10.2f %10.2f %10.2f >%s >%s\n", uSeqIndex1, uSeqIndex2, w1, w2, scoreLetters, scoreGaps, scorePair, scorePair*w1*w2, scoreTotal, msa.GetSeqName(uSeqIndex1), msa.GetSeqName(uSeqIndex2)); #endif } } #if TEST_SPFAST { SCORE f = ObjScoreSPFast(msa); Log("Fast = %.6g\n", f); Log("Brute = %.6g\n", scoreTotal); if (BTEq(f, scoreTotal)) Log("Agree\n"); else Log("** DISAGREE **\n"); } #endif // return scoreTotal / uPairCount; return scoreTotal; }
static SCORE ScoreColLetters(const MSA &msa, unsigned uColIndex) { MuscleContext *ctx = getMuscleContext(); SCOREMATRIX &Mx = *ctx->params.g_ptrScoreMatrix; unsigned &g_AlphaSize = ctx->alpha.g_AlphaSize; const unsigned uSeqCount = msa.GetSeqCount(); #if BRUTE_LETTERS SCORE BruteScore = 0; for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1) { unsigned uLetter1 = msa.GetLetterEx(uSeqIndex1, uColIndex); if (uLetter1 >= g_AlphaSize) continue; WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1); for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2) { unsigned uLetter2 = msa.GetLetterEx(uSeqIndex2, uColIndex); if (uLetter2 >= g_AlphaSize) continue; WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2); BruteScore += w1*w2*Mx[uLetter1][uLetter2]; } } #endif double N = 0; for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1) { WEIGHT w = msa.GetSeqWeight(uSeqIndex1); N += w; } if (N <= 0) return 0; FCOUNT Freqs[20]; memset(Freqs, 0, sizeof(Freqs)); SCORE Score = 0; for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1) { unsigned uLetter = msa.GetLetterEx(uSeqIndex1, uColIndex); if (uLetter >= g_AlphaSize) continue; WEIGHT w = msa.GetSeqWeight(uSeqIndex1); Freqs[uLetter] += w; Score -= w*w*Mx[uLetter][uLetter]; } for (unsigned uLetter1 = 0; uLetter1 < g_AlphaSize; ++uLetter1) { const FCOUNT f1 = Freqs[uLetter1]; Score += f1*f1*Mx[uLetter1][uLetter1]; for (unsigned uLetter2 = uLetter1 + 1; uLetter2 < g_AlphaSize; ++uLetter2) { const FCOUNT f2 = Freqs[uLetter2]; Score += 2*f1*f2*Mx[uLetter1][uLetter2]; } } Score /= 2; #if BRUTE_LETTERS assert(BTEq(BruteScore, Score)); #endif return Score; }
// Weighted sum of pairwise gap scores for an alignment.
//
// The per-context scratch state (ctx->scoregaps) is prepared first: the
// per-sequence gap-list table and the per-column "changed" flags are grown
// on demand, cleared, the columns listed in DiffCols are flagged, and
// FindIntersectingGaps() is run for every sequence. The returned score
// itself is then computed with ScoreSeqPairGaps() over every unordered pair
// of sequences, weighted by the product of the two sequence weights.
//
// msa            Alignment to score.
// DiffCols       Column indexes to flag; each must be < msa.GetColCount()
//                (checked with assert).
// DiffColCount   Number of entries in DiffCols.
SCORE ScoreGaps(const MSA &msa, const unsigned DiffCols[], unsigned DiffColCount)
{
    MuscleContext *ctx = getMuscleContext();
    unsigned &g_ColCount = ctx->scoregaps.g_ColCount;
    unsigned &g_MaxSeqCount = ctx->scoregaps.g_MaxSeqCount;
    unsigned &g_MaxColCount = ctx->scoregaps.g_MaxColCount;
    GAPINFO** &g_Gaps = ctx->scoregaps.g_Gaps;
    bool* &g_ColDiff = ctx->scoregaps.g_ColDiff;

#if TRACE
    {
    Log("ScoreGaps\n");
    Log("DiffCols ");
    for (unsigned i = 0; i < DiffColCount; ++i)
        Log(" %u", DiffCols[i]);
    Log("\n");
    Log("msa=\n");
    msa.LogMe();
    Log("\n");
    }
#endif

    const unsigned uSeqTotal = msa.GetSeqCount();
    const unsigned uColTotal = msa.GetColCount();
    g_ColCount = uColTotal;

    // Grow the gap-list table when this alignment has more sequences than
    // any seen so far (256 entries of slack), then clear the used part.
    if (g_MaxSeqCount < uSeqTotal)
    {
        delete[] g_Gaps;
        g_MaxSeqCount = uSeqTotal + 256;
        g_Gaps = new GAPINFO *[g_MaxSeqCount];
    }
    memset(g_Gaps, 0, uSeqTotal*sizeof(GAPINFO *));

    // Same scheme for the per-column flags.
    if (g_MaxColCount < uColTotal)
    {
        delete[] g_ColDiff;
        g_MaxColCount = uColTotal + 256;
        g_ColDiff = new bool[g_MaxColCount];
    }
    memset(g_ColDiff, 0, g_ColCount*sizeof(bool));

    // Flag the columns named by the caller.
    for (unsigned uDiff = 0; uDiff < DiffColCount; ++uDiff)
    {
        const unsigned uFlagged = DiffCols[uDiff];
        assert(uFlagged < uColTotal);
        g_ColDiff[uFlagged] = true;
    }

    for (unsigned uSeq = 0; uSeq < uSeqTotal; ++uSeq)
        FindIntersectingGaps(msa, uSeq);

#if TRACE
    {
    Log("\n");
    Log("Intersecting gaps:\n");
    Log(" ");
    for (unsigned Col = 0; Col < uColTotal; ++Col)
        Log("%c", g_ColDiff[Col] ? '*' : ' ');
    Log("\n");
    Log(" ");
    for (unsigned Col = 0; Col < uColTotal; ++Col)
        Log("%d", Col%10);
    Log("\n");
    for (unsigned Seq = 0; Seq < uSeqTotal; ++Seq)
    {
        Log("%3d: ", Seq);
        for (unsigned Col = 0; Col < uColTotal; ++Col)
            Log("%c", msa.GetChar(Seq, Col));
        Log(" :: ");
        for (GAPINFO *GI = g_Gaps[Seq]; GI; GI = GI->Next)
            Log(" (%d,%d)", GI->Start, GI->End);
        Log(" >%s\n", msa.GetSeqName(Seq));
    }
    Log("\n");
    }
#endif

    SCORE scoreSum = 0;
    for (unsigned uA = 0; uA < uSeqTotal; ++uA)
    {
        const WEIGHT wA = msa.GetSeqWeight(uA);
        for (unsigned uB = uA + 1; uB < uSeqTotal; ++uB)
        {
            const WEIGHT wB = msa.GetSeqWeight(uB);

            // The gap-list based ScorePair(uA, uB) is disabled; the full
            // pairwise gap score is used instead.
            const SCORE scorePairGaps = ScoreSeqPairGaps(msa, uA, msa, uB);

            scoreSum += wA*wB*scorePairGaps;
#if TRACE
            Log("Seq1=%u Seq2=%u ScorePair=%.4g w1=%.4g w2=%.4g Sum=%.4g\n",
              uA, uB, scorePairGaps, wA, wB, scoreSum);
#endif
        }
    }

    return scoreSum;
}
// this is a version of the profile x profile score that computes // a per-site score suitable for use with anchoring heuristics SCORE LetterObjScoreXP(const MSA &msa1, const MSA &msa2, SCORE MatchScore[]) { const unsigned uColCount1 = msa1.GetColCount(); const unsigned uColCount2 = msa2.GetColCount(); if (uColCount1 != uColCount2) Quit("ObjScoreXP, alignment lengths differ %u %u", uColCount1, uColCount2); const unsigned uSeqCount1 = msa1.GetSeqCount(); const unsigned uSeqCount2 = msa2.GetSeqCount(); #if TRACE Log(" Score Weight Weight Total\n"); Log("---------- ------ ------ ----------\n"); #endif SCORE* mmScore = NULL; SCORE* ggScore = NULL; if( MatchScore != NULL ) { mmScore = new SCORE[uColCount1]; ggScore = new SCORE[uColCount1]; memset( MatchScore, 0, sizeof(SCORE)*uColCount1 ); } SCORE scoreTotal = 0; unsigned uPairCount = 0; for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1) { const WEIGHT w1 = msa1.GetSeqWeight(uSeqIndex1); for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2) { if( mmScore != NULL ) memset( mmScore, 0, sizeof(SCORE)*uColCount1 ); if( ggScore != NULL ) memset( ggScore, 0, sizeof(SCORE)*uColCount1 ); const WEIGHT w2 = msa2.GetSeqWeight(uSeqIndex2); const WEIGHT w = w1*w2; SCORE scoreLetters = ScoreSeqPairLetters(msa1, uSeqIndex1, msa2, uSeqIndex2, mmScore); SCORE scoreGaps = ScoreSeqPairGaps(msa1, uSeqIndex1, msa2, uSeqIndex2, ggScore); SCORE scorePair = scoreLetters + scoreGaps; scoreTotal += w*scorePair; ++uPairCount; if( MatchScore != NULL ) for( unsigned uColIndex = 0; uColIndex < uColCount1; ++uColIndex ) MatchScore[uColIndex] += w*(mmScore[uColIndex]+ggScore[uColIndex]); #if TRACE Log("%10.2f %6.3f %6.3f %10.2f >%s >%s\n", scorePair, w1, w2, scorePair*w1*w2, msa1.GetSeqName(uSeqIndex1), msa2.GetSeqName(uSeqIndex2)); #endif } } if (0 == uPairCount) Quit("0 == uPairCount"); #if TRACE Log("msa1=\n"); msa1.LogMe(); Log("msa2=\n"); msa2.LogMe(); Log("XP=%g\n", scoreTotal); #endif // return scoreTotal / 
uPairCount; if( mmScore != NULL ) delete[] mmScore; if( ggScore != NULL ) delete[] ggScore; return scoreTotal; }