void MSA::ListWeights() const { const unsigned uSeqCount = GetSeqCount(); Log("Weights:\n"); WEIGHT wTotal = 0; for (unsigned n = 0; n < uSeqCount; ++n) { wTotal += GetSeqWeight(n); Log("%6.3f %s\n", GetSeqWeight(n), GetSeqName(n)); } Log("Total weights = %6.3f, should be 1.0\n", wTotal); }
void MSA::GetFractionalWeightedCounts(unsigned uColIndex, bool bNormalize, FCOUNT fcCounts[], FCOUNT *ptrfcGapStart, FCOUNT *ptrfcGapEnd, FCOUNT *ptrfcGapExtend, FCOUNT *ptrfOcc, FCOUNT *ptrfcLL, FCOUNT *ptrfcLG, FCOUNT *ptrfcGL, FCOUNT *ptrfcGG) const { const unsigned uSeqCount = GetSeqCount(); const unsigned uColCount = GetColCount(); const char* seqName; memset(fcCounts, 0, g_AlphaSize*sizeof(FCOUNT)); WEIGHT wTotal = 0; FCOUNT fGap = 0; for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) { const WEIGHT w = GetSeqWeight(uSeqIndex); if (IsGap(uSeqIndex, uColIndex)) { fGap += w; continue; } else if (IsWildcard(uSeqIndex, uColIndex)) { const unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex); switch (g_Alpha) { case ALPHA_Amino: switch (uLetter) { case AX_B: // D or N fcCounts[AX_D] += w/2; fcCounts[AX_N] += w/2; break; case AX_Z: // E or Q fcCounts[AX_E] += w/2; fcCounts[AX_Q] += w/2; break; default: // any { const FCOUNT f = w/20; for (unsigned uLetter = 0; uLetter < 20; ++uLetter) fcCounts[uLetter] += f; break; } } break; case ALPHA_DNA: case ALPHA_RNA: switch (uLetter) { case AX_R: // G or A fcCounts[NX_G] += w/2; fcCounts[NX_A] += w/2; break; case AX_Y: // C or T/U fcCounts[NX_C] += w/2; fcCounts[NX_T] += w/2; break; default: // any const FCOUNT f = w/20; for (unsigned uLetter = 0; uLetter < 4; ++uLetter) fcCounts[uLetter] += f; break; } break; default: Quit("Alphabet %d not supported", g_Alpha); } continue; } unsigned uLetter = GetLetter(uSeqIndex, uColIndex); //BEGIN MODIFICATIONS TO MUSCLE int original=0; for(unsigned i=0; i<uColIndex; i++){ if (i >= this->GetColCount()){break;} ++original; char c = GetChar(uSeqIndex, i); if(c== '-'){ original--; } } seqName = this->GetSeqName(uSeqIndex); int compositeVectPosition; compositeVectPosition = atoi(seqName); CompositeVect CV = *CVLocation; Composite* CVL = CV[compositeVectPosition]; Composite C = *CVL; for(int j=0; j<21; j++){ fcCounts[j] = w*C[original][j]; wTotal = w*C[original][j]; } //ORIGINAL MUSLCE LINE WAS: //fcCounts[uLetter] += w; //wTotal += w; //END MODIFICATIONS TO MUSCLE } *ptrfOcc = (float) (1.0 - fGap); if (bNormalize && wTotal > 0) { if (wTotal > 1.001) Quit("wTotal=%g\n", wTotal); for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter) fcCounts[uLetter] /= wTotal; // AssertNormalized(fcCounts); } FCOUNT fcStartCount = 0; if (uColIndex == 0) { for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) if (IsGap(uSeqIndex, uColIndex)) fcStartCount += GetSeqWeight(uSeqIndex); } else { for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex - 1)) fcStartCount += GetSeqWeight(uSeqIndex); } FCOUNT fcEndCount = 0; if (uColCount - 1 == uColIndex) { for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) if (IsGap(uSeqIndex, uColIndex)) fcEndCount += GetSeqWeight(uSeqIndex); } else { for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex + 1)) fcEndCount += GetSeqWeight(uSeqIndex); } FCOUNT LL = 0; FCOUNT LG = 0; FCOUNT GL = 0; FCOUNT GG = 0; for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) { WEIGHT w = GetSeqWeight(uSeqIndex); bool bLetterHere = !IsGap(uSeqIndex, uColIndex); bool bLetterPrev = (uColIndex == 0 || !IsGap(uSeqIndex, uColIndex - 1)); if (bLetterHere) { if (bLetterPrev) LL += w; else GL += w; } else { if (bLetterPrev) LG += w; else GG += w; } } FCOUNT fcExtendCount = 0; if (uColIndex > 0 && uColIndex < GetColCount() - 1) for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) if (IsGap(uSeqIndex, uColIndex) && IsGap(uSeqIndex, uColIndex - 1) && IsGap(uSeqIndex, uColIndex + 1)) fcExtendCount += GetSeqWeight(uSeqIndex); *ptrfcLL = LL; *ptrfcLG = LG; *ptrfcGL = GL; *ptrfcGG = GG; *ptrfcGapStart = fcStartCount; *ptrfcGapEnd = fcEndCount; *ptrfcGapExtend = fcExtendCount; }
void MSA::GetFractionalWeightedCounts(unsigned uColIndex, bool bNormalize, FCOUNT fcCounts[], FCOUNT *ptrfcGapStart, FCOUNT *ptrfcGapEnd, FCOUNT *ptrfcGapExtend, FCOUNT *ptrfOcc, FCOUNT *ptrfcLL, FCOUNT *ptrfcLG, FCOUNT *ptrfcGL, FCOUNT *ptrfcGG) const { const unsigned uSeqCount = GetSeqCount(); const unsigned uColCount = GetColCount(); memset(fcCounts, 0, g_AlphaSize*sizeof(FCOUNT)); WEIGHT wTotal = 0; FCOUNT fGap = 0; for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) { const WEIGHT w = GetSeqWeight(uSeqIndex); if (IsGap(uSeqIndex, uColIndex)) { fGap += w; continue; } else if (IsWildcard(uSeqIndex, uColIndex)) { const unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex); switch (g_Alpha) { case ALPHA_Amino: switch (uLetter) { case AX_B: // D or N fcCounts[AX_D] += w/2; fcCounts[AX_N] += w/2; break; case AX_Z: // E or Q fcCounts[AX_E] += w/2; fcCounts[AX_Q] += w/2; break; default: // any { const FCOUNT f = w/20; for (unsigned uLetter = 0; uLetter < 20; ++uLetter) fcCounts[uLetter] += f; break; } } break; case ALPHA_DNA: case ALPHA_RNA: switch (uLetter) { case AX_R: // G or A fcCounts[NX_G] += w/2; fcCounts[NX_A] += w/2; break; case AX_Y: // C or T/U fcCounts[NX_C] += w/2; fcCounts[NX_T] += w/2; break; default: // any const FCOUNT f = w/20; for (unsigned uLetter = 0; uLetter < 4; ++uLetter) fcCounts[uLetter] += f; break; } break; default: Quit("Alphabet %d not supported", g_Alpha); } continue; } unsigned uLetter = GetLetter(uSeqIndex, uColIndex); fcCounts[uLetter] += w; wTotal += w; } *ptrfOcc = (float) (1.0 - fGap); if (bNormalize && wTotal > 0) { if (wTotal > 1.001) Quit("wTotal=%g\n", wTotal); for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter) fcCounts[uLetter] /= wTotal; // AssertNormalized(fcCounts); } FCOUNT fcStartCount = 0; if (uColIndex == 0) { for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) if (IsGap(uSeqIndex, uColIndex)) fcStartCount += GetSeqWeight(uSeqIndex); } else { for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex - 1)) fcStartCount += GetSeqWeight(uSeqIndex); } FCOUNT fcEndCount = 0; if (uColCount - 1 == uColIndex) { for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) if (IsGap(uSeqIndex, uColIndex)) fcEndCount += GetSeqWeight(uSeqIndex); } else { for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex + 1)) fcEndCount += GetSeqWeight(uSeqIndex); } FCOUNT LL = 0; FCOUNT LG = 0; FCOUNT GL = 0; FCOUNT GG = 0; for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) { WEIGHT w = GetSeqWeight(uSeqIndex); bool bLetterHere = !IsGap(uSeqIndex, uColIndex); bool bLetterPrev = (uColIndex == 0 || !IsGap(uSeqIndex, uColIndex - 1)); if (bLetterHere) { if (bLetterPrev) LL += w; else GL += w; } else { if (bLetterPrev) LG += w; else GG += w; } } FCOUNT fcExtendCount = 0; if (uColIndex > 0 && uColIndex < GetColCount() - 1) for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) if (IsGap(uSeqIndex, uColIndex) && IsGap(uSeqIndex, uColIndex - 1) && IsGap(uSeqIndex, uColIndex + 1)) fcExtendCount += GetSeqWeight(uSeqIndex); *ptrfcLL = LL; *ptrfcLG = LG; *ptrfcGL = GL; *ptrfcGG = GG; *ptrfcGapStart = fcStartCount; *ptrfcGapEnd = fcEndCount; *ptrfcGapExtend = fcExtendCount; }