// Self-checking global alignment of two profiles.
//
// Aligns PA (uLengthA positions) against PB (uLengthB positions) twice:
// once with GlobalAlignSimple and once with NWSmall. The NWSmall result is
// written to Path and its score is returned. If the two paths are not equal
// both are logged and Quit() is called.
//
// Side effects: sets g_bKeepSimpleDP to true; when TIMING is enabled the
// elapsed clock ticks are added to g_ticksDP.
SCORE GlobalAlign(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
{
#if TIMING
    TICKS ticksStart = GetClockTicks();
#endif

    g_bKeepSimpleDP = true;

    // Reference path from the simple implementation.
    PWPath RefPath;
    GlobalAlignSimple(PA, uLengthA, PB, uLengthB, RefPath);

    // Path under test; this is the one handed back to the caller.
    const SCORE scoreSmall = NWSmall(PA, uLengthA, PB, uLengthB, Path);

    const bool bPathsAgree = Path.Equal(RefPath);
    if (!bPathsAgree)
    {
        Log("Simple:\n");
        RefPath.LogMe();
        Log("Small:\n");
        Path.LogMe();
        Quit("Paths differ");
    }

#if TIMING
    TICKS ticksEnd = GetClockTicks();
    g_ticksDP += (ticksEnd - ticksStart);
#endif

    return scoreSmall;
}
// Appends every edge of RegPath, in order, to the end of Path.
// Path is not cleared first; RegPath is left unchanged.
static void AppendRegPath(PWPath &Path, const PWPath &RegPath)
{
    const unsigned uCount = RegPath.GetEdgeCount();
    unsigned i = 0;
    while (i < uCount)
    {
        Path.AppendEdge(RegPath.GetEdge(i));
        ++i;
    }
}
// Replaces Path with uLengthA 'D' edges.
// Edge k (1-based) has uPrefixLengthA == k and uPrefixLengthB == 0.
static void AllDeletes(PWPath &Path, unsigned uLengthA)
{
    Path.Clear();

    PWEdge DeleteEdge;
    DeleteEdge.cType = 'D';
    DeleteEdge.uPrefixLengthB = 0;

    for (unsigned uPrefixA = 1; uPrefixA <= uLengthA; ++uPrefixA)
    {
        DeleteEdge.uPrefixLengthA = uPrefixA;
        Path.AppendEdge(DeleteEdge);
    }
}
// Replaces Path with uLengthB 'I' edges.
// Edge k (1-based) has uPrefixLengthB == k and uPrefixLengthA == 0.
static void AllInserts(PWPath &Path, unsigned uLengthB)
{
    Path.Clear();

    PWEdge InsertEdge;
    InsertEdge.cType = 'I';
    InsertEdge.uPrefixLengthA = 0;

    for (unsigned uPrefixB = 1; uPrefixB <= uLengthB; ++uPrefixB)
    {
        InsertEdge.uPrefixLengthB = uPrefixB;
        Path.AppendEdge(InsertEdge);
    }
}
static void DiagToPath(const Diag &d, PWPath &Path) { Path.Clear(); const unsigned uLength = d.m_uLength; for (unsigned i = 0; i < uLength; ++i) { PWEdge Edge; Edge.cType = 'M'; Edge.uPrefixLengthA = d.m_uStartPosA + i + 1; Edge.uPrefixLengthB = d.m_uStartPosB + i + 1; Path.AppendEdge(Edge); } }
// Adds uOffsetA / uOffsetB to the A / B prefix lengths of every edge in Path,
// modifying the edges in place.
static void OffsetPath(PWPath &Path, unsigned uOffsetA, unsigned uOffsetB)
{
    const unsigned uCount = Path.GetEdgeCount();
    for (unsigned i = 0; i < uCount; ++i)
    {
        // Nasty hack -- the edge is only reachable through a const reference,
        // so constness is cast away to poke the new values back into the
        // path, circumventing the class interface.
        const PWEdge &ReadOnlyEdge = Path.GetEdge(i);
        PWEdge &WritableEdge = const_cast<PWEdge &>(ReadOnlyEdge);
        WritableEdge.uPrefixLengthA += uOffsetA;
        WritableEdge.uPrefixLengthB += uOffsetB;
    }
}
static void PathSeq(const Seq &s, const PWPath &Path, bool bRight, Seq &sOut) { short *esA; short *esB; PathToEstrings(Path, &esA, &esB); const unsigned uSeqLength = s.Length(); const unsigned uEdgeCount = Path.GetEdgeCount(); sOut.Clear(); sOut.SetName(s.GetName()); unsigned uPos = 0; for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex) { const PWEdge &Edge = Path.GetEdge(uEdgeIndex); char cType = Edge.cType; if (bRight) { if (cType == 'I') cType = 'D'; else if (cType == 'D') cType = 'I'; } switch (cType) { case 'M': sOut.AppendChar(s[uPos++]); break; case 'D': sOut.AppendChar('-'); break; case 'I': sOut.AppendChar(s[uPos++]); break; default: Quit("PathSeq, invalid edge type %c", cType); } } }
void DiagList::FromPath(const PWPath &Path) { unsigned &g_uMinDiagLength = getMuscleContext()->params.g_uMinDiagLength; Clear(); const unsigned uEdgeCount = Path.GetEdgeCount(); unsigned uLength = 0; unsigned uStartPosA; unsigned uStartPosB; for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex) { const PWEdge &Edge = Path.GetEdge(uEdgeIndex); // Typical cases if (Edge.cType == 'M') { if (0 == uLength) { uStartPosA = Edge.uPrefixLengthA - 1; uStartPosB = Edge.uPrefixLengthB - 1; } ++uLength; } else { if (uLength >= g_uMinDiagLength) Add(uStartPosA, uStartPosB, uLength); uLength = 0; } } // Special case for last edge if (uLength >= g_uMinDiagLength) Add(uStartPosA, uStartPosB, uLength); }
bool TryRealign(MSA &msaIn, const Tree &tree, const unsigned Leaves1[], unsigned uCount1, const unsigned Leaves2[], unsigned uCount2, SCORE *ptrscoreBefore, SCORE *ptrscoreAfter, bool bLockLeft, bool bLockRight) { #if TRACE Log("TryRealign, msaIn=\n"); #endif MuscleContext *ctx = getMuscleContext(); const unsigned uSeqCount = msaIn.GetSeqCount(); unsigned *Ids1 = new unsigned[uSeqCount]; unsigned *Ids2 = new unsigned[uSeqCount]; LeafIndexesToIds(tree, Leaves1, uCount1, Ids1); LeafIndexesToIds(tree, Leaves2, uCount2, Ids2); MSA msa1; MSA msa2; MSASubsetByIds(msaIn, Ids1, uCount1, msa1); MSASubsetByIds(msaIn, Ids2, uCount2, msa2); #if DEBUG ValidateMuscleIds(msa1); ValidateMuscleIds(msa2); #endif // Computing the objective score may be expensive for // large numbers of sequences. As a speed optimization, // we check whether the alignment changes. If it does // not change, there is no need to compute the objective // score. We test for the alignment changing by comparing // the Viterbi paths before and after re-aligning. PWPath pathBefore; pathBefore.FromMSAPair(msa1, msa2); DeleteGappedCols(msa1); DeleteGappedCols(msa2); if (0 == msa1.GetColCount() || 0 == msa2.GetColCount()) { delete[] Ids1; delete[] Ids2; return false; } MSA msaRealigned; PWPath pathAfter; AlignTwoMSAs(msa1, msa2, msaRealigned, pathAfter, bLockLeft, bLockRight); bool bAnyChanges = !pathAfter.Equal(pathBefore); unsigned uDiffCount1; unsigned uDiffCount2; unsigned* Edges1 = ctx->refinehoriz.Edges1; unsigned* Edges2 = ctx->refinehoriz.Edges2; DiffPaths(pathBefore, pathAfter, Edges1, &uDiffCount1, Edges2, &uDiffCount2); #if TRACE Log("TryRealign, msa1=\n"); Log("\nmsa2=\n"); Log("\nRealigned (changes %s)=\n", bAnyChanges ? 
"TRUE" : "FALSE"); #endif if (!bAnyChanges) { *ptrscoreBefore = 0; *ptrscoreAfter = 0; delete[] Ids1; delete[] Ids2; return false; } SetMSAWeightsMuscle(msaIn); SetMSAWeightsMuscle(msaRealigned); #if DIFFOBJSCORE const SCORE scoreDiff = DiffObjScore(msaIn, pathBefore, Edges1, uDiffCount1, msaRealigned, pathAfter, Edges2, uDiffCount2); bool bAccept = (scoreDiff > 0); *ptrscoreBefore = 0; *ptrscoreAfter = scoreDiff; //const SCORE scoreBefore = ObjScoreIds(msaIn, Ids1, uCount1, Ids2, uCount2); //const SCORE scoreAfter = ObjScoreIds(msaRealigned, Ids1, uCount1, Ids2, uCount2); //Log("Diff = %.3g %.3g\n", scoreDiff, scoreAfter - scoreBefore); #else const SCORE scoreBefore = ObjScoreIds(msaIn, Ids1, uCount1, Ids2, uCount2); const SCORE scoreAfter = ObjScoreIds(msaRealigned, Ids1, uCount1, Ids2, uCount2); bool bAccept = (scoreAfter > scoreBefore); #if TRACE Log("Score %g -> %g Accept %s\n", scoreBefore, scoreAfter, bAccept ? "TRUE" : "FALSE"); #endif *ptrscoreBefore = scoreBefore; *ptrscoreAfter = scoreAfter; #endif if (bAccept) msaIn.Copy(msaRealigned); delete[] Ids1; delete[] Ids2; return bAccept; }
ProgNode *ProgressiveAlignE(const SeqVect &v, const Tree &GuideTree, MSA &a) { assert(GuideTree.IsRooted()); #if TRACE Log("GuideTree:\n"); GuideTree.LogMe(); #endif const unsigned uSeqCount = v.Length(); const unsigned uNodeCount = 2*uSeqCount - 1; const unsigned uIterCount = uSeqCount - 1; WEIGHT *Weights = new WEIGHT[uSeqCount]; CalcClustalWWeights(GuideTree, Weights); ProgNode *ProgNodes = new ProgNode[uNodeCount]; unsigned uJoin = 0; unsigned uTreeNodeIndex = GuideTree.FirstDepthFirstNode(); SetProgressDesc("Align node"); do { if (GuideTree.IsLeaf(uTreeNodeIndex)) { if (uTreeNodeIndex >= uNodeCount) Quit("TreeNodeIndex=%u NodeCount=%u\n", uTreeNodeIndex, uNodeCount); ProgNode &Node = ProgNodes[uTreeNodeIndex]; unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex); if (uId >= uSeqCount) Quit("Seq index out of range"); const Seq &s = *(v[uId]); Node.m_MSA.FromSeq(s); Node.m_MSA.SetSeqId(0, uId); Node.m_uLength = Node.m_MSA.GetColCount(); Node.m_Weight = Weights[uId]; // TODO: Term gaps settable Node.m_Prof = ProfileFromMSA(Node.m_MSA); Node.m_EstringL = 0; Node.m_EstringR = 0; #if TRACE Log("Leaf id=%u\n", uId); Log("MSA=\n"); Node.m_MSA.LogMe(); Log("Profile (from MSA)=\n"); ListProfile(Node.m_Prof, Node.m_uLength, &Node.m_MSA); #endif } else { Progress(uJoin, uSeqCount - 1); ++uJoin; const unsigned uMergeNodeIndex = uTreeNodeIndex; ProgNode &Parent = ProgNodes[uMergeNodeIndex]; const unsigned uLeft = GuideTree.GetLeft(uTreeNodeIndex); const unsigned uRight = GuideTree.GetRight(uTreeNodeIndex); if (g_bVerbose) { Log("Align: ("); LogLeafNames(GuideTree, uLeft); Log(") ("); LogLeafNames(GuideTree, uRight); Log(")\n"); } ProgNode &Node1 = ProgNodes[uLeft]; ProgNode &Node2 = ProgNodes[uRight]; #if TRACE Log("AlignTwoMSAs:\n"); #endif AlignTwoProfs( Node1.m_Prof, Node1.m_uLength, Node1.m_Weight, Node2.m_Prof, Node2.m_uLength, Node2.m_Weight, Parent.m_Path, &Parent.m_Prof, &Parent.m_uLength); #if TRACE_LENGTH_DELTA { unsigned L = Node1.m_uLength; unsigned R = 
Node2.m_uLength; unsigned P = Parent.m_Path.GetEdgeCount(); unsigned Max = L > R ? L : R; unsigned d = P - Max; Log("LD%u;%u;%u;%u\n", L, R, P, d); } #endif PathToEstrings(Parent.m_Path, &Parent.m_EstringL, &Parent.m_EstringR); Parent.m_Weight = Node1.m_Weight + Node2.m_Weight; #if VALIDATE { #if TRACE Log("AlignTwoMSAs:\n"); #endif PWPath TmpPath; AlignTwoMSAs(Node1.m_MSA, Node2.m_MSA, Parent.m_MSA, TmpPath); ProfPos *P1 = ProfileFromMSA(Node1.m_MSA, true); ProfPos *P2 = ProfileFromMSA(Node2.m_MSA, true); unsigned uLength = Parent.m_MSA.GetColCount(); ProfPos *TmpProf = ProfileFromMSA(Parent.m_MSA, true); #if TRACE Log("Node1 MSA=\n"); Node1.m_MSA.LogMe(); Log("Node1 prof=\n"); ListProfile(Node1.m_Prof, Node1.m_MSA.GetColCount(), &Node1.m_MSA); Log("Node1 prof (from MSA)=\n"); ListProfile(P1, Node1.m_MSA.GetColCount(), &Node1.m_MSA); AssertProfsEq(Node1.m_Prof, Node1.m_uLength, P1, Node1.m_MSA.GetColCount()); Log("Node2 prof=\n"); ListProfile(Node2.m_Prof, Node2.m_MSA.GetColCount(), &Node2.m_MSA); Log("Node2 MSA=\n"); Node2.m_MSA.LogMe(); Log("Node2 prof (from MSA)=\n"); ListProfile(P2, Node2.m_MSA.GetColCount(), &Node2.m_MSA); AssertProfsEq(Node2.m_Prof, Node2.m_uLength, P2, Node2.m_MSA.GetColCount()); TmpPath.AssertEqual(Parent.m_Path); Log("Parent MSA=\n"); Parent.m_MSA.LogMe(); Log("Parent prof=\n"); ListProfile(Parent.m_Prof, Parent.m_uLength, &Parent.m_MSA); Log("Parent prof (from MSA)=\n"); ListProfile(TmpProf, Parent.m_MSA.GetColCount(), &Parent.m_MSA); #endif // TRACE AssertProfsEq(Parent.m_Prof, Parent.m_uLength, TmpProf, Parent.m_MSA.GetColCount()); delete[] P1; delete[] P2; delete[] TmpProf; } #endif // VALIDATE Node1.m_MSA.Clear(); Node2.m_MSA.Clear(); // Don't delete profiles, may need them for tree refinement. 
//delete[] Node1.m_Prof; //delete[] Node2.m_Prof; //Node1.m_Prof = 0; //Node2.m_Prof = 0; } uTreeNodeIndex = GuideTree.NextDepthFirstNode(uTreeNodeIndex); } while (NULL_NEIGHBOR != uTreeNodeIndex); ProgressStepsDone(); if (g_bBrenner) MakeRootMSABrenner((SeqVect &) v, GuideTree, ProgNodes, a); else MakeRootMSA(v, GuideTree, ProgNodes, a); #if VALIDATE { unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex(); const ProgNode &RootProgNode = ProgNodes[uRootNodeIndex]; AssertMSAEq(a, RootProgNode.m_MSA); } #endif delete[] Weights; return ProgNodes; }
void PathToEstrings(const PWPath &Path, short **ptresA, short **ptresB) { // First pass to determine size of estrings esA and esB const unsigned uEdgeCount = Path.GetEdgeCount(); if (0 == uEdgeCount) { short *esA = new short[1]; short *esB = new short[1]; esA[0] = 0; esB[0] = 0; *ptresA = esA; *ptresB = esB; return; } unsigned iLengthA = 1; unsigned iLengthB = 1; const char cFirstEdgeType = Path.GetEdge(0).cType; char cPrevEdgeType = cFirstEdgeType; for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex) { const PWEdge &Edge = Path.GetEdge(uEdgeIndex); char cEdgeType = Edge.cType; switch (c2(cPrevEdgeType, cEdgeType)) { case c2('M', 'M'): case c2('D', 'D'): case c2('I', 'I'): break; case c2('D', 'M'): case c2('M', 'D'): ++iLengthB; break; case c2('I', 'M'): case c2('M', 'I'): ++iLengthA; break; case c2('I', 'D'): case c2('D', 'I'): ++iLengthB; ++iLengthA; break; default: assert(false); } cPrevEdgeType = cEdgeType; } // Pass2 for seq A { short *esA = new short[iLengthA+1]; unsigned iA = 0; switch (Path.GetEdge(0).cType) { case 'M': case 'D': esA[0] = 1; break; case 'I': esA[0] = -1; break; default: assert(false); } char cPrevEdgeType = cFirstEdgeType; for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex) { const PWEdge &Edge = Path.GetEdge(uEdgeIndex); char cEdgeType = Edge.cType; switch (c2(cPrevEdgeType, cEdgeType)) { case c2('M', 'M'): case c2('D', 'D'): case c2('D', 'M'): case c2('M', 'D'): ++(esA[iA]); break; case c2('I', 'D'): case c2('I', 'M'): ++iA; esA[iA] = 1; break; case c2('M', 'I'): case c2('D', 'I'): ++iA; esA[iA] = -1; break; case c2('I', 'I'): --(esA[iA]); break; default: assert(false); } cPrevEdgeType = cEdgeType; } assert(iA == iLengthA - 1); esA[iLengthA] = 0; *ptresA = esA; } { // Pass2 for seq B short *esB = new short[iLengthB+1]; unsigned iB = 0; switch (Path.GetEdge(0).cType) { case 'M': case 'I': esB[0] = 1; break; case 'D': esB[0] = -1; break; default: assert(false); } char cPrevEdgeType = cFirstEdgeType; for 
(unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex) { const PWEdge &Edge = Path.GetEdge(uEdgeIndex); char cEdgeType = Edge.cType; switch (c2(cPrevEdgeType, cEdgeType)) { case c2('M', 'M'): case c2('I', 'I'): case c2('I', 'M'): case c2('M', 'I'): ++(esB[iB]); break; case c2('D', 'I'): case c2('D', 'M'): ++iB; esB[iB] = 1; break; case c2('M', 'D'): case c2('I', 'D'): ++iB; esB[iB] = -1; break; case c2('D', 'D'): --(esB[iB]); break; default: assert(false); } cPrevEdgeType = cEdgeType; } assert(iB == iLengthB - 1); esB[iLengthB] = 0; *ptresB = esB; } #if DEBUG { const PWEdge &LastEdge = Path.GetEdge(uEdgeCount - 1); unsigned uSymbols; unsigned uIndels; EstringCounts(*ptresA, &uSymbols, &uIndels); assert(uSymbols == LastEdge.uPrefixLengthA); assert(uSymbols + uIndels == uEdgeCount); EstringCounts(*ptresB, &uSymbols, &uIndels); assert(uSymbols == LastEdge.uPrefixLengthB); assert(uSymbols + uIndels == uEdgeCount); PWPath TmpPath; EstringsToPath(*ptresA, *ptresB, TmpPath); TmpPath.AssertEqual(Path); } #endif }
void AlignTwoMSAsGivenPathSW(const PWPath &Path, const MSA &msaA, const MSA &msaB, MSA &msaCombined) { msaCombined.Clear(); #if TRACE Log("AlignTwoMSAsGivenPathSW\n"); Log("Template A:\n"); msaA.LogMe(); Log("Template B:\n"); msaB.LogMe(); #endif const unsigned uColCountA = msaA.GetColCount(); const unsigned uColCountB = msaB.GetColCount(); const unsigned uSeqCountA = msaA.GetSeqCount(); const unsigned uSeqCountB = msaB.GetSeqCount(); msaCombined.SetSeqCount(uSeqCountA + uSeqCountB); // Copy sequence names into combined MSA for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA) { msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA)); msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA)); } for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB) { msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB)); msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB)); } unsigned uColIndexA = 0; unsigned uColIndexB = 0; unsigned uColIndexCombined = 0; const unsigned uEdgeCount = Path.GetEdgeCount(); for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex) { const PWEdge &Edge = Path.GetEdge(uEdgeIndex); #if TRACE Log("\nEdge %u %c%u.%u\n", uEdgeIndex, Edge.cType, Edge.uPrefixLengthA, Edge.uPrefixLengthB); #endif const char cType = Edge.cType; const unsigned uPrefixLengthA = Edge.uPrefixLengthA; unsigned uColCountA = 0; if (uPrefixLengthA > 0) { const unsigned uNodeIndexA = uPrefixLengthA - 1; const unsigned uTplColIndexA = uNodeIndexA; if (uTplColIndexA > uColIndexA) uColCountA = uTplColIndexA - uColIndexA; } const unsigned uPrefixLengthB = Edge.uPrefixLengthB; unsigned uColCountB = 0; if (uPrefixLengthB > 0) { const unsigned uNodeIndexB = uPrefixLengthB - 1; const unsigned uTplColIndexB = uNodeIndexB; if (uTplColIndexB > uColIndexB) uColCountB = uTplColIndexB - uColIndexB; } AppendUnalignedTerminals(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB, uSeqCountA, uSeqCountB, 
msaCombined, uColIndexCombined); switch (cType) { case 'M': { assert(uPrefixLengthA > 0); assert(uPrefixLengthB > 0); const unsigned uColA = uPrefixLengthA - 1; const unsigned uColB = uPrefixLengthB - 1; assert(uColIndexA == uColA); assert(uColIndexB == uColB); AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined); break; } case 'D': { assert(uPrefixLengthA > 0); const unsigned uColA = uPrefixLengthA - 1; assert(uColIndexA == uColA); AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined); break; } case 'I': { assert(uPrefixLengthB > 0); const unsigned uColB = uPrefixLengthB - 1; assert(uColIndexB == uColB); AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined); break; } default: assert(false); } } unsigned uInsertColCountA = uColCountA - uColIndexA; unsigned uInsertColCountB = uColCountB - uColIndexB; AppendUnalignedTerminals(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB, uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined); }
// Local alignment of two profiles using full DP matrices.
//
//   PA, uLengthA - profile A and its length (must be > 0).
//   PB, uLengthB - profile B and its length (must be > 0).
//   Path         - receives the path produced by TraceBackSW, started at the
//                  cell with the highest match score.
//
// Three matrices are filled, indexed by (prefix length of A, prefix length
// of B): DPM (last column is a match), DPD (letter in A, gap in B) and
// DPI (gap in A, letter in B). The best predecessor of a match cell is
// clamped to 0 before the letter-pair score is added, so an alignment can
// start anywhere. No gap-extension score is applied in this function.
//
// Returns the highest match score found.
SCORE SW(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
{
    assert(uLengthB > 0 && uLengthA > 0);

    const unsigned uPrefixCountA = uLengthA + 1;
    const unsigned uPrefixCountB = uLengthB + 1;

    // Allocate DP matrices. Widen before multiplying so the cell count is
    // not truncated by unsigned arithmetic for large profiles.
    const size_t LM = static_cast<size_t>(uPrefixCountA)*uPrefixCountB;
    SCORE *DPM_ = new SCORE[LM];
    SCORE *DPD_ = new SCORE[LM];
    SCORE *DPI_ = new SCORE[LM];

    DPM(0, 0) = 0;
    DPD(0, 0) = MINUS_INFINITY;
    DPI(0, 0) = MINUS_INFINITY;

    // Empty prefix of B is special case
    for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
    {
        // M=LetterA+LetterB, impossible with empty prefix
        DPM(uPrefixLengthA, 0) = MINUS_INFINITY;

        // D=LetterA+GapB, never optimal in local alignment with gap penalties
        DPD(uPrefixLengthA, 0) = MINUS_INFINITY;

        // I=GapA+LetterB, impossible with empty prefix
        DPI(uPrefixLengthA, 0) = MINUS_INFINITY;
    }

    // Empty prefix of A is special case
    for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
    {
        // M=LetterA+LetterB, impossible with empty prefix
        DPM(0, uPrefixLengthB) = MINUS_INFINITY;

        // D=LetterA+GapB, impossible with empty prefix
        DPD(0, uPrefixLengthB) = MINUS_INFINITY;

        // I=GapA+LetterB, never optimal in local alignment with gap penalties
        DPI(0, uPrefixLengthB) = MINUS_INFINITY;
    }

    SCORE scoreMax = MINUS_INFINITY;
    unsigned uPrefixLengthAMax = uInsane;
    unsigned uPrefixLengthBMax = uInsane;

    // ============
    // Main DP loop
    // ============
    // The gap-close scores used at a cell are those of the previous position
    // in each profile; there is none before the first position.
    SCORE scoreGapCloseB = MINUS_INFINITY;
    for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
    {
        const ProfPos &PPB = PB[uPrefixLengthB - 1];

        SCORE scoreGapCloseA = MINUS_INFINITY;
        for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
        {
            const ProfPos &PPA = PA[uPrefixLengthA - 1];

            {
                // Match M=LetterA+LetterB
                SCORE scoreLL = ScoreProfPos2(PPA, PPB);

                SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1);
                SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseA;
                SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseB;

                SCORE scoreBest;
                if (scoreMM >= scoreDM && scoreMM >= scoreIM)
                    scoreBest = scoreMM;
                else if (scoreDM >= scoreMM && scoreDM >= scoreIM)
                    scoreBest = scoreDM;
                else
                {
                    assert(scoreIM >= scoreMM && scoreIM >= scoreDM);
                    scoreBest = scoreIM;
                }

                // Local alignment: a negative prefix is dropped.
                if (scoreBest < 0)
                    scoreBest = 0;

                scoreBest += scoreLL;
                if (scoreBest > scoreMax)
                {
                    scoreMax = scoreBest;
                    uPrefixLengthAMax = uPrefixLengthA;
                    uPrefixLengthBMax = uPrefixLengthB;
                }
                DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest;
            }

            {
                // Delete D=LetterA+GapB
                SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) +
                  PA[uPrefixLengthA-1].m_scoreGapOpen;
                SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB);

                SCORE scoreBest;
                if (scoreMD >= scoreDD)
                    scoreBest = scoreMD;
                else
                {
                    assert(scoreDD >= scoreMD);
                    scoreBest = scoreDD;
                }
                DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;
            }

            // Insert I=GapA+LetterB
            {
                SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) +
                  PB[uPrefixLengthB - 1].m_scoreGapOpen;
                SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1);

                SCORE scoreBest;
                if (scoreMI >= scoreII)
                    scoreBest = scoreMI;
                else
                {
                    assert(scoreII > scoreMI);
                    scoreBest = scoreII;
                }
                DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;
            }

            scoreGapCloseA = PPA.m_scoreGapClose;
        }
        scoreGapCloseB = PPB.m_scoreGapClose;
    }

#if TRACE
    // The loop variables are out of scope here; pass the matrix dimensions.
    Log("DPM:\n");
    ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);
    Log("DPD:\n");
    ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);
    Log("DPI:\n");
    ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);
#endif

    assert(scoreMax == DPM(uPrefixLengthAMax, uPrefixLengthBMax));
    TraceBackSW(PA, uLengthA, PB, uLengthB, DPM_, DPD_, DPI_,
      uPrefixLengthAMax, uPrefixLengthBMax, Path);

#if TRACE
    SCORE scorePath = FastScorePath2(PA, uLengthA, PB, uLengthB, Path);
    Path.LogMe();
    Log("Score = %s Path = %s\n", LocalScoreToStr(scoreMax), LocalScoreToStr(scorePath));
#endif

    delete[] DPM_;
    delete[] DPD_;
    delete[] DPI_;

    return scoreMax;
}
static SCORE TraceBackDimer( const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_, const char *TBM_, const char *TBD_, const char *TBI_, unsigned uLengthA, unsigned uLengthB, PWPath &Path) { const unsigned uPrefixCountA = uLengthA + 1; unsigned uPrefixLengthA = uLengthA; unsigned uPrefixLengthB = uLengthB; char cEdge = 'M'; SCORE scoreMax = DPM(uLengthA, uLengthB); if (DPD(uLengthA, uLengthB) > scoreMax) { scoreMax = DPD(uLengthA, uLengthB); cEdge = 'D'; } if (DPI(uLengthA, uLengthB) > scoreMax) { scoreMax = DPI(uLengthA, uLengthB); cEdge = 'I'; } for (;;) { if (0 == uPrefixLengthA && 0 == uPrefixLengthB) break; PWEdge Edge; Edge.cType = cEdge; Edge.uPrefixLengthA = uPrefixLengthA; Edge.uPrefixLengthB = uPrefixLengthB; Path.PrependEdge(Edge); #if TRACE Log("PLA=%u PLB=%u Edge=%c\n", uPrefixLengthA, uPrefixLengthB, cEdge); #endif switch (cEdge) { case 'M': assert(uPrefixLengthA > 0 && uPrefixLengthB > 0); cEdge = TBM(uPrefixLengthA, uPrefixLengthB); --uPrefixLengthA; --uPrefixLengthB; break; case 'D': assert(uPrefixLengthA > 0); cEdge = TBD(uPrefixLengthA, uPrefixLengthB); --uPrefixLengthA; break; case 'I': assert(uPrefixLengthB > 0); cEdge = TBI(uPrefixLengthA, uPrefixLengthB); --uPrefixLengthB; break; default: Quit("Invalid edge PLA=%u PLB=%u %c", uPrefixLengthA, uPrefixLengthB, cEdge); } } #if TRACE Path.LogMe(); #endif return scoreMax; }
SCORE GlobalAlignDiags(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB, unsigned uLengthB, PWPath &Path) { #if LIST_DIAGS TICKS t1 = GetClockTicks(); #endif DiagList DL; if (ALPHA_Amino == g_Alpha) FindDiags(PA, uLengthA, PB, uLengthB, DL); else if (ALPHA_DNA == g_Alpha || ALPHA_RNA == g_Alpha) FindDiagsNuc(PA, uLengthA, PB, uLengthB, DL); else Quit("GlobalAlignDiags: bad alpha"); #if TRACE Log("GlobalAlignDiags, diag list:\n"); DL.LogMe(); #endif DL.Sort(); DL.DeleteIncompatible(); #if TRACE Log("After DeleteIncompatible:\n"); DL.LogMe(); #endif MergeDiags(DL); #if TRACE Log("After MergeDiags:\n"); DL.LogMe(); #endif DPRegionList RL; DiagListToDPRegionList(DL, RL, uLengthA, uLengthB); #if TRACE Log("RegionList:\n"); RL.LogMe(); #endif #if LIST_DIAGS { TICKS t2 = GetClockTicks(); unsigned uArea = RL.GetDPArea(); Log("ticks=%ld\n", (long) (t2 - t1)); Log("area=%u\n", uArea); } #endif g_dDPAreaWithoutDiags += uLengthA*uLengthB; double dDPAreaWithDiags = 0.0; const unsigned uRegionCount = RL.GetCount(); for (unsigned uRegionIndex = 0; uRegionIndex < uRegionCount; ++uRegionIndex) { const DPRegion &r = RL.Get(uRegionIndex); PWPath RegPath; if (DPREGIONTYPE_Diag == r.m_Type) { DiagToPath(r.m_Diag, RegPath); #if TRACE_PATH Log("DiagToPath, path=\n"); RegPath.LogMe(); #endif } else if (DPREGIONTYPE_Rect == r.m_Type) { const unsigned uRegStartPosA = r.m_Rect.m_uStartPosA; const unsigned uRegStartPosB = r.m_Rect.m_uStartPosB; const unsigned uRegLengthA = r.m_Rect.m_uLengthA; const unsigned uRegLengthB = r.m_Rect.m_uLengthB; const ProfPos *RegPA = PA + uRegStartPosA; const ProfPos *RegPB = PB + uRegStartPosB; dDPAreaWithDiags += uRegLengthA*uRegLengthB; GlobalAlignNoDiags(RegPA, uRegLengthA, RegPB, uRegLengthB, RegPath); #if TRACE_PATH Log("GlobalAlignNoDiags RegPath=\n"); RegPath.LogMe(); #endif OffsetPath(RegPath, uRegStartPosA, uRegStartPosB); #if TRACE_PATH Log("After offset path, RegPath=\n"); RegPath.LogMe(); #endif } else Quit("GlobalAlignDiags, Invalid region 
type %u", r.m_Type); AppendRegPath(Path, RegPath); #if TRACE_PATH Log("After AppendPath, path="); Path.LogMe(); #endif } #if TRACE { double dDPAreaWithoutDiags = uLengthA*uLengthB; Log("DP area with diags %.3g without %.3g pct saved %.3g %%\n", dDPAreaWithDiags, dDPAreaWithoutDiags, (1.0 - dDPAreaWithDiags/dDPAreaWithoutDiags)*100.0); } #endif g_dDPAreaWithDiags += dDPAreaWithDiags; return 0; }
SCORE NWDASimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB, unsigned uLengthB, PWPath &Path) { assert(uLengthB > 0 && uLengthA > 0); const unsigned uPrefixCountA = uLengthA + 1; const unsigned uPrefixCountB = uLengthB + 1; // Allocate DP matrices const size_t LM = uPrefixCountA*uPrefixCountB; SCORE *DPL_ = new SCORE[LM]; SCORE *DPM_ = new SCORE[LM]; SCORE *DPD_ = new SCORE[LM]; SCORE *DPE_ = new SCORE[LM]; SCORE *DPI_ = new SCORE[LM]; SCORE *DPJ_ = new SCORE[LM]; char *TBM_ = new char[LM]; char *TBD_ = new char[LM]; char *TBE_ = new char[LM]; char *TBI_ = new char[LM]; char *TBJ_ = new char[LM]; memset(TBM_, '?', LM); memset(TBD_, '?', LM); memset(TBE_, '?', LM); memset(TBI_, '?', LM); memset(TBJ_, '?', LM); DPM(0, 0) = 0; DPD(0, 0) = MINUS_INFINITY; DPE(0, 0) = MINUS_INFINITY; DPI(0, 0) = MINUS_INFINITY; DPJ(0, 0) = MINUS_INFINITY; DPM(1, 0) = MINUS_INFINITY; DPD(1, 0) = PA[0].m_scoreGapOpen; DPE(1, 0) = PA[0].m_scoreGapOpen2; TBD(1, 0) = 'D'; TBE(1, 0) = 'E'; DPI(1, 0) = MINUS_INFINITY; DPJ(1, 0) = MINUS_INFINITY; DPM(0, 1) = MINUS_INFINITY; DPD(0, 1) = MINUS_INFINITY; DPE(0, 1) = MINUS_INFINITY; DPI(0, 1) = PB[0].m_scoreGapOpen; DPJ(0, 1) = PB[0].m_scoreGapOpen2; TBI(0, 1) = 'I'; TBJ(0, 1) = 'J'; // Empty prefix of B is special case for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA) { DPM(uPrefixLengthA, 0) = MINUS_INFINITY; DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) + g_scoreGapExtend.get(); DPE(uPrefixLengthA, 0) = DPE(uPrefixLengthA - 1, 0) + g_scoreGapExtend2.get(); TBD(uPrefixLengthA, 0) = 'D'; TBE(uPrefixLengthA, 0) = 'E'; DPI(uPrefixLengthA, 0) = MINUS_INFINITY; DPJ(uPrefixLengthA, 0) = MINUS_INFINITY; } // Empty prefix of A is special case for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB) { DPM(0, uPrefixLengthB) = MINUS_INFINITY; DPD(0, uPrefixLengthB) = MINUS_INFINITY; DPE(0, uPrefixLengthB) = MINUS_INFINITY; DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) + 
g_scoreGapExtend.get(); DPJ(0, uPrefixLengthB) = DPJ(0, uPrefixLengthB - 1) + g_scoreGapExtend2.get(); TBI(0, uPrefixLengthB) = 'I'; TBJ(0, uPrefixLengthB) = 'J'; } // Special case to agree with NWFast, no D-I transitions so... DPD(uLengthA, 0) = MINUS_INFINITY; DPE(uLengthA, 0) = MINUS_INFINITY; // DPI(0, uLengthB) = MINUS_INFINITY; // DPJ(0, uLengthB) = MINUS_INFINITY; // ============ // Main DP loop // ============ SCORE scoreGapCloseB = MINUS_INFINITY; SCORE scoreGapClose2B = MINUS_INFINITY; for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB) { const ProfPos &PPB = PB[uPrefixLengthB - 1]; SCORE scoreGapCloseA = MINUS_INFINITY; SCORE scoreGapClose2A = MINUS_INFINITY; for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA) { const ProfPos &PPA = PA[uPrefixLengthA - 1]; { // Match M=LetterA+LetterB SCORE scoreLL = ScoreProfPos2(PPA, PPB); DPL(uPrefixLengthA, uPrefixLengthB) = scoreLL; SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1); SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseA; SCORE scoreEM = DPE(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapClose2A; SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseB; SCORE scoreJM = DPJ(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapClose2B; SCORE scoreBest; if (scoreMM >= scoreDM && scoreMM >= scoreEM && scoreMM >= scoreIM && scoreMM >= scoreJM) { scoreBest = scoreMM; TBM(uPrefixLengthA, uPrefixLengthB) = 'M'; } else if (scoreDM >= scoreMM && scoreDM >= scoreEM && scoreDM >= scoreIM && scoreDM >= scoreJM) { scoreBest = scoreDM; TBM(uPrefixLengthA, uPrefixLengthB) = 'D'; } else if (scoreEM >= scoreMM && scoreEM >= scoreDM && scoreEM >= scoreIM && scoreEM >= scoreJM) { scoreBest = scoreEM; TBM(uPrefixLengthA, uPrefixLengthB) = 'E'; } else if (scoreIM >= scoreMM && scoreIM >= scoreDM && scoreIM >= scoreEM && scoreIM >= scoreJM) { scoreBest = scoreIM; TBM(uPrefixLengthA, uPrefixLengthB) = 'I'; } else { assert(scoreJM 
>= scoreMM && scoreJM >= scoreDM && scoreJM >= scoreEM && scoreJM >= scoreIM); scoreBest = scoreJM; TBM(uPrefixLengthA, uPrefixLengthB) = 'J'; } DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL; } { // Delete D=LetterA+GapB SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) + PA[uPrefixLengthA-1].m_scoreGapOpen; SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) + g_scoreGapExtend.get(); SCORE scoreBest; if (scoreMD >= scoreDD) { scoreBest = scoreMD; TBD(uPrefixLengthA, uPrefixLengthB) = 'M'; } else { assert(scoreDD >= scoreMD); scoreBest = scoreDD; TBD(uPrefixLengthA, uPrefixLengthB) = 'D'; } DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest; } { // Delete E=LetterA+GapB SCORE scoreME = DPM(uPrefixLengthA-1, uPrefixLengthB) + PA[uPrefixLengthA-1].m_scoreGapOpen2; SCORE scoreEE = DPE(uPrefixLengthA-1, uPrefixLengthB) + g_scoreGapExtend2.get(); SCORE scoreBest; if (scoreME >= scoreEE) { scoreBest = scoreME; TBE(uPrefixLengthA, uPrefixLengthB) = 'M'; } else { assert(scoreEE >= scoreME); scoreBest = scoreEE; TBE(uPrefixLengthA, uPrefixLengthB) = 'E'; } DPE(uPrefixLengthA, uPrefixLengthB) = scoreBest; } // Insert I=GapA+LetterB { SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) + PB[uPrefixLengthB - 1].m_scoreGapOpen; SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) + g_scoreGapExtend.get(); SCORE scoreBest; if (scoreMI >= scoreII) { scoreBest = scoreMI; TBI(uPrefixLengthA, uPrefixLengthB) = 'M'; } else { assert(scoreII > scoreMI); scoreBest = scoreII; TBI(uPrefixLengthA, uPrefixLengthB) = 'I'; } DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest; } // Insert J=GapA+LetterB { SCORE scoreMJ = DPM(uPrefixLengthA, uPrefixLengthB-1) + PB[uPrefixLengthB - 1].m_scoreGapOpen2; SCORE scoreJJ = DPJ(uPrefixLengthA, uPrefixLengthB-1) + g_scoreGapExtend2.get(); SCORE scoreBest; if (scoreMJ >= scoreJJ) { scoreBest = scoreMJ; TBJ(uPrefixLengthA, uPrefixLengthB) = 'M'; } else { assert(scoreJJ > scoreMJ); scoreBest = scoreJJ; TBJ(uPrefixLengthA, uPrefixLengthB) = 
'J'; } DPJ(uPrefixLengthA, uPrefixLengthB) = scoreBest; } scoreGapCloseA = PPA.m_scoreGapClose; scoreGapClose2A = PPA.m_scoreGapClose2; } scoreGapCloseB = PPB.m_scoreGapClose; scoreGapClose2B = PPB.m_scoreGapClose2; } #if TRACE Log("\n"); Log("DA Simple DPL:\n"); ListDP(DPL_, PA, PB, uPrefixCountA, uPrefixCountB); Log("\n"); Log("DA Simple DPM:\n"); ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB); Log("\n"); Log("DA Simple DPD:\n"); ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB); Log("\n"); Log("DA Simple DPE:\n"); ListDP(DPE_, PA, PB, uPrefixCountA, uPrefixCountB); Log("\n"); Log("DA Simple DPI:\n"); ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB); Log("\n"); Log("DA Simple DPJ:\n"); ListDP(DPJ_, PA, PB, uPrefixCountA, uPrefixCountB); Log("\n"); Log("DA Simple TBM:\n"); ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB); Log("\n"); Log("DA Simple TBD:\n"); ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB); Log("\n"); Log("DA Simple TBE:\n"); ListTB(TBE_, PA, PB, uPrefixCountA, uPrefixCountB); Log("\n"); Log("DA Simple TBI:\n"); ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB); Log("\n"); Log("DA Simple TBJ:\n"); ListTB(TBJ_, PA, PB, uPrefixCountA, uPrefixCountB); #endif // Trace-back // ========== Path.Clear(); // Find last edge SCORE M = DPM(uLengthA, uLengthB); SCORE D = DPD(uLengthA, uLengthB) + PA[uLengthA-1].m_scoreGapClose; SCORE E = DPE(uLengthA, uLengthB) + PA[uLengthA-1].m_scoreGapClose2; SCORE I = DPI(uLengthA, uLengthB) + PB[uLengthB-1].m_scoreGapClose; SCORE J = DPJ(uLengthA, uLengthB) + PB[uLengthB-1].m_scoreGapClose2; char cEdgeType = '?'; SCORE BestScore = M; cEdgeType = 'M'; if (D > BestScore) { cEdgeType = 'D'; BestScore = D; } if (E > BestScore) { cEdgeType = 'E'; BestScore = E; } if (I > BestScore) { cEdgeType = 'I'; BestScore = I; } if (J > BestScore) { cEdgeType = 'J'; BestScore = J; } #if TRACE Log("DA Simple: MAB=%.4g DAB=%.4g EAB=%.4g IAB=%.4g JAB=%.4g best=%c\n", M, D, E, I, J, cEdgeType); #endif unsigned PLA = uLengthA; 
unsigned PLB = uLengthB; for (;;) { PWEdge Edge; Edge.cType = XlatEdgeType(cEdgeType); Edge.uPrefixLengthA = PLA; Edge.uPrefixLengthB = PLB; #if TRACE Log("Prepend %c%d.%d\n", Edge.cType, PLA, PLB); #endif Path.PrependEdge(Edge); switch (cEdgeType) { case 'M': assert(PLA > 0); assert(PLB > 0); cEdgeType = TBM(PLA, PLB); --PLA; --PLB; break; case 'D': assert(PLA > 0); cEdgeType = TBD(PLA, PLB); --PLA; break; case 'E': assert(PLA > 0); cEdgeType = TBE(PLA, PLB); --PLA; break; case 'I': assert(PLB > 0); cEdgeType = TBI(PLA, PLB); --PLB; break; case 'J': assert(PLB > 0); cEdgeType = TBJ(PLA, PLB); --PLB; break; default: Quit("Invalid edge %c", cEdgeType); } if (0 == PLA && 0 == PLB) break; } Path.Validate(); // SCORE Score = TraceBack(PA, uLengthA, PB, uLengthB, DPM_, DPD_, DPI_, Path); #if TRACE SCORE scorePath = FastScorePath2(PA, uLengthA, PB, uLengthB, Path); Path.LogMe(); Log("Score = %s Path = %s\n", LocalScoreToStr(BestScore), LocalScoreToStr(scorePath)); #endif if (g_bKeepSimpleDP.get()) { g_DPM.get() = DPM_; g_DPD.get() = DPD_; g_DPE.get() = DPE_; g_DPI.get() = DPI_; g_DPJ.get() = DPJ_; g_TBM.get() = TBM_; g_TBD.get() = TBD_; g_TBE.get() = TBE_; g_TBI.get() = TBI_; g_TBJ.get() = TBJ_; } else { delete[] DPM_; delete[] DPD_; delete[] DPE_; delete[] DPI_; delete[] DPJ_; delete[] TBM_; delete[] TBD_; delete[] TBE_; delete[] TBI_; delete[] TBJ_; } return BestScore; }
// Converts a pair of edit strings into a pairwise alignment path.
//
// Each edit string is read as a list of non-zero run lengths ended by a 0
// entry. The two strings are consumed in lock-step, one path edge per step:
//   run in esA > 0, run in esB > 0  ->  'M' edge (both prefix lengths grow)
//   run in esA > 0, run in esB < 0  ->  'D' edge (only the A prefix grows)
//   run in esA < 0, run in esB > 0  ->  'I' edge (only the B prefix grows)
// Every other pairing (including an exhausted or empty run) is malformed.
//
// Path is cleared first and then receives one appended edge per step.
//
// Malformed input still trips assert(false) in debug builds. When asserts
// are compiled out the conversion now stops at the offending step, leaving
// the edges built so far in Path, instead of reading an uninitialised edge
// type and running on past the terminators.
// NOTE(review): the rest of the file reports fatal conditions via Quit();
// consider doing the same here if a truncated path must never be returned.
void EstringsToPath(const short esA[], const short esB[], PWPath &Path)
{
    Path.Clear();

    unsigned iA = 0;
    unsigned iB = 0;
    int nA = esA[iA++];
    int nB = esB[iB++];
    unsigned uPrefixLengthA = 0;
    unsigned uPrefixLengthB = 0;

    for (;;)
    {
        // Pick the edge type from the signs of the two current runs and
        // consume one unit from each run (runs count towards zero).
        char cType;
        if (nA > 0 && nB > 0)
        {
            cType = 'M';
            --nA;
            --nB;
            ++uPrefixLengthA;
            ++uPrefixLengthB;
        }
        else if (nA > 0 && nB < 0)
        {
            cType = 'D';
            --nA;
            ++nB;
            ++uPrefixLengthA;
        }
        else if (nA < 0 && nB > 0)
        {
            cType = 'I';
            ++nA;
            --nB;
            ++uPrefixLengthB;
        }
        else
        {
            // Inconsistent edit strings: no edge type is defined for this
            // pairing, so never fall through with cType unset.
            assert(false);
            return;
        }

        PWEdge Edge;
        Edge.cType = cType;
        Edge.uPrefixLengthA = uPrefixLengthA;
        Edge.uPrefixLengthB = uPrefixLengthB;
        Path.AppendEdge(Edge);

        // Refill exhausted runs; a 0 entry in esA ends the conversion.
        if (nA == 0)
        {
            if (0 == esA[iA])
            {
                assert(0 == esB[iB]);
                break;
            }
            nA = esA[iA++];
        }
        if (nB == 0)
            nB = esB[iB++];
    }
}
// Rebuilds an alignment path by walking backwards through the M / D / I
// score matrices, starting at cell (uLengthA, uLengthB).
//
// At every step the score stored for the current cell and state is compared
// with EQ against candidate predecessor scores recomputed from neighbouring
// cells; the first candidate that matches (tested in a fixed order) names the
// previous edge type. The walk ends when that predecessor is the start
// state 'S'.
//
//   PA, PB            profile positions of the two inputs
//   uLengthA/B        their lengths; both are asserted to be > 0
//   DPM_, DPD_, DPI_  score matrices; presumably what the DPM / DPD / DPI
//                     accessors (defined outside this block) index into
//   Path              cleared, then filled by prepending one edge per step
//
// Returns the best of the three final-cell scores, where the D and I scores
// include the gap-close score of the last position of A and B respectively.
// Calls Quit when no candidate predecessor matches the stored score.
SCORE TraceBack(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB, unsigned uLengthB, const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_, PWPath &Path)
{
#if TRACE
    Log("\n");
    Log("TraceBack LengthA=%u LengthB=%u\n", uLengthA, uLengthB);
#endif
    assert(uLengthB > 0 && uLengthA > 0);

    // Not used by name below; presumably referenced by the DPM / DPD / DPI
    // accessors -- verify before renaming or removing.
    const unsigned uPrefixCountA = uLengthA + 1;
    const unsigned uPrefixCountB = uLengthB + 1;

    Path.Clear();

    unsigned uPrefixLengthA = uLengthA;
    unsigned uPrefixLengthB = uLengthB;

    // Scores of the three states in the final cell.
    const SCORE scoreM = DPM(uPrefixLengthA, uPrefixLengthB);
    SCORE scoreD = DPD(uPrefixLengthA, uPrefixLengthB);
    SCORE scoreI = DPI(uPrefixLengthA, uPrefixLengthB);

    const ProfPos &LastPPA = PA[uLengthA - 1];
    const ProfPos &LastPPB = PB[uLengthB - 1];

    // A path that ends in a gap state also pays the gap-close of the last position.
    scoreD += LastPPA.m_scoreGapClose;
    scoreI += LastPPB.m_scoreGapClose;

    // Choose the last edge; ties are resolved in the order M, D, I.
    char cEdgeType = cInsane;
    SCORE scoreMax;
    if (scoreM >= scoreD && scoreM >= scoreI)
    {
        scoreMax = scoreM;
        cEdgeType = 'M';
    }
    else if (scoreD >= scoreM && scoreD >= scoreI)
    {
        scoreMax = scoreD;
        cEdgeType = 'D';
    }
    else
    {
        assert(scoreI >= scoreM && scoreI >= scoreD);
        scoreMax = scoreI;
        cEdgeType = 'I';
    }

    for (;;)
    {
        // 'S' marks the start state: the whole path has been emitted.
        if ('S' == cEdgeType)
            break;

        PWEdge Edge;
        Edge.cType = cEdgeType;
        Edge.uPrefixLengthA = uPrefixLengthA;
        Edge.uPrefixLengthB = uPrefixLengthB;
        Path.PrependEdge(Edge);

        char cPrevEdgeType;
        unsigned uPrevPrefixLengthA = uPrefixLengthA;
        unsigned uPrevPrefixLengthB = uPrefixLengthB;

        switch (cEdgeType)
        {
        case 'M':
        {
            assert(uPrefixLengthA > 0);
            assert(uPrefixLengthB > 0);

            const ProfPos &PPA = PA[uPrefixLengthA - 1];
            const ProfPos &PPB = PB[uPrefixLengthB - 1];

            const SCORE Score = DPM(uPrefixLengthA, uPrefixLengthB);
            const SCORE scoreMatch = ScoreProfPos2(PPA, PPB);

            // Start -> M is a candidate only for the very first cell (1, 1).
            SCORE scoreSM;
            if (1 == uPrefixLengthA && 1 == uPrefixLengthB)
                scoreSM = scoreMatch;
            else
                scoreSM = MINUS_INFINITY;

            // Candidates stay at MINUS_INFINITY unless their guard below holds.
            SCORE scoreMM = MINUS_INFINITY;
            SCORE scoreDM = MINUS_INFINITY;
            SCORE scoreIM = MINUS_INFINITY;
            if (uPrefixLengthA > 1 && uPrefixLengthB > 1)
                scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1) + scoreMatch;
            if (uPrefixLengthA > 1)
            {
                // D -> M adds the gap-close score of the previous position of A.
                SCORE scoreTransDM = PA[uPrefixLengthA-2].m_scoreGapClose;
                scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreTransDM + scoreMatch;
            }
            if (uPrefixLengthB > 1)
            {
                // I -> M adds the gap-close score of the previous position of B.
                SCORE scoreTransIM = PB[uPrefixLengthB-2].m_scoreGapClose;
                scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreTransIM + scoreMatch;
            }

            // First matching candidate wins; the order M, D, I, S is significant.
            if (EQ(scoreMM, Score))
                cPrevEdgeType = 'M';
            else if (EQ(scoreDM, Score))
                cPrevEdgeType = 'D';
            else if (EQ(scoreIM, Score))
                cPrevEdgeType = 'I';
            else if (EQ(scoreSM, Score))
                cPrevEdgeType = 'S';
            else
                Quit("TraceBack: failed to match M score=%g M=%g D=%g I=%g S=%g", Score, scoreMM, scoreDM, scoreIM, scoreSM);

            // A match edge steps back one position in both A and B.
            --uPrevPrefixLengthA;
            --uPrevPrefixLengthB;
            break;
        }

        case 'D':
        {
            assert(uPrefixLengthA > 0);
            const SCORE Score = DPD(uPrefixLengthA, uPrefixLengthB);

            SCORE scoreMD = MINUS_INFINITY;
            SCORE scoreDD = MINUS_INFINITY;
            SCORE scoreSD = MINUS_INFINITY;

            // Start -> D is a candidate only while no letter of B has been used.
            if (uPrefixLengthB == 0)
            {
                if (uPrefixLengthA == 1)
                    scoreSD = PA[0].m_scoreGapOpen;
                else
                    scoreSD = DPD(uPrefixLengthA - 1, 0);
            }
            if (uPrefixLengthA > 1)
            {
                const ProfPos &PPA = PA[uPrefixLengthA - 1];
                // M -> D adds the gap-open score of the current position of A;
                // D -> D adds nothing here.
                SCORE scoreTransMD = PPA.m_scoreGapOpen;
                scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) + scoreTransMD;
                scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB);
            }

            // First matching candidate wins; the order M, D, S is significant.
            if (EQ(Score, scoreMD))
                cPrevEdgeType = 'M';
            else if (EQ(Score, scoreDD))
                cPrevEdgeType = 'D';
            else if (EQ(Score, scoreSD))
                cPrevEdgeType = 'S';
            else
                Quit("TraceBack: failed to match D");

            // A delete edge steps back one position in A only.
            --uPrevPrefixLengthA;
            break;
        }

        case 'I':
        {
            assert(uPrefixLengthB > 0);
            const SCORE Score = DPI(uPrefixLengthA, uPrefixLengthB);

            SCORE scoreMI = MINUS_INFINITY;
            SCORE scoreII = MINUS_INFINITY;
            SCORE scoreSI = MINUS_INFINITY;

            // Start -> I is a candidate only while no letter of A has been used.
            if (uPrefixLengthA == 0)
            {
                if (uPrefixLengthB == 1)
                    scoreSI = PB[0].m_scoreGapOpen;
                else
                    scoreSI = DPI(0, uPrefixLengthB - 1);
            }
            if (uPrefixLengthB > 1)
            {
                const ProfPos &PPB = PB[uPrefixLengthB - 1];
                // M -> I adds the gap-open score of the current position of B;
                // I -> I adds nothing here.
                SCORE scoreTransMI = PPB.m_scoreGapOpen;
                scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) + scoreTransMI;
                scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1);
            }

            // First matching candidate wins; the order M, I, S is significant.
            if (EQ(Score, scoreMI))
                cPrevEdgeType = 'M';
            else if (EQ(Score, scoreII))
                cPrevEdgeType = 'I';
            else if (EQ(Score, scoreSI))
                cPrevEdgeType = 'S';
            else
                Quit("TraceBack: failed to match I");

            // An insert edge steps back one position in B only.
            --uPrevPrefixLengthB;
            break;
        }

        default:
            // cEdgeType is only ever set to 'M', 'D', 'I' or 'S' in this function.
            assert(false);
        }
#if TRACE
        Log("Edge %c%c%u.%u", cPrevEdgeType, cEdgeType, uPrefixLengthA, uPrefixLengthB);
        Log("\n");
#endif
        // Move to the predecessor cell and state.
        cEdgeType = cPrevEdgeType;
        uPrefixLengthA = uPrevPrefixLengthA;
        uPrefixLengthB = uPrevPrefixLengthB;
    }

    return scoreMax;
}
void SPTest() { SetPPScore(PPSCORE_SV); SetListFileName("c:\\tmp\\muscle.log", false); TextFile file1("c:\\tmp\\msa1.afa"); TextFile file2("c:\\tmp\\msa2.afa"); MSA msa1; MSA msa2; msa1.FromFile(file1); msa2.FromFile(file2); Log("msa1=\n"); msa1.LogMe(); Log("msa2=\n"); msa2.LogMe(); const unsigned uColCount = msa1.GetColCount(); if (msa2.GetColCount() != uColCount) Quit("Different lengths"); const unsigned uSeqCount1 = msa1.GetSeqCount(); const unsigned uSeqCount2 = msa2.GetSeqCount(); const unsigned uSeqCount = uSeqCount1 + uSeqCount2; MSA::SetIdCount(uSeqCount); for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1) { msa1.SetSeqWeight(uSeqIndex1, 1.0); msa1.SetSeqId(uSeqIndex1, uSeqIndex1); } for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2) { msa2.SetSeqWeight(uSeqIndex2, 1.0); msa2.SetSeqId(uSeqIndex2, uSeqCount1 + uSeqIndex2); } MSA alnA; MSA alnB; char strPathA[1024]; char strPathB[1024]; MakePath(uColCount, INDELS, strPathA); MakePath(uColCount, INDELS, strPathB); PWPath PathA; PWPath PathB; PathA.FromStr(strPathA); PathB.FromStr(strPathB); Log("PathA=\n"); PathA.LogMe(); Log("PathB=\n"); PathB.LogMe(); AlignTwoMSAsGivenPath(PathA, msa1, msa2, alnA); AlignTwoMSAsGivenPath(PathB, msa1, msa2, alnB); for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) { alnA.SetSeqWeight(uSeqIndex, 1.0); alnB.SetSeqWeight(uSeqIndex, 1.0); } unsigned Seqs1[1024]; unsigned Seqs2[1024]; for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1) Seqs1[uSeqIndex1] = uSeqIndex1; for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2) Seqs2[uSeqIndex2] = uSeqCount1 + uSeqIndex2; MSA msaA1; MSA msaA2; MSA msaB1; MSA msaB2; MSAFromSeqSubset(alnA, Seqs1, uSeqCount1, msaA1); MSAFromSeqSubset(alnB, Seqs1, uSeqCount1, msaB1); MSAFromSeqSubset(alnA, Seqs2, uSeqCount2, msaA2); MSAFromSeqSubset(alnB, Seqs2, uSeqCount2, msaB2); for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1) { 
msaA1.SetSeqWeight(uSeqIndex1, 1.0); msaB1.SetSeqWeight(uSeqIndex1, 1.0); } for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2) { msaA2.SetSeqWeight(uSeqIndex2, 1.0); msaB2.SetSeqWeight(uSeqIndex2, 1.0); } Log("msaA1=\n"); msaA1.LogMe(); Log("msaB1=\n"); msaB1.LogMe(); Log("msaA2=\n"); msaA2.LogMe(); Log("msaB2=\n"); msaB2.LogMe(); Log("alnA=\n"); alnA.LogMe(); Log("AlnB=\n"); alnB.LogMe(); Log("\nSPA\n---\n"); SCORE SPA = ObjScoreSP(alnA); Log("\nSPB\n---\n"); SCORE SPB = ObjScoreSP(alnB); Log("\nXPA\n---\n"); SCORE XPA = ObjScoreXP(msaA1, msaA2); Log("\nXPB\n---\n"); SCORE XPB = ObjScoreXP(msaB1, msaB2); Log("SPA=%.4g SPB=%.4g Diff=%.4g\n", SPA, SPB, SPA - SPB); Log("XPA=%.4g XPB=%.4g Diff=%.4g\n", XPA, XPB, XPA - XPB); }