void ClusterByHeight(const Tree &tree, double dMaxHeight, unsigned Subtrees[], unsigned *ptruSubtreeCount) { if (!tree.IsRooted()) Quit("ClusterByHeight: requires rooted tree"); #if TRACE Log("ClusterByHeight, max height=%g\n", dMaxHeight); #endif unsigned uSubtreeCount = 0; const unsigned uNodeCount = tree.GetNodeCount(); for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) { if (tree.IsRoot(uNodeIndex)) continue; unsigned uParent = tree.GetParent(uNodeIndex); double dHeight = tree.GetNodeHeight(uNodeIndex); double dParentHeight = tree.GetNodeHeight(uParent); #if TRACE Log("Node %3u Height %5.2f ParentHeight %5.2f\n", uNodeIndex, dHeight, dParentHeight); #endif if (dParentHeight > dMaxHeight && dHeight <= dMaxHeight) { Subtrees[uSubtreeCount] = uNodeIndex; #if TRACE Log("Subtree[%u]=%u\n", uSubtreeCount, uNodeIndex); #endif ++uSubtreeCount; } } *ptruSubtreeCount = uSubtreeCount; }
//------------------------------------------------------------------------------ // Copy constructor Tree::Tree (const Tree &t) { if (t.GetRoot() == NULL) { Root = NULL; CurNode = NULL; Leaves = 0; Internals = 0; Error = 0; InternalLabels = false; EdgeLengths = false; Nodes = NULL; Name = ""; Rooted = false; Weight = 1.0; } else { CurNode = t.GetRoot(); NodePtr placeHolder; t.copyTraverse (CurNode, placeHolder ); Root = placeHolder; Leaves = t.GetNumLeaves (); Internals = t.GetNumInternals (); Name = t.GetName (); CurNode = NULL; Error = 0; InternalLabels = t.GetHasInternalLabels ();; EdgeLengths = t.GetHasEdgeLengths (); Nodes = NULL; Rooted = t.IsRooted(); Weight = t.GetWeight(); } }
// Return false when done bool PhyEnumEdges(const Tree &tree, PhyEnumEdgeState &ES) { unsigned uNode1 = uInsane; if (!ES.m_bInit) { if (tree.GetNodeCount() <= 1) { ES.m_uNodeIndex1 = NULL_NEIGHBOR; ES.m_uNodeIndex2 = NULL_NEIGHBOR; return false; } uNode1 = tree.FirstDepthFirstNode(); ES.m_bInit = true; } else { uNode1 = tree.NextDepthFirstNode(ES.m_uNodeIndex1); if (NULL_NEIGHBOR == uNode1) return false; if (tree.IsRooted() && tree.IsRoot(uNode1)) { uNode1 = tree.NextDepthFirstNode(uNode1); if (NULL_NEIGHBOR == uNode1) return false; } } unsigned uNode2 = tree.GetParent(uNode1); ES.m_uNodeIndex1 = uNode1; ES.m_uNodeIndex2 = uNode2; return true; }
// Re-root a rooted tree with the given method.
//
// For ROOT_Pseudo the tree is left exactly as it is (the root assigned by
// clustering is kept). For any other method the current root is removed with
// UnrootByDeletingRoot() and a new one is chosen by RootUnrootedTree(Method).
// Quits if the tree is not rooted on entry.
void FixRoot(Tree &tree, ROOT Method)
{
    if (!tree.IsRooted())
        Quit("FixRoot: expecting rooted tree");

    if (ROOT_Pseudo != Method)
    {
        tree.UnrootByDeletingRoot();
        tree.RootUnrootedTree(Method);
    }
}
bool PhyEnumBiParts(const Tree &tree, PhyEnumEdgeState &ES, unsigned Leaves1[], unsigned *ptruCount1, unsigned Leaves2[], unsigned *ptruCount2) { bool bOk = PhyEnumEdges(tree, ES); if (!bOk) { *ptruCount1 = 0; *ptruCount2 = 0; return false; } // Special case: in a rooted tree, both edges from the root // give the same bipartition, so skip one of them. if (tree.IsRooted() && tree.IsRoot(ES.m_uNodeIndex2) && tree.GetRight(ES.m_uNodeIndex2) == ES.m_uNodeIndex1) { bOk = PhyEnumEdges(tree, ES); if (!bOk) return false; } PhyGetLeaves(tree, ES.m_uNodeIndex1, ES.m_uNodeIndex2, Leaves1, ptruCount1); PhyGetLeaves(tree, ES.m_uNodeIndex2, ES.m_uNodeIndex1, Leaves2, ptruCount2); if (*ptruCount1 + *ptruCount2 != tree.GetLeafCount()) Quit("PhyEnumBiParts %u + %u != %u", *ptruCount1, *ptruCount2, tree.GetLeafCount()); #if DEBUG { for (unsigned i = 0; i < *ptruCount1; ++i) { if (!tree.IsLeaf(Leaves1[i])) Quit("PhyEnumByParts: not leaf"); for (unsigned j = 0; j < *ptruCount2; ++j) { if (!tree.IsLeaf(Leaves2[j])) Quit("PhyEnumByParts: not leaf"); if (Leaves1[i] == Leaves2[j]) Quit("PhyEnumByParts: dupe"); } } } #endif return true; }
void CalcClustalWWeights(const Tree &tree, WEIGHT Weights[]) { #if TRACE Log("CalcClustalWWeights\n"); tree.LogMe(); #endif const unsigned uLeafCount = tree.GetLeafCount(); if (0 == uLeafCount) return; else if (1 == uLeafCount) { Weights[0] = (WEIGHT) 1.0; return; } else if (2 == uLeafCount) { Weights[0] = (WEIGHT) 0.5; Weights[1] = (WEIGHT) 0.5; return; } if (!tree.IsRooted()) Quit("CalcClustalWWeights requires rooted tree"); const unsigned uNodeCount = tree.GetNodeCount(); unsigned *LeavesUnderNode = new unsigned[uNodeCount]; memset(LeavesUnderNode, 0, uNodeCount*sizeof(unsigned)); const unsigned uRootNodeIndex = tree.GetRootNodeIndex(); unsigned uLeavesUnderRoot = CountLeaves(tree, uRootNodeIndex, LeavesUnderNode); if (uLeavesUnderRoot != uLeafCount) Quit("WeightsFromTreee: Internal error, root count %u %u", uLeavesUnderRoot, uLeafCount); #if TRACE Log("Node Leaves Length Strength\n"); Log("---- ------ -------- --------\n"); // 1234 123456 12345678 12345678 #endif double *Strengths = new double[uNodeCount]; for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) { if (tree.IsRoot(uNodeIndex)) { Strengths[uNodeIndex] = 0.0; continue; } const unsigned uParent = tree.GetParent(uNodeIndex); const double dLength = tree.GetEdgeLength(uNodeIndex, uParent); const unsigned uLeaves = LeavesUnderNode[uNodeIndex]; const double dStrength = dLength / (double) uLeaves; Strengths[uNodeIndex] = dStrength; #if TRACE Log("%4u %6u %8g %8g\n", uNodeIndex, uLeaves, dLength, dStrength); #endif } #if TRACE Log("\n"); Log(" Seq Path..Weight\n"); Log("-------------------- ------------\n"); #endif for (unsigned n = 0; n < uLeafCount; ++n) { const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n); #if TRACE Log("%20.20s %4u ", tree.GetLeafName(uLeafNodeIndex), uLeafNodeIndex); #endif if (!tree.IsLeaf(uLeafNodeIndex)) Quit("CalcClustalWWeights: leaf"); double dWeight = 0; unsigned uNode = uLeafNodeIndex; while (!tree.IsRoot(uNode)) { dWeight += Strengths[uNode]; uNode = 
tree.GetParent(uNode); #if TRACE Log("->%u(%g)", uNode, Strengths[uNode]); #endif } if (dWeight < 0.0001) { #if TRACE Log("zero->one"); #endif dWeight = 1.0; } Weights[n] = (WEIGHT) dWeight; #if TRACE Log(" = %g\n", dWeight); #endif } delete[] Strengths; delete[] LeavesUnderNode; Normalize(Weights, uLeafCount); }
void DoMuscle() { SetOutputFileName(g_pstrOutFileName.get()); SetInputFileName(g_pstrInFileName.get()); SetMaxIters(g_uMaxIters.get()); SetSeqWeightMethod(g_SeqWeight1.get()); TextFile fileIn(g_pstrInFileName.get()); SeqVect v; v.FromFASTAFile(fileIn); const unsigned uSeqCount = v.Length(); if (0 == uSeqCount) Quit("No sequences in input file"); ALPHA Alpha = ALPHA_Undefined; switch (g_SeqType.get()) { case SEQTYPE_Auto: Alpha = v.GuessAlpha(); break; case SEQTYPE_Protein: Alpha = ALPHA_Amino; break; case SEQTYPE_DNA: Alpha = ALPHA_DNA; break; case SEQTYPE_RNA: Alpha = ALPHA_RNA; break; default: Quit("Invalid seq type"); } SetAlpha(Alpha); v.FixAlpha(); // // AED 21/12/06: Moved matrix loading code inside the PP param function so it gets called for all alignment types // SetPPScore(); unsigned uMaxL = 0; unsigned uTotL = 0; for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) { unsigned L = v.GetSeq(uSeqIndex).Length(); uTotL += L; if (L > uMaxL) uMaxL = L; } SetIter(1); g_bDiags.get() = g_bDiags1.get(); SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount); SetMuscleSeqVect(v); MSA::SetIdCount(uSeqCount); // Initialize sequence ids. // From this point on, ids must somehow propogate from here. for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) v.SetSeqId(uSeqIndex, uSeqIndex); if (0 == uSeqCount) Quit("Input file '%s' has no sequences", g_pstrInFileName.get()); if (1 == uSeqCount) { TextFile fileOut(g_pstrOutFileName.get(), true); v.ToFile(fileOut); return; } if (uSeqCount > 1) MHackStart(v); // First iteration Tree GuideTree; if (0 != g_pstrUseTreeFileName.get()) { // Discourage users... 
if (!g_bUseTreeNoWarn.get()) fprintf(stderr, g_strUseTreeWarning); // Read tree from file TextFile TreeFile(g_pstrUseTreeFileName.get()); GuideTree.FromFile(TreeFile); // Make sure tree is rooted if (!GuideTree.IsRooted()) Quit("User tree must be rooted"); if (GuideTree.GetLeafCount() != uSeqCount) Quit("User tree does not match input sequences"); const unsigned uNodeCount = GuideTree.GetNodeCount(); for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) { if (!GuideTree.IsLeaf(uNodeIndex)) continue; const char *LeafName = GuideTree.GetLeafName(uNodeIndex); unsigned uSeqIndex; bool SeqFound = v.FindName(LeafName, &uSeqIndex); if (!SeqFound) Quit("Label %s in tree does not match sequences", LeafName); unsigned uId = v.GetSeqIdFromName(LeafName); GuideTree.SetLeafId(uNodeIndex, uId); } } else TreeFromSeqVect(v, GuideTree, g_Cluster1.get(), g_Distance1.get(), g_Root1.get(), g_pstrDistMxFileName1.get()); const char *Tree1 = ValueOpt("Tree1"); if (0 != Tree1) { TextFile f(Tree1, true); GuideTree.ToFile(f); if (g_bClusterOnly.get()) return; } SetMuscleTree(GuideTree); ValidateMuscleIds(GuideTree); MSA msa; ProgNode *ProgNodes = 0; if (g_bLow.get()) ProgNodes = ProgressiveAlignE(v, GuideTree, msa); else ProgressiveAlign(v, GuideTree, msa); SetCurrentAlignment(msa); if (0 != g_pstrComputeWeightsFileName.get()) { extern void OutWeights(const char *FileName, const MSA &msa); SetMSAWeightsMuscle(msa); OutWeights(g_pstrComputeWeightsFileName.get(), msa); return; } ValidateMuscleIds(msa); if (1 == g_uMaxIters.get() || 2 == uSeqCount) { //TextFile fileOut(g_pstrOutFileName.get(), true); //MHackEnd(msa); //msa.ToFile(fileOut); MuscleOutput(msa); return; } if (0 == g_pstrUseTreeFileName.get()) { g_bDiags.get() = g_bDiags2.get(); SetIter(2); if (g_bLow.get()) { if (0 != g_uMaxTreeRefineIters.get()) RefineTreeE(msa, v, GuideTree, ProgNodes); } else RefineTree(msa, GuideTree); const char *Tree2 = ValueOpt("Tree2"); if (0 != Tree2) { TextFile f(Tree2, true); 
GuideTree.ToFile(f); } } SetSeqWeightMethod(g_SeqWeight2.get()); SetMuscleTree(GuideTree); if (g_bAnchors.get()) RefineVert(msa, GuideTree, g_uMaxIters.get() - 2); else RefineHoriz(msa, GuideTree, g_uMaxIters.get() - 2, false, false); #if 0 // Refining by subfamilies is disabled as it didn't give better // results. I tried doing this before and after RefineHoriz. // Should get back to this as it seems like this should work. RefineSubfams(msa, GuideTree, g_uMaxIters.get() - 2); #endif ValidateMuscleIds(msa); ValidateMuscleIds(GuideTree); //TextFile fileOut(g_pstrOutFileName.get(), true); //MHackEnd(msa); //msa.ToFile(fileOut); MuscleOutput(msa); }
// Top-level alignment driver that also attaches a CompositeVect to the
// resulting alignment. Options are read directly from global variables.
//
// CVLocation is passed to msa.SetCompositeVector() before the progressive
// alignment starts; it is not used for anything else in this function.
//
// Steps, in order:
//   1. Registers file names, iteration limit and the first sequence-weight
//      method, then loads the input with SeqVect::FromFASTAFile().
//   2. Chooses the alphabet from g_SeqType (guessing it for SEQTYPE_Auto).
//   3. When g_pstrMatrixFileName is set, reads a user score matrix with
//      ReadMx(); the file is looked up under $MUSCLE_MXPATH when that
//      variable is set. This forces g_Alpha to ALPHA_Amino and g_PPScore to
//      PPSCORE_SP, and the matrix is installed after SetPPScore().
//   4. Assigns sequence ids 0..N-1. A single-sequence input is written
//      straight to the output file and the function returns.
//   5. Obtains a guide tree (user file or TreeFromSeqVect()), optionally
//      writes it to the "Tree1" option file (g_bClusterOnly ends the run).
//   6. Runs ProgressiveAlignE() (when g_bLow) or ProgressiveAlign().
//   7. Optional weight output, early output for one iteration or two
//      sequences, tree refinement (skipped for user trees), then
//      RefineVert()/RefineHoriz() and MuscleOutput().
//
// Fix relative to the previous version: the buffer allocated for the
// $MUSCLE_MXPATH-qualified matrix file name is now released.
void DoMuscle(CompositeVect*CVLocation)
{
    SetOutputFileName(g_pstrOutFileName);
    SetInputFileName(g_pstrInFileName);

    SetMaxIters(g_uMaxIters);
    SetSeqWeightMethod(g_SeqWeight1);

    TextFile fileIn(g_pstrInFileName);
    SeqVect v;
    v.FromFASTAFile(fileIn);
    const unsigned uSeqCount = v.Length();

    if (0 == uSeqCount)
        Quit("No sequences in input file");

    ALPHA Alpha = ALPHA_Undefined;
    switch (g_SeqType)
    {
    case SEQTYPE_Auto:
        Alpha = v.GuessAlpha();
        break;

    case SEQTYPE_Protein:
        Alpha = ALPHA_Amino;
        break;

    case SEQTYPE_DNA:
        Alpha = ALPHA_DNA;
        break;

    case SEQTYPE_RNA:
        Alpha = ALPHA_RNA;
        break;

    default:
        Quit("Invalid seq type");
    }
    SetAlpha(Alpha);
    v.FixAlpha();

    PTR_SCOREMATRIX UserMatrix = 0;
    if (0 != g_pstrMatrixFileName)
    {
        const char *FileName = g_pstrMatrixFileName;

        // Buffer for "<MUSCLE_MXPATH>/<file>", owned by this block.
        char *QualifiedName = 0;
        const char *Path = getenv("MUSCLE_MXPATH");
        if (Path != 0)
        {
            // path + '/' + file + terminating NUL
            size_t n = strlen(Path) + 1 + strlen(FileName) + 1;
            QualifiedName = new char[n];
            sprintf(QualifiedName, "%s/%s", Path, FileName);
            FileName = QualifiedName;
        }

        {
            // Inner scope: the TextFile is destroyed before the name buffer
            // is freed.
            // NOTE(review): assumes ReadMx() does not keep the file-name
            // pointer beyond this call - confirm.
            TextFile File(FileName);
            UserMatrix = ReadMx(File);
        }
        delete[] QualifiedName;

        g_Alpha = ALPHA_Amino;
        g_PPScore = PPSCORE_SP;
    }

    SetPPScore();

    // The user matrix, when present, overrides whatever SetPPScore() chose.
    if (0 != UserMatrix)
        g_ptrScoreMatrix = UserMatrix;

    // Longest and total sequence length, for SetSeqStats().
    unsigned uMaxL = 0;
    unsigned uTotL = 0;
    for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
    {
        unsigned L = v.GetSeq(uSeqIndex).Length();
        uTotL += L;
        if (L > uMaxL)
            uMaxL = L;
    }

    SetIter(1);
    g_bDiags = g_bDiags1;
    SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);

    SetMuscleSeqVect(v);

    MSA::SetIdCount(uSeqCount);

    // Initialize sequence ids.
    // From this point on, ids must somehow propagate from here.
    for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
        v.SetSeqId(uSeqIndex, uSeqIndex);

    // NOTE(review): this repeats the empty-input check made right after
    // loading; kept as is in case Quit() can return.
    if (0 == uSeqCount)
        Quit("Input file '%s' has no sequences", g_pstrInFileName);
    if (1 == uSeqCount)
    {
        // Nothing to align: copy the single sequence to the output.
        TextFile fileOut(g_pstrOutFileName, true);
        v.ToFile(fileOut);
        return;
    }

    if (uSeqCount > 1)
        MHackStart(v);

    // First iteration
    Tree GuideTree;
    if (0 != g_pstrUseTreeFileName)
    {
        // Discourage users...
        if (!g_bUseTreeNoWarn)
            fprintf(stderr, "%s", g_strUseTreeWarning);

        // Read tree from file
        TextFile TreeFile(g_pstrUseTreeFileName);
        GuideTree.FromFile(TreeFile);

        // Make sure tree is rooted
        if (!GuideTree.IsRooted())
            Quit("User tree must be rooted");

        if (GuideTree.GetLeafCount() != uSeqCount)
            Quit("User tree does not match input sequences");

        // Attach the sequence id to every leaf, matching by label.
        const unsigned uNodeCount = GuideTree.GetNodeCount();
        for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
        {
            if (!GuideTree.IsLeaf(uNodeIndex))
                continue;
            const char *LeafName = GuideTree.GetLeafName(uNodeIndex);
            unsigned uSeqIndex;
            bool SeqFound = v.FindName(LeafName, &uSeqIndex);
            if (!SeqFound)
                Quit("Label %s in tree does not match sequences", LeafName);
            unsigned uId = v.GetSeqIdFromName(LeafName);
            GuideTree.SetLeafId(uNodeIndex, uId);
        }
    }
    else
        TreeFromSeqVect(v, GuideTree, g_Cluster1, g_Distance1, g_Root1,
                        g_pstrDistMxFileName1);

    const char *Tree1 = ValueOpt("Tree1");
    if (0 != Tree1)
    {
        TextFile f(Tree1, true);
        GuideTree.ToFile(f);
        if (g_bClusterOnly)
            return;
    }

    SetMuscleTree(GuideTree);
    ValidateMuscleIds(GuideTree);

    MSA msa;
    msa.SetCompositeVector(CVLocation);
    // Only set on the g_bLow path; handed to RefineTreeE() below.
    ProgNode *ProgNodes = 0;
    if (g_bLow)
        ProgNodes = ProgressiveAlignE(v, GuideTree, msa);
    else
        ProgressiveAlign(v, GuideTree, msa);
    SetCurrentAlignment(msa);

    if (0 != g_pstrComputeWeightsFileName)
    {
        extern void OutWeights(const char *FileName, const MSA &msa);
        SetMSAWeightsMuscle(msa);
        OutWeights(g_pstrComputeWeightsFileName, msa);
        return;
    }

    ValidateMuscleIds(msa);

    if (1 == g_uMaxIters || 2 == uSeqCount)
    {
        //TextFile fileOut(g_pstrOutFileName, true);
        //MHackEnd(msa);
        //msa.ToFile(fileOut);
        MuscleOutput(msa);
        return;
    }

    // Tree refinement is skipped when the user supplied the tree.
    if (0 == g_pstrUseTreeFileName)
    {
        g_bDiags = g_bDiags2;
        SetIter(2);

        if (g_bLow)
        {
            if (0 != g_uMaxTreeRefineIters)
                RefineTreeE(msa, v, GuideTree, ProgNodes);
        }
        else
            RefineTree(msa, GuideTree);

        const char *Tree2 = ValueOpt("Tree2");
        if (0 != Tree2)
        {
            TextFile f(Tree2, true);
            GuideTree.ToFile(f);
        }
    }

    SetSeqWeightMethod(g_SeqWeight2);
    SetMuscleTree(GuideTree);

    if (g_bAnchors)
        RefineVert(msa, GuideTree, g_uMaxIters - 2);
    else
        RefineHoriz(msa, GuideTree, g_uMaxIters - 2, false, false);

#if 0
    // Refining by subfamilies is disabled as it didn't give better
    // results. I tried doing this before and after RefineHoriz.
    // Should get back to this as it seems like this should work.
    RefineSubfams(msa, GuideTree, g_uMaxIters - 2);
#endif

    ValidateMuscleIds(msa);
    ValidateMuscleIds(GuideTree);

    //TextFile fileOut(g_pstrOutFileName, true);
    //MHackEnd(msa);
    //msa.ToFile(fileOut);
    MuscleOutput(msa);
}
void Tree::PruneTree(const Tree &tree, unsigned Subfams[], unsigned uSubfamCount) { if (!tree.IsRooted()) Quit("Tree::PruneTree: requires rooted tree"); Clear(); m_uNodeCount = 2*uSubfamCount - 1; InitCache(m_uNodeCount); const unsigned uUnprunedNodeCount = tree.GetNodeCount(); unsigned *uUnprunedToPrunedIndex = new unsigned[uUnprunedNodeCount]; unsigned *uPrunedToUnprunedIndex = new unsigned[m_uNodeCount]; for (unsigned n = 0; n < uUnprunedNodeCount; ++n) uUnprunedToPrunedIndex[n] = NULL_NEIGHBOR; for (unsigned n = 0; n < m_uNodeCount; ++n) uPrunedToUnprunedIndex[n] = NULL_NEIGHBOR; // Create mapping between unpruned and pruned node indexes unsigned uInternalNodeIndex = uSubfamCount; for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex) { unsigned uUnprunedNodeIndex = Subfams[uSubfamIndex]; uUnprunedToPrunedIndex[uUnprunedNodeIndex] = uSubfamIndex; uPrunedToUnprunedIndex[uSubfamIndex] = uUnprunedNodeIndex; for (;;) { uUnprunedNodeIndex = tree.GetParent(uUnprunedNodeIndex); if (tree.IsRoot(uUnprunedNodeIndex)) break; // Already visited this node? if (NULL_NEIGHBOR != uUnprunedToPrunedIndex[uUnprunedNodeIndex]) break; uUnprunedToPrunedIndex[uUnprunedNodeIndex] = uInternalNodeIndex; uPrunedToUnprunedIndex[uInternalNodeIndex] = uUnprunedNodeIndex; ++uInternalNodeIndex; } } const unsigned uUnprunedRootIndex = tree.GetRootNodeIndex(); uUnprunedToPrunedIndex[uUnprunedRootIndex] = uInternalNodeIndex; uPrunedToUnprunedIndex[uInternalNodeIndex] = uUnprunedRootIndex; #if TRACE { Log("Pruned to unpruned:\n"); for (unsigned i = 0; i < m_uNodeCount; ++i) Log(" [%u]=%u", i, uPrunedToUnprunedIndex[i]); Log("\n"); Log("Unpruned to pruned:\n"); for (unsigned i = 0; i < uUnprunedNodeCount; ++i) { unsigned n = uUnprunedToPrunedIndex[i]; if (n != NULL_NEIGHBOR) Log(" [%u]=%u", i, n); } Log("\n"); } #endif if (uInternalNodeIndex != m_uNodeCount - 1) Quit("Tree::PruneTree, Internal error"); // Nodes 0, 1 ... 
are the leaves for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex) { char szName[32]; sprintf(szName, "Subfam_%u", uSubfamIndex + 1); m_ptrName[uSubfamIndex] = strsave(szName); } for (unsigned uPrunedNodeIndex = uSubfamCount; uPrunedNodeIndex < m_uNodeCount; ++uPrunedNodeIndex) { unsigned uUnprunedNodeIndex = uPrunedToUnprunedIndex[uPrunedNodeIndex]; const unsigned uUnprunedLeft = tree.GetLeft(uUnprunedNodeIndex); const unsigned uUnprunedRight = tree.GetRight(uUnprunedNodeIndex); const unsigned uPrunedLeft = uUnprunedToPrunedIndex[uUnprunedLeft]; const unsigned uPrunedRight = uUnprunedToPrunedIndex[uUnprunedRight]; const double dLeftLength = tree.GetEdgeLength(uUnprunedNodeIndex, uUnprunedLeft); const double dRightLength = tree.GetEdgeLength(uUnprunedNodeIndex, uUnprunedRight); m_uNeighbor2[uPrunedNodeIndex] = uPrunedLeft; m_uNeighbor3[uPrunedNodeIndex] = uPrunedRight; m_dEdgeLength1[uPrunedLeft] = dLeftLength; m_dEdgeLength1[uPrunedRight] = dRightLength; m_uNeighbor1[uPrunedLeft] = uPrunedNodeIndex; m_uNeighbor1[uPrunedRight] = uPrunedNodeIndex; m_bHasEdgeLength1[uPrunedLeft] = true; m_bHasEdgeLength1[uPrunedRight] = true; m_dEdgeLength2[uPrunedNodeIndex] = dLeftLength; m_dEdgeLength3[uPrunedNodeIndex] = dRightLength; m_bHasEdgeLength2[uPrunedNodeIndex] = true; m_bHasEdgeLength3[uPrunedNodeIndex] = true; } m_uRootNodeIndex = uUnprunedToPrunedIndex[uUnprunedRootIndex]; m_bRooted = true; Validate(); delete[] uUnprunedToPrunedIndex; }
void FindRoot(const Tree &tree, unsigned *ptruNode1, unsigned *ptruNode2, double *ptrdLength1, double *ptrdLength2, ROOT RootMethod) { #if TRACE tree.LogMe(); #endif if (tree.IsRooted()) Quit("FindRoot: tree already rooted"); const unsigned uNodeCount = tree.GetNodeCount(); const unsigned uLeafCount = tree.GetLeafCount(); if (uNodeCount < 2) Quit("Root: don't support trees with < 2 edges"); EdgeInfo **EIs = new EdgeInfo *[uNodeCount]; for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) EIs[uNodeIndex] = new EdgeInfo[3]; EdgeList Edges; for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) if (tree.IsLeaf(uNodeIndex)) { unsigned uParent = tree.GetNeighbor1(uNodeIndex); Edges.Add(uParent, uNodeIndex); } #if TRACE Log("Edges: "); Edges.LogMe(); #endif // Main loop: iterate until all distances known double dAllMaxDist = -1e20; unsigned uMaxFrom = NULL_NEIGHBOR; unsigned uMaxTo = NULL_NEIGHBOR; for (;;) { EdgeList NextEdges; #if TRACE Log("\nTop of main loop\n"); Log("Edges: "); Edges.LogMe(); Log("MDs:\n"); ListEIs(EIs, uNodeCount); #endif // For all edges const unsigned uEdgeCount = Edges.GetCount(); if (0 == uEdgeCount) break; for (unsigned n = 0; n < uEdgeCount; ++n) { unsigned uNodeFrom; unsigned uNodeTo; Edges.GetEdge(n, &uNodeFrom, &uNodeTo); CalcInfo(tree, uNodeFrom, uNodeTo, EIs); #if TRACE Log("Edge %u -> %u\n", uNodeFrom, uNodeTo); #endif const unsigned uNeighborCount = tree.GetNeighborCount(uNodeFrom); for (unsigned i = 0; i < uNeighborCount; ++i) { const unsigned uNeighborIndex = tree.GetNeighbor(uNodeFrom, i); if (!Known(tree, EIs, uNeighborIndex, uNodeFrom) && AllKnownOut(tree, EIs, uNeighborIndex, uNodeFrom)) NextEdges.Add(uNeighborIndex, uNodeFrom); } } Edges.Copy(NextEdges); } #if TRACE ListEIs(EIs, uNodeCount); #endif switch (RootMethod) { case ROOT_MidLongestSpan: RootByMidLongestSpan(tree, EIs, ptruNode1, ptruNode2, ptrdLength1, ptrdLength2); break; case ROOT_MinAvgLeafDist: RootByMinAvgLeafDist(tree, EIs, ptruNode1, 
ptruNode2, ptrdLength1, ptrdLength2); break; default: Quit("Invalid RootMethod=%d", RootMethod); } for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) delete[] EIs[uNodeIndex]; delete[] EIs; }
void DiffTreesE(const Tree &NewTree, const Tree &OldTree, unsigned NewNodeIndexToOldNodeIndex[]) { #if TRACE Log("DiffTreesE NewTree:\n"); NewTree.LogMe(); Log("\n"); Log("OldTree:\n"); OldTree.LogMe(); #endif if (!NewTree.IsRooted() || !OldTree.IsRooted()) Quit("DiffTrees: requires rooted trees"); const unsigned uNodeCount = NewTree.GetNodeCount(); const unsigned uOldNodeCount = OldTree.GetNodeCount(); const unsigned uLeafCount = NewTree.GetLeafCount(); const unsigned uOldLeafCount = OldTree.GetLeafCount(); if (uNodeCount != uOldNodeCount || uLeafCount != uOldLeafCount) Quit("DiffTreesE: different node counts"); { unsigned *IdToOldNodeIndex = new unsigned[uNodeCount]; for (unsigned uOldNodeIndex = 0; uOldNodeIndex < uNodeCount; ++uOldNodeIndex) { if (OldTree.IsLeaf(uOldNodeIndex)) { unsigned Id = OldTree.GetLeafId(uOldNodeIndex); IdToOldNodeIndex[Id] = uOldNodeIndex; } } // Initialize NewNodeIndexToOldNodeIndex[] // All internal nodes are marked as changed, but may be updated later. for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex) { if (NewTree.IsLeaf(uNewNodeIndex)) { unsigned uId = NewTree.GetLeafId(uNewNodeIndex); assert(uId < uLeafCount); unsigned uOldNodeIndex = IdToOldNodeIndex[uId]; assert(uOldNodeIndex < uNodeCount); NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldNodeIndex; } else NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED; } delete[] IdToOldNodeIndex; } // Depth-first traversal of tree. // The order guarantees that a node is visited before // its parent is visited. for (unsigned uNewNodeIndex = NewTree.FirstDepthFirstNode(); NULL_NEIGHBOR != uNewNodeIndex; uNewNodeIndex = NewTree.NextDepthFirstNode(uNewNodeIndex)) { if (NewTree.IsLeaf(uNewNodeIndex)) continue; // If either child is changed, flag this node as changed and continue. 
unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex); unsigned uOldLeft = NewNodeIndexToOldNodeIndex[uNewLeft]; if (NODE_CHANGED == uOldLeft) { NewNodeIndexToOldNodeIndex[uNewLeft] = NODE_CHANGED; continue; } unsigned uNewRight = NewTree.GetRight(uNewNodeIndex); unsigned uOldRight = NewNodeIndexToOldNodeIndex[uNewRight]; if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewRight]) { NewNodeIndexToOldNodeIndex[uNewRight] = NODE_CHANGED; continue; } unsigned uOldParentLeft = OldTree.GetParent(uOldLeft); unsigned uOldParentRight = OldTree.GetParent(uOldRight); if (uOldParentLeft == uOldParentRight) NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldParentLeft; else NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED; } #if TRACE { Log("NewToOld "); for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex) { Log(" [%3u]=", uNewNodeIndex); if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewNodeIndex]) Log(" X"); else Log("%3u", NewNodeIndexToOldNodeIndex[uNewNodeIndex]); if ((uNewNodeIndex+1)%8 == 0) Log("\n "); } Log("\n"); } #endif #if DEBUG { for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex) { unsigned uOld = NewNodeIndexToOldNodeIndex[uNewNodeIndex]; if (NewTree.IsLeaf(uNewNodeIndex)) { if (uOld >= uNodeCount) { Log("NewNode=%u uOld=%u > uNodeCount=%u\n", uNewNodeIndex, uOld, uNodeCount); Quit("Diff check failed"); } unsigned uIdNew = NewTree.GetLeafId(uNewNodeIndex); unsigned uIdOld = OldTree.GetLeafId(uOld); if (uIdNew != uIdOld) { Log("NewNode=%u uOld=%u IdNew=%u IdOld=%u\n", uNewNodeIndex, uOld, uIdNew, uIdOld); Quit("Diff check failed"); } continue; } if (NODE_CHANGED == uOld) continue; unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex); unsigned uNewRight = NewTree.GetRight(uNewNodeIndex); unsigned uOldLeft = OldTree.GetLeft(uOld); unsigned uOldRight = OldTree.GetRight(uOld); unsigned uNewLeftPartner = NewNodeIndexToOldNodeIndex[uNewLeft]; unsigned uNewRightPartner = NewNodeIndexToOldNodeIndex[uNewRight]; bool 
bSameNotRotated = (uNewLeftPartner == uOldLeft && uNewRightPartner == uOldRight); bool bSameRotated = (uNewLeftPartner == uOldRight && uNewRightPartner == uOldLeft); if (!bSameNotRotated && !bSameRotated) { Log("NewNode=%u NewL=%u NewR=%u\n", uNewNodeIndex, uNewLeft, uNewRight); Log("OldNode=%u OldL=%u OldR=%u\n", uOld, uOldLeft, uOldRight); Log("NewLPartner=%u NewRPartner=%u\n", uNewLeftPartner, uNewRightPartner); Quit("Diff check failed"); } } } #endif }
void ProgressiveAlign(const SeqVect &v, const Tree &GuideTree, MSA &a) { assert(GuideTree.IsRooted()); #if TRACE Log("GuideTree:\n"); GuideTree.LogMe(); #endif const unsigned uSeqCount = v.Length(); const unsigned uNodeCount = 2*uSeqCount - 1; ProgNode *ProgNodes = new ProgNode[uNodeCount]; unsigned uJoin = 0; unsigned uTreeNodeIndex = GuideTree.FirstDepthFirstNode(); SetProgressDesc("Align node"); do { if (GuideTree.IsLeaf(uTreeNodeIndex)) { if (uTreeNodeIndex >= uNodeCount) Quit("TreeNodeIndex=%u NodeCount=%u\n", uTreeNodeIndex, uNodeCount); ProgNode &Node = ProgNodes[uTreeNodeIndex]; unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex); if (uId >= uSeqCount) Quit("Seq index out of range"); const Seq &s = *(v[uId]); Node.m_MSA.FromSeq(s); Node.m_MSA.SetSeqId(0, uId); Node.m_uLength = Node.m_MSA.GetColCount(); } else { Progress(uJoin, uSeqCount - 1); ++uJoin; const unsigned uMergeNodeIndex = uTreeNodeIndex; ProgNode &Parent = ProgNodes[uMergeNodeIndex]; const unsigned uLeft = GuideTree.GetLeft(uTreeNodeIndex); const unsigned uRight = GuideTree.GetRight(uTreeNodeIndex); ProgNode &Node1 = ProgNodes[uLeft]; ProgNode &Node2 = ProgNodes[uRight]; PWPath Path; AlignTwoMSAs(Node1.m_MSA, Node2.m_MSA, Parent.m_MSA, Path); Parent.m_uLength = Parent.m_MSA.GetColCount(); Node1.m_MSA.Clear(); Node2.m_MSA.Clear(); } uTreeNodeIndex = GuideTree.NextDepthFirstNode(uTreeNodeIndex); } while (NULL_NEIGHBOR != uTreeNodeIndex); ProgressStepsDone(); unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex(); const ProgNode &RootProgNode = ProgNodes[uRootNodeIndex]; a.Copy(RootProgNode.m_MSA); delete[] ProgNodes; ProgNodes = 0; }
void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs, unsigned IdToDiffsLeafNodeIndex[]) { #if TRACE Log("Tree1:\n"); Tree1.LogMe(); Log("\n"); Log("Tree2:\n"); Tree2.LogMe(); #endif if (!Tree1.IsRooted() || !Tree2.IsRooted()) Quit("DiffTrees: requires rooted trees"); const unsigned uNodeCount = Tree1.GetNodeCount(); const unsigned uNodeCount2 = Tree2.GetNodeCount(); const unsigned uLeafCount = Tree1.GetLeafCount(); const unsigned uLeafCount2 = Tree2.GetLeafCount(); assert(uLeafCount == uLeafCount2); if (uNodeCount != uNodeCount2) Quit("DiffTrees: different node counts"); // Allocate tables so we can convert tree node index to // and from the unique id with a O(1) lookup. unsigned *NodeIndexToId1 = new unsigned[uNodeCount]; unsigned *IdToNodeIndex2 = new unsigned[uNodeCount]; bool *bIsBachelor1 = new bool[uNodeCount]; bool *bIsDiff1 = new bool[uNodeCount]; for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) { NodeIndexToId1[uNodeIndex] = uNodeCount; bIsBachelor1[uNodeIndex] = false; bIsDiff1[uNodeIndex] = false; // Use uNodeCount as value meaning "not set". IdToNodeIndex2[uNodeIndex] = uNodeCount; } // Initialize node index <-> id lookup tables for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) { if (Tree1.IsLeaf(uNodeIndex)) { const unsigned uId = Tree1.GetLeafId(uNodeIndex); if (uId >= uNodeCount) Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)"); NodeIndexToId1[uNodeIndex] = uId; } if (Tree2.IsLeaf(uNodeIndex)) { const unsigned uId = Tree2.GetLeafId(uNodeIndex); if (uId >= uNodeCount) Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)"); IdToNodeIndex2[uId] = uNodeIndex; } } // Validity check. This verifies that the ids // pre-assigned to the leaves in Tree1 are unique // (note that the id<N check above does not rule // out two leaves having duplicate ids). 
for (unsigned uId = 0; uId < uLeafCount; ++uId) { unsigned uNodeIndex2 = IdToNodeIndex2[uId]; if (uNodeCount == uNodeIndex2) Quit("DiffTrees, check 2"); } // Ids assigned to internal nodes are N, N+1 ... // An internal node id uniquely identifies a set // of two or more leaves. unsigned uInternalNodeId = uLeafCount; // Depth-first traversal of tree. // The order guarantees that a node is visited before // its parent is visited. for (unsigned uNodeIndex1 = Tree1.FirstDepthFirstNode(); NULL_NEIGHBOR != uNodeIndex1; uNodeIndex1 = Tree1.NextDepthFirstNode(uNodeIndex1)) { #if TRACE Log("Main loop: Node1=%u IsLeaf=%d IsBachelor=%d\n", uNodeIndex1, Tree1.IsLeaf(uNodeIndex1), bIsBachelor1[uNodeIndex1]); #endif // Leaves are trivial; nothing to do. if (Tree1.IsLeaf(uNodeIndex1) || bIsBachelor1[uNodeIndex1]) continue; // If either child is a bachelor, flag // this node as a bachelor and continue. unsigned uLeft1 = Tree1.GetLeft(uNodeIndex1); if (bIsBachelor1[uLeft1]) { bIsBachelor1[uNodeIndex1] = true; continue; } unsigned uRight1 = Tree1.GetRight(uNodeIndex1); if (bIsBachelor1[uRight1]) { bIsBachelor1[uNodeIndex1] = true; continue; } // Both children are married. // Married nodes are guaranteed to have an id. unsigned uIdLeft = NodeIndexToId1[uLeft1]; unsigned uIdRight = NodeIndexToId1[uRight1]; if (uIdLeft == uNodeCount || uIdRight == uNodeCount) Quit("DiffTrees, check 5"); // uLeft2 is the spouse of uLeft1, and similarly for uRight2. unsigned uLeft2 = IdToNodeIndex2[uIdLeft]; unsigned uRight2 = IdToNodeIndex2[uIdRight]; if (uLeft2 == uNodeCount || uRight2 == uNodeCount) Quit("DiffTrees, check 6"); // If the spouses of uLeft1 and uRight1 have the same // parent, then this parent is the spouse of uNodeIndex1. // Otherwise, uNodeIndex1 is a diff. 
unsigned uParentLeft2 = Tree2.GetParent(uLeft2); unsigned uParentRight2 = Tree2.GetParent(uRight2); #if TRACE Log("L1=%u R1=%u L2=%u R2=%u PL2=%u PR2=%u\n", uLeft1, uRight1, uLeft2, uRight2, uParentLeft2, uParentRight2); #endif if (uParentLeft2 == uParentRight2) { NodeIndexToId1[uNodeIndex1] = uInternalNodeId; IdToNodeIndex2[uInternalNodeId] = uParentLeft2; ++uInternalNodeId; } else bIsBachelor1[uNodeIndex1] = true; } unsigned uDiffCount = 0; for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) { if (bIsBachelor1[uNodeIndex]) continue; if (Tree1.IsRoot(uNodeIndex)) { // Special case: if no bachelors, consider the // root a diff. if (!bIsBachelor1[uNodeIndex]) bIsDiff1[uNodeIndex] = true; continue; } const unsigned uParent = Tree1.GetParent(uNodeIndex); if (bIsBachelor1[uParent]) { bIsDiff1[uNodeIndex] = true; ++uDiffCount; } } #if TRACE Log("Tree1:\n"); Log("Node Id Bach Diff Name\n"); Log("---- ---- ---- ---- ----\n"); for (unsigned n = 0; n < uNodeCount; ++n) { Log("%4u %4u %d %d", n, NodeIndexToId1[n], bIsBachelor1[n], bIsDiff1[n]); if (Tree1.IsLeaf(n)) Log(" %s", Tree1.GetLeafName(n)); Log("\n"); } Log("\n"); Log("Tree2:\n"); Log("Node Id Name\n"); Log("---- ---- ----\n"); for (unsigned n = 0; n < uNodeCount; ++n) { Log("%4u ", n); if (Tree2.IsLeaf(n)) Log(" %s", Tree2.GetLeafName(n)); Log("\n"); } #endif Diffs.CreateRooted(); const unsigned uDiffsRootIndex = Diffs.GetRootNodeIndex(); const unsigned uRootIndex1 = Tree1.GetRootNodeIndex(); for (unsigned n = 0; n < uLeafCount; ++n) IdToDiffsLeafNodeIndex[n] = uNodeCount; BuildDiffs(Tree1, uRootIndex1, bIsDiff1, Diffs, uDiffsRootIndex, IdToDiffsLeafNodeIndex); #if TRACE Log("\n"); Log("Diffs:\n"); Diffs.LogMe(); Log("\n"); Log("IdToDiffsLeafNodeIndex:"); for (unsigned n = 0; n < uLeafCount; ++n) { if (n%16 == 0) Log("\n"); else Log(" "); Log("%u=%u", n, IdToDiffsLeafNodeIndex[n]); } Log("\n"); #endif for (unsigned n = 0; n < uLeafCount; ++n) if (IdToDiffsLeafNodeIndex[n] == uNodeCount) 
Quit("TreeDiffs check 7"); delete[] NodeIndexToId1; delete[] IdToNodeIndex2; delete[] bIsBachelor1; delete[] bIsDiff1; }