void ClusterByHeight(const Tree &tree, double dMaxHeight, unsigned Subtrees[], unsigned *ptruSubtreeCount) { if (!tree.IsRooted()) Quit("ClusterByHeight: requires rooted tree"); #if TRACE Log("ClusterByHeight, max height=%g\n", dMaxHeight); #endif unsigned uSubtreeCount = 0; const unsigned uNodeCount = tree.GetNodeCount(); for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) { if (tree.IsRoot(uNodeIndex)) continue; unsigned uParent = tree.GetParent(uNodeIndex); double dHeight = tree.GetNodeHeight(uNodeIndex); double dParentHeight = tree.GetNodeHeight(uParent); #if TRACE Log("Node %3u Height %5.2f ParentHeight %5.2f\n", uNodeIndex, dHeight, dParentHeight); #endif if (dParentHeight > dMaxHeight && dHeight <= dMaxHeight) { Subtrees[uSubtreeCount] = uNodeIndex; #if TRACE Log("Subtree[%u]=%u\n", uSubtreeCount, uNodeIndex); #endif ++uSubtreeCount; } } *ptruSubtreeCount = uSubtreeCount; }
static unsigned SubFamRecurse(const Tree &tree, unsigned uNodeIndex, unsigned uMaxLeafCount, unsigned SubFams[], unsigned &uSubFamCount) { if (tree.IsLeaf(uNodeIndex)) return 1; unsigned uLeft = tree.GetLeft(uNodeIndex); unsigned uRight = tree.GetRight(uNodeIndex); unsigned uLeftCount = SubFamRecurse(tree, uLeft, uMaxLeafCount, SubFams, uSubFamCount); unsigned uRightCount = SubFamRecurse(tree, uRight, uMaxLeafCount, SubFams, uSubFamCount); unsigned uLeafCount = uLeftCount + uRightCount; if (uLeftCount + uRightCount > uMaxLeafCount) { if (uLeftCount <= uMaxLeafCount) SubFams[uSubFamCount++] = uLeft; if (uRightCount <= uMaxLeafCount) SubFams[uSubFamCount++] = uRight; } else if (tree.IsRoot(uNodeIndex)) { if (uSubFamCount != 0) Quit("Error in SubFamRecurse"); SubFams[uSubFamCount++] = uNodeIndex; } return uLeafCount; }
// Return false when done bool PhyEnumEdges(const Tree &tree, PhyEnumEdgeState &ES) { unsigned uNode1 = uInsane; if (!ES.m_bInit) { if (tree.GetNodeCount() <= 1) { ES.m_uNodeIndex1 = NULL_NEIGHBOR; ES.m_uNodeIndex2 = NULL_NEIGHBOR; return false; } uNode1 = tree.FirstDepthFirstNode(); ES.m_bInit = true; } else { uNode1 = tree.NextDepthFirstNode(ES.m_uNodeIndex1); if (NULL_NEIGHBOR == uNode1) return false; if (tree.IsRooted() && tree.IsRoot(uNode1)) { uNode1 = tree.NextDepthFirstNode(uNode1); if (NULL_NEIGHBOR == uNode1) return false; } } unsigned uNode2 = tree.GetParent(uNode1); ES.m_uNodeIndex1 = uNode1; ES.m_uNodeIndex2 = uNode2; return true; }
bool PhyEnumBiParts(const Tree &tree, PhyEnumEdgeState &ES, unsigned Leaves1[], unsigned *ptruCount1, unsigned Leaves2[], unsigned *ptruCount2) { bool bOk = PhyEnumEdges(tree, ES); if (!bOk) { *ptruCount1 = 0; *ptruCount2 = 0; return false; } // Special case: in a rooted tree, both edges from the root // give the same bipartition, so skip one of them. if (tree.IsRooted() && tree.IsRoot(ES.m_uNodeIndex2) && tree.GetRight(ES.m_uNodeIndex2) == ES.m_uNodeIndex1) { bOk = PhyEnumEdges(tree, ES); if (!bOk) return false; } PhyGetLeaves(tree, ES.m_uNodeIndex1, ES.m_uNodeIndex2, Leaves1, ptruCount1); PhyGetLeaves(tree, ES.m_uNodeIndex2, ES.m_uNodeIndex1, Leaves2, ptruCount2); if (*ptruCount1 + *ptruCount2 != tree.GetLeafCount()) Quit("PhyEnumBiParts %u + %u != %u", *ptruCount1, *ptruCount2, tree.GetLeafCount()); #if DEBUG { for (unsigned i = 0; i < *ptruCount1; ++i) { if (!tree.IsLeaf(Leaves1[i])) Quit("PhyEnumByParts: not leaf"); for (unsigned j = 0; j < *ptruCount2; ++j) { if (!tree.IsLeaf(Leaves2[j])) Quit("PhyEnumByParts: not leaf"); if (Leaves1[i] == Leaves2[j]) Quit("PhyEnumByParts: dupe"); } } } #endif return true; }
void CalcClustalWWeights(const Tree &tree, WEIGHT Weights[]) { #if TRACE Log("CalcClustalWWeights\n"); tree.LogMe(); #endif const unsigned uLeafCount = tree.GetLeafCount(); if (0 == uLeafCount) return; else if (1 == uLeafCount) { Weights[0] = (WEIGHT) 1.0; return; } else if (2 == uLeafCount) { Weights[0] = (WEIGHT) 0.5; Weights[1] = (WEIGHT) 0.5; return; } if (!tree.IsRooted()) Quit("CalcClustalWWeights requires rooted tree"); const unsigned uNodeCount = tree.GetNodeCount(); unsigned *LeavesUnderNode = new unsigned[uNodeCount]; memset(LeavesUnderNode, 0, uNodeCount*sizeof(unsigned)); const unsigned uRootNodeIndex = tree.GetRootNodeIndex(); unsigned uLeavesUnderRoot = CountLeaves(tree, uRootNodeIndex, LeavesUnderNode); if (uLeavesUnderRoot != uLeafCount) Quit("WeightsFromTreee: Internal error, root count %u %u", uLeavesUnderRoot, uLeafCount); #if TRACE Log("Node Leaves Length Strength\n"); Log("---- ------ -------- --------\n"); // 1234 123456 12345678 12345678 #endif double *Strengths = new double[uNodeCount]; for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) { if (tree.IsRoot(uNodeIndex)) { Strengths[uNodeIndex] = 0.0; continue; } const unsigned uParent = tree.GetParent(uNodeIndex); const double dLength = tree.GetEdgeLength(uNodeIndex, uParent); const unsigned uLeaves = LeavesUnderNode[uNodeIndex]; const double dStrength = dLength / (double) uLeaves; Strengths[uNodeIndex] = dStrength; #if TRACE Log("%4u %6u %8g %8g\n", uNodeIndex, uLeaves, dLength, dStrength); #endif } #if TRACE Log("\n"); Log(" Seq Path..Weight\n"); Log("-------------------- ------------\n"); #endif for (unsigned n = 0; n < uLeafCount; ++n) { const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n); #if TRACE Log("%20.20s %4u ", tree.GetLeafName(uLeafNodeIndex), uLeafNodeIndex); #endif if (!tree.IsLeaf(uLeafNodeIndex)) Quit("CalcClustalWWeights: leaf"); double dWeight = 0; unsigned uNode = uLeafNodeIndex; while (!tree.IsRoot(uNode)) { dWeight += Strengths[uNode]; uNode = tree.GetParent(uNode); #if TRACE Log("->%u(%g)", uNode, Strengths[uNode]); #endif } if (dWeight < 0.0001) { #if TRACE Log("zero->one"); #endif dWeight = 1.0; } Weights[n] = (WEIGHT) dWeight; #if TRACE Log(" = %g\n", dWeight); #endif } delete[] Strengths; delete[] LeavesUnderNode; Normalize(Weights, uLeafCount); }
static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[], unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa) { const unsigned uNodeCount = tree.GetNodeCount(); bool *Ready = new bool[uNodeCount]; MSA **MSAs = new MSA *[uNodeCount]; for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) { Ready[uNodeIndex] = false; MSAs[uNodeIndex] = 0; } for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex) { unsigned uNodeIndex = Subfams[uSubfamIndex]; Ready[uNodeIndex] = true; MSA *ptrMSA = new MSA; // TODO: Wasteful copy, needs re-design ptrMSA->Copy(SubfamMSAs[uSubfamIndex]); MSAs[uNodeIndex] = ptrMSA; } for (unsigned uNodeIndex = tree.FirstDepthFirstNode(); NULL_NEIGHBOR != uNodeIndex; uNodeIndex = tree.NextDepthFirstNode(uNodeIndex)) { if (tree.IsLeaf(uNodeIndex)) continue; unsigned uRight = tree.GetRight(uNodeIndex); unsigned uLeft = tree.GetLeft(uNodeIndex); if (!Ready[uRight] || !Ready[uLeft]) continue; MSA *ptrLeft = MSAs[uLeft]; MSA *ptrRight = MSAs[uRight]; assert(ptrLeft != 0 && ptrRight != 0); MSA *ptrParent = new MSA; PWPath Path; AlignTwoMSAs(*ptrLeft, *ptrRight, *ptrParent, Path); MSAs[uNodeIndex] = ptrParent; Ready[uNodeIndex] = true; Ready[uLeft] = false; Ready[uRight] = false; delete MSAs[uLeft]; delete MSAs[uRight]; MSAs[uLeft] = 0; MSAs[uRight] = 0; } #if DEBUG { unsigned uReadyCount = 0; for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) { if (Ready[uNodeIndex]) { assert(tree.IsRoot(uNodeIndex)); ++uReadyCount; assert(0 != MSAs[uNodeIndex]); } else assert(0 == MSAs[uNodeIndex]); } assert(1 == uReadyCount); } #endif const unsigned uRoot = tree.GetRootNodeIndex(); MSA *ptrRootAlignment = MSAs[uRoot]; msa.Copy(*ptrRootAlignment); delete ptrRootAlignment; delete[] Ready; #if TRACE Log("After refine subfamilies, root alignment=\n"); msa.LogMe(); #endif }
void Tree::PruneTree(const Tree &tree, unsigned Subfams[], unsigned uSubfamCount) { if (!tree.IsRooted()) Quit("Tree::PruneTree: requires rooted tree"); Clear(); m_uNodeCount = 2*uSubfamCount - 1; InitCache(m_uNodeCount); const unsigned uUnprunedNodeCount = tree.GetNodeCount(); unsigned *uUnprunedToPrunedIndex = new unsigned[uUnprunedNodeCount]; unsigned *uPrunedToUnprunedIndex = new unsigned[m_uNodeCount]; for (unsigned n = 0; n < uUnprunedNodeCount; ++n) uUnprunedToPrunedIndex[n] = NULL_NEIGHBOR; for (unsigned n = 0; n < m_uNodeCount; ++n) uPrunedToUnprunedIndex[n] = NULL_NEIGHBOR; // Create mapping between unpruned and pruned node indexes unsigned uInternalNodeIndex = uSubfamCount; for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex) { unsigned uUnprunedNodeIndex = Subfams[uSubfamIndex]; uUnprunedToPrunedIndex[uUnprunedNodeIndex] = uSubfamIndex; uPrunedToUnprunedIndex[uSubfamIndex] = uUnprunedNodeIndex; for (;;) { uUnprunedNodeIndex = tree.GetParent(uUnprunedNodeIndex); if (tree.IsRoot(uUnprunedNodeIndex)) break; // Already visited this node? if (NULL_NEIGHBOR != uUnprunedToPrunedIndex[uUnprunedNodeIndex]) break; uUnprunedToPrunedIndex[uUnprunedNodeIndex] = uInternalNodeIndex; uPrunedToUnprunedIndex[uInternalNodeIndex] = uUnprunedNodeIndex; ++uInternalNodeIndex; } } const unsigned uUnprunedRootIndex = tree.GetRootNodeIndex(); uUnprunedToPrunedIndex[uUnprunedRootIndex] = uInternalNodeIndex; uPrunedToUnprunedIndex[uInternalNodeIndex] = uUnprunedRootIndex; #if TRACE { Log("Pruned to unpruned:\n"); for (unsigned i = 0; i < m_uNodeCount; ++i) Log(" [%u]=%u", i, uPrunedToUnprunedIndex[i]); Log("\n"); Log("Unpruned to pruned:\n"); for (unsigned i = 0; i < uUnprunedNodeCount; ++i) { unsigned n = uUnprunedToPrunedIndex[i]; if (n != NULL_NEIGHBOR) Log(" [%u]=%u", i, n); } Log("\n"); } #endif if (uInternalNodeIndex != m_uNodeCount - 1) Quit("Tree::PruneTree, Internal error"); // Nodes 0, 1 ... are the leaves for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex) { char szName[32]; sprintf(szName, "Subfam_%u", uSubfamIndex + 1); m_ptrName[uSubfamIndex] = strsave(szName); } for (unsigned uPrunedNodeIndex = uSubfamCount; uPrunedNodeIndex < m_uNodeCount; ++uPrunedNodeIndex) { unsigned uUnprunedNodeIndex = uPrunedToUnprunedIndex[uPrunedNodeIndex]; const unsigned uUnprunedLeft = tree.GetLeft(uUnprunedNodeIndex); const unsigned uUnprunedRight = tree.GetRight(uUnprunedNodeIndex); const unsigned uPrunedLeft = uUnprunedToPrunedIndex[uUnprunedLeft]; const unsigned uPrunedRight = uUnprunedToPrunedIndex[uUnprunedRight]; const double dLeftLength = tree.GetEdgeLength(uUnprunedNodeIndex, uUnprunedLeft); const double dRightLength = tree.GetEdgeLength(uUnprunedNodeIndex, uUnprunedRight); m_uNeighbor2[uPrunedNodeIndex] = uPrunedLeft; m_uNeighbor3[uPrunedNodeIndex] = uPrunedRight; m_dEdgeLength1[uPrunedLeft] = dLeftLength; m_dEdgeLength1[uPrunedRight] = dRightLength; m_uNeighbor1[uPrunedLeft] = uPrunedNodeIndex; m_uNeighbor1[uPrunedRight] = uPrunedNodeIndex; m_bHasEdgeLength1[uPrunedLeft] = true; m_bHasEdgeLength1[uPrunedRight] = true; m_dEdgeLength2[uPrunedNodeIndex] = dLeftLength; m_dEdgeLength3[uPrunedNodeIndex] = dRightLength; m_bHasEdgeLength2[uPrunedNodeIndex] = true; m_bHasEdgeLength3[uPrunedNodeIndex] = true; } m_uRootNodeIndex = uUnprunedToPrunedIndex[uUnprunedRootIndex]; m_bRooted = true; Validate(); delete[] uUnprunedToPrunedIndex; }
void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs, unsigned IdToDiffsLeafNodeIndex[]) { #if TRACE Log("Tree1:\n"); Tree1.LogMe(); Log("\n"); Log("Tree2:\n"); Tree2.LogMe(); #endif if (!Tree1.IsRooted() || !Tree2.IsRooted()) Quit("DiffTrees: requires rooted trees"); const unsigned uNodeCount = Tree1.GetNodeCount(); const unsigned uNodeCount2 = Tree2.GetNodeCount(); const unsigned uLeafCount = Tree1.GetLeafCount(); const unsigned uLeafCount2 = Tree2.GetLeafCount(); assert(uLeafCount == uLeafCount2); if (uNodeCount != uNodeCount2) Quit("DiffTrees: different node counts"); // Allocate tables so we can convert tree node index to // and from the unique id with a O(1) lookup. unsigned *NodeIndexToId1 = new unsigned[uNodeCount]; unsigned *IdToNodeIndex2 = new unsigned[uNodeCount]; bool *bIsBachelor1 = new bool[uNodeCount]; bool *bIsDiff1 = new bool[uNodeCount]; for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) { NodeIndexToId1[uNodeIndex] = uNodeCount; bIsBachelor1[uNodeIndex] = false; bIsDiff1[uNodeIndex] = false; // Use uNodeCount as value meaning "not set". IdToNodeIndex2[uNodeIndex] = uNodeCount; } // Initialize node index <-> id lookup tables for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) { if (Tree1.IsLeaf(uNodeIndex)) { const unsigned uId = Tree1.GetLeafId(uNodeIndex); if (uId >= uNodeCount) Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)"); NodeIndexToId1[uNodeIndex] = uId; } if (Tree2.IsLeaf(uNodeIndex)) { const unsigned uId = Tree2.GetLeafId(uNodeIndex); if (uId >= uNodeCount) Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)"); IdToNodeIndex2[uId] = uNodeIndex; } } // Validity check. This verifies that the ids // pre-assigned to the leaves in Tree1 are unique // (note that the id<N check above does not rule // out two leaves having duplicate ids). for (unsigned uId = 0; uId < uLeafCount; ++uId) { unsigned uNodeIndex2 = IdToNodeIndex2[uId]; if (uNodeCount == uNodeIndex2) Quit("DiffTrees, check 2"); } // Ids assigned to internal nodes are N, N+1 ... // An internal node id uniquely identifies a set // of two or more leaves. unsigned uInternalNodeId = uLeafCount; // Depth-first traversal of tree. // The order guarantees that a node is visited before // its parent is visited. for (unsigned uNodeIndex1 = Tree1.FirstDepthFirstNode(); NULL_NEIGHBOR != uNodeIndex1; uNodeIndex1 = Tree1.NextDepthFirstNode(uNodeIndex1)) { #if TRACE Log("Main loop: Node1=%u IsLeaf=%d IsBachelor=%d\n", uNodeIndex1, Tree1.IsLeaf(uNodeIndex1), bIsBachelor1[uNodeIndex1]); #endif // Leaves are trivial; nothing to do. if (Tree1.IsLeaf(uNodeIndex1) || bIsBachelor1[uNodeIndex1]) continue; // If either child is a bachelor, flag // this node as a bachelor and continue. unsigned uLeft1 = Tree1.GetLeft(uNodeIndex1); if (bIsBachelor1[uLeft1]) { bIsBachelor1[uNodeIndex1] = true; continue; } unsigned uRight1 = Tree1.GetRight(uNodeIndex1); if (bIsBachelor1[uRight1]) { bIsBachelor1[uNodeIndex1] = true; continue; } // Both children are married. // Married nodes are guaranteed to have an id. unsigned uIdLeft = NodeIndexToId1[uLeft1]; unsigned uIdRight = NodeIndexToId1[uRight1]; if (uIdLeft == uNodeCount || uIdRight == uNodeCount) Quit("DiffTrees, check 5"); // uLeft2 is the spouse of uLeft1, and similarly for uRight2. unsigned uLeft2 = IdToNodeIndex2[uIdLeft]; unsigned uRight2 = IdToNodeIndex2[uIdRight]; if (uLeft2 == uNodeCount || uRight2 == uNodeCount) Quit("DiffTrees, check 6"); // If the spouses of uLeft1 and uRight1 have the same // parent, then this parent is the spouse of uNodeIndex1. // Otherwise, uNodeIndex1 is a diff. unsigned uParentLeft2 = Tree2.GetParent(uLeft2); unsigned uParentRight2 = Tree2.GetParent(uRight2); #if TRACE Log("L1=%u R1=%u L2=%u R2=%u PL2=%u PR2=%u\n", uLeft1, uRight1, uLeft2, uRight2, uParentLeft2, uParentRight2); #endif if (uParentLeft2 == uParentRight2) { NodeIndexToId1[uNodeIndex1] = uInternalNodeId; IdToNodeIndex2[uInternalNodeId] = uParentLeft2; ++uInternalNodeId; } else bIsBachelor1[uNodeIndex1] = true; } unsigned uDiffCount = 0; for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) { if (bIsBachelor1[uNodeIndex]) continue; if (Tree1.IsRoot(uNodeIndex)) { // Special case: if no bachelors, consider the // root a diff. if (!bIsBachelor1[uNodeIndex]) bIsDiff1[uNodeIndex] = true; continue; } const unsigned uParent = Tree1.GetParent(uNodeIndex); if (bIsBachelor1[uParent]) { bIsDiff1[uNodeIndex] = true; ++uDiffCount; } } #if TRACE Log("Tree1:\n"); Log("Node Id Bach Diff Name\n"); Log("---- ---- ---- ---- ----\n"); for (unsigned n = 0; n < uNodeCount; ++n) { Log("%4u %4u %d %d", n, NodeIndexToId1[n], bIsBachelor1[n], bIsDiff1[n]); if (Tree1.IsLeaf(n)) Log(" %s", Tree1.GetLeafName(n)); Log("\n"); } Log("\n"); Log("Tree2:\n"); Log("Node Id Name\n"); Log("---- ---- ----\n"); for (unsigned n = 0; n < uNodeCount; ++n) { Log("%4u ", n); if (Tree2.IsLeaf(n)) Log(" %s", Tree2.GetLeafName(n)); Log("\n"); } #endif Diffs.CreateRooted(); const unsigned uDiffsRootIndex = Diffs.GetRootNodeIndex(); const unsigned uRootIndex1 = Tree1.GetRootNodeIndex(); for (unsigned n = 0; n < uLeafCount; ++n) IdToDiffsLeafNodeIndex[n] = uNodeCount; BuildDiffs(Tree1, uRootIndex1, bIsDiff1, Diffs, uDiffsRootIndex, IdToDiffsLeafNodeIndex); #if TRACE Log("\n"); Log("Diffs:\n"); Diffs.LogMe(); Log("\n"); Log("IdToDiffsLeafNodeIndex:"); for (unsigned n = 0; n < uLeafCount; ++n) { if (n%16 == 0) Log("\n"); else Log(" "); Log("%u=%u", n, IdToDiffsLeafNodeIndex[n]); } Log("\n"); #endif for (unsigned n = 0; n < uLeafCount; ++n) if (IdToDiffsLeafNodeIndex[n] == uNodeCount) Quit("TreeDiffs check 7"); delete[] NodeIndexToId1; delete[] IdToNodeIndex2; delete[] bIsBachelor1; delete[] bIsDiff1; }