void MSA::SetClustalWWeights(const Tree &tree) { const unsigned uSeqCount = GetSeqCount(); const unsigned uLeafCount = tree.GetLeafCount(); WEIGHT *Weights = new WEIGHT[uSeqCount]; CalcClustalWWeights(tree, Weights); for (unsigned n = 0; n < uLeafCount; ++n) { const WEIGHT w = Weights[n]; const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n); const unsigned uId = tree.GetLeafId(uLeafNodeIndex); const unsigned uSeqIndex = GetSeqIndex(uId); #if DEBUG if (GetSeqName(uSeqIndex) != tree.GetLeafName(uLeafNodeIndex)) Quit("MSA::SetClustalWWeights: names don't match"); #endif SetSeqWeight(uSeqIndex, w); } NormalizeWeights((WEIGHT) 1.0); delete[] Weights; }
bool PhyEnumBiParts(const Tree &tree, PhyEnumEdgeState &ES, unsigned Leaves1[], unsigned *ptruCount1, unsigned Leaves2[], unsigned *ptruCount2) { bool bOk = PhyEnumEdges(tree, ES); if (!bOk) { *ptruCount1 = 0; *ptruCount2 = 0; return false; } // Special case: in a rooted tree, both edges from the root // give the same bipartition, so skip one of them. if (tree.IsRooted() && tree.IsRoot(ES.m_uNodeIndex2) && tree.GetRight(ES.m_uNodeIndex2) == ES.m_uNodeIndex1) { bOk = PhyEnumEdges(tree, ES); if (!bOk) return false; } PhyGetLeaves(tree, ES.m_uNodeIndex1, ES.m_uNodeIndex2, Leaves1, ptruCount1); PhyGetLeaves(tree, ES.m_uNodeIndex2, ES.m_uNodeIndex1, Leaves2, ptruCount2); if (*ptruCount1 + *ptruCount2 != tree.GetLeafCount()) Quit("PhyEnumBiParts %u + %u != %u", *ptruCount1, *ptruCount2, tree.GetLeafCount()); #if DEBUG { for (unsigned i = 0; i < *ptruCount1; ++i) { if (!tree.IsLeaf(Leaves1[i])) Quit("PhyEnumByParts: not leaf"); for (unsigned j = 0; j < *ptruCount2; ++j) { if (!tree.IsLeaf(Leaves2[j])) Quit("PhyEnumByParts: not leaf"); if (Leaves1[i] == Leaves2[j]) Quit("PhyEnumByParts: dupe"); } } } #endif return true; }
static void BuildDiffs(const Tree &tree, unsigned uTreeNodeIndex, const bool bIsDiff[], Tree &Diffs, unsigned uDiffsNodeIndex, unsigned IdToDiffsLeafNodeIndex[]) { #if TRACE Log("BuildDiffs(TreeNode=%u IsDiff=%d IsLeaf=%d)\n", uTreeNodeIndex, bIsDiff[uTreeNodeIndex], tree.IsLeaf(uTreeNodeIndex)); #endif if (bIsDiff[uTreeNodeIndex]) { unsigned uLeafCount = tree.GetLeafCount(); unsigned *Leaves = new unsigned[uLeafCount]; GetLeaves(tree, uTreeNodeIndex, Leaves, &uLeafCount); for (unsigned n = 0; n < uLeafCount; ++n) { const unsigned uLeafNodeIndex = Leaves[n]; const unsigned uId = tree.GetLeafId(uLeafNodeIndex); if (uId >= tree.GetLeafCount()) Quit("BuildDiffs, id out of range"); IdToDiffsLeafNodeIndex[uId] = uDiffsNodeIndex; #if TRACE Log(" Leaf id=%u DiffsNode=%u\n", uId, uDiffsNodeIndex); #endif } delete[] Leaves; return; } if (tree.IsLeaf(uTreeNodeIndex)) Quit("BuildDiffs: should never reach leaf"); const unsigned uTreeLeft = tree.GetLeft(uTreeNodeIndex); const unsigned uTreeRight = tree.GetRight(uTreeNodeIndex); const unsigned uDiffsLeft = Diffs.AppendBranch(uDiffsNodeIndex); const unsigned uDiffsRight = uDiffsLeft + 1; BuildDiffs(tree, uTreeLeft, bIsDiff, Diffs, uDiffsLeft, IdToDiffsLeafNodeIndex); BuildDiffs(tree, uTreeRight, bIsDiff, Diffs, uDiffsRight, IdToDiffsLeafNodeIndex); }
void RefineTreeE(MSA &msa, const SeqVect &v, Tree &tree, ProgNode *ProgNodes) { MuscleContext *ctx = getMuscleContext(); const unsigned uSeqCount = msa.GetSeqCount(); if (tree.GetLeafCount() != uSeqCount) Quit("Refine tree, tree has different number of nodes"); if (uSeqCount < 3) return; #if DEBUG ValidateMuscleIds(msa); ValidateMuscleIds(tree); #endif const unsigned uNodeCount = tree.GetNodeCount(); unsigned *uNewNodeIndexToOldNodeIndex= new unsigned[uNodeCount]; Tree Tree2; TreeFromMSA(msa, Tree2, ctx->params.g_Cluster2, ctx->params.g_Distance2, ctx->params.g_Root2, ctx->params.g_pstrDistMxFileName2); #if DEBUG ValidateMuscleIds(Tree2); #endif DiffTreesE(Tree2, tree, uNewNodeIndexToOldNodeIndex); unsigned uRoot = Tree2.GetRootNodeIndex(); if (NODE_CHANGED == uNewNodeIndexToOldNodeIndex[uRoot]) { MSA msa2; RealignDiffsE(msa, v, Tree2, tree, uNewNodeIndexToOldNodeIndex, msa2, ProgNodes); if (!ctx->isCanceled()) { tree.Copy(Tree2); msa.Copy(msa2); #if DEBUG ValidateMuscleIds(msa2); #endif } } delete[] uNewNodeIndexToOldNodeIndex; if (ctx->isCanceled()) { throw MuscleException("Canceled"); } SetCurrentAlignment(msa); ProgressStepsDone(); }
// Records `tree` as the current guide tree and, when the sequence weighting
// method is SEQWEIGHT_ClustalW, recomputes the global per-leaf weight table.
//
// Only the address of `tree` is stored in g_ptrMuscleTree; no copy is made.
// For any other weighting method the existing weight table is left as is.
void SetMuscleTree(const Tree &tree)
{
    g_ptrMuscleTree = &tree;

    if (SEQWEIGHT_ClustalW != GetSeqWeightMethod())
        return;

    // Clear the pointer right after freeing it so that a throwing `new`
    // below cannot leave a dangling pointer that a later call would delete
    // a second time.
    delete[] g_MuscleWeights;
    g_MuscleWeights = 0;

    const unsigned uLeafCount = tree.GetLeafCount();
    g_uMuscleIdCount = uLeafCount;
    g_MuscleWeights = new WEIGHT[uLeafCount];
    CalcClustalWWeights(tree, g_MuscleWeights);
}
void SetMuscleTree(const Tree &tree) { MuscleContext *ctx =getMuscleContext(); WEIGHT* &g_MuscleWeights = ctx->msa2.g_MuscleWeights; unsigned &g_uMuscleIdCount = ctx->msa2.g_uMuscleIdCount; const Tree* &g_ptrMuscleTree = ctx->msa2.g_ptrMuscleTree; g_ptrMuscleTree = &tree; if (SEQWEIGHT_ClustalW != GetSeqWeightMethod()) return; if(g_MuscleWeights!=NULL) { delete[] g_MuscleWeights; g_MuscleWeights = NULL; } const unsigned uLeafCount = tree.GetLeafCount(); g_uMuscleIdCount = uLeafCount; g_MuscleWeights = new WEIGHT[uLeafCount]; CalcClustalWWeights(tree, g_MuscleWeights); }
void CalcClustalWWeights(const Tree &tree, WEIGHT Weights[]) { #if TRACE Log("CalcClustalWWeights\n"); tree.LogMe(); #endif const unsigned uLeafCount = tree.GetLeafCount(); if (0 == uLeafCount) return; else if (1 == uLeafCount) { Weights[0] = (WEIGHT) 1.0; return; } else if (2 == uLeafCount) { Weights[0] = (WEIGHT) 0.5; Weights[1] = (WEIGHT) 0.5; return; } if (!tree.IsRooted()) Quit("CalcClustalWWeights requires rooted tree"); const unsigned uNodeCount = tree.GetNodeCount(); unsigned *LeavesUnderNode = new unsigned[uNodeCount]; memset(LeavesUnderNode, 0, uNodeCount*sizeof(unsigned)); const unsigned uRootNodeIndex = tree.GetRootNodeIndex(); unsigned uLeavesUnderRoot = CountLeaves(tree, uRootNodeIndex, LeavesUnderNode); if (uLeavesUnderRoot != uLeafCount) Quit("WeightsFromTreee: Internal error, root count %u %u", uLeavesUnderRoot, uLeafCount); #if TRACE Log("Node Leaves Length Strength\n"); Log("---- ------ -------- --------\n"); // 1234 123456 12345678 12345678 #endif double *Strengths = new double[uNodeCount]; for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) { if (tree.IsRoot(uNodeIndex)) { Strengths[uNodeIndex] = 0.0; continue; } const unsigned uParent = tree.GetParent(uNodeIndex); const double dLength = tree.GetEdgeLength(uNodeIndex, uParent); const unsigned uLeaves = LeavesUnderNode[uNodeIndex]; const double dStrength = dLength / (double) uLeaves; Strengths[uNodeIndex] = dStrength; #if TRACE Log("%4u %6u %8g %8g\n", uNodeIndex, uLeaves, dLength, dStrength); #endif } #if TRACE Log("\n"); Log(" Seq Path..Weight\n"); Log("-------------------- ------------\n"); #endif for (unsigned n = 0; n < uLeafCount; ++n) { const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n); #if TRACE Log("%20.20s %4u ", tree.GetLeafName(uLeafNodeIndex), uLeafNodeIndex); #endif if (!tree.IsLeaf(uLeafNodeIndex)) Quit("CalcClustalWWeights: leaf"); double dWeight = 0; unsigned uNode = uLeafNodeIndex; while (!tree.IsRoot(uNode)) { dWeight += Strengths[uNode]; uNode = 
tree.GetParent(uNode); #if TRACE Log("->%u(%g)", uNode, Strengths[uNode]); #endif } if (dWeight < 0.0001) { #if TRACE Log("zero->one"); #endif dWeight = 1.0; } Weights[n] = (WEIGHT) dWeight; #if TRACE Log(" = %g\n", dWeight); #endif } delete[] Strengths; delete[] LeavesUnderNode; Normalize(Weights, uLeafCount); }
void DoMuscle() { SetOutputFileName(g_pstrOutFileName.get()); SetInputFileName(g_pstrInFileName.get()); SetMaxIters(g_uMaxIters.get()); SetSeqWeightMethod(g_SeqWeight1.get()); TextFile fileIn(g_pstrInFileName.get()); SeqVect v; v.FromFASTAFile(fileIn); const unsigned uSeqCount = v.Length(); if (0 == uSeqCount) Quit("No sequences in input file"); ALPHA Alpha = ALPHA_Undefined; switch (g_SeqType.get()) { case SEQTYPE_Auto: Alpha = v.GuessAlpha(); break; case SEQTYPE_Protein: Alpha = ALPHA_Amino; break; case SEQTYPE_DNA: Alpha = ALPHA_DNA; break; case SEQTYPE_RNA: Alpha = ALPHA_RNA; break; default: Quit("Invalid seq type"); } SetAlpha(Alpha); v.FixAlpha(); // // AED 21/12/06: Moved matrix loading code inside the PP param function so it gets called for all alignment types // SetPPScore(); unsigned uMaxL = 0; unsigned uTotL = 0; for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) { unsigned L = v.GetSeq(uSeqIndex).Length(); uTotL += L; if (L > uMaxL) uMaxL = L; } SetIter(1); g_bDiags.get() = g_bDiags1.get(); SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount); SetMuscleSeqVect(v); MSA::SetIdCount(uSeqCount); // Initialize sequence ids. // From this point on, ids must somehow propogate from here. for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) v.SetSeqId(uSeqIndex, uSeqIndex); if (0 == uSeqCount) Quit("Input file '%s' has no sequences", g_pstrInFileName.get()); if (1 == uSeqCount) { TextFile fileOut(g_pstrOutFileName.get(), true); v.ToFile(fileOut); return; } if (uSeqCount > 1) MHackStart(v); // First iteration Tree GuideTree; if (0 != g_pstrUseTreeFileName.get()) { // Discourage users... 
if (!g_bUseTreeNoWarn.get()) fprintf(stderr, g_strUseTreeWarning); // Read tree from file TextFile TreeFile(g_pstrUseTreeFileName.get()); GuideTree.FromFile(TreeFile); // Make sure tree is rooted if (!GuideTree.IsRooted()) Quit("User tree must be rooted"); if (GuideTree.GetLeafCount() != uSeqCount) Quit("User tree does not match input sequences"); const unsigned uNodeCount = GuideTree.GetNodeCount(); for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) { if (!GuideTree.IsLeaf(uNodeIndex)) continue; const char *LeafName = GuideTree.GetLeafName(uNodeIndex); unsigned uSeqIndex; bool SeqFound = v.FindName(LeafName, &uSeqIndex); if (!SeqFound) Quit("Label %s in tree does not match sequences", LeafName); unsigned uId = v.GetSeqIdFromName(LeafName); GuideTree.SetLeafId(uNodeIndex, uId); } } else TreeFromSeqVect(v, GuideTree, g_Cluster1.get(), g_Distance1.get(), g_Root1.get(), g_pstrDistMxFileName1.get()); const char *Tree1 = ValueOpt("Tree1"); if (0 != Tree1) { TextFile f(Tree1, true); GuideTree.ToFile(f); if (g_bClusterOnly.get()) return; } SetMuscleTree(GuideTree); ValidateMuscleIds(GuideTree); MSA msa; ProgNode *ProgNodes = 0; if (g_bLow.get()) ProgNodes = ProgressiveAlignE(v, GuideTree, msa); else ProgressiveAlign(v, GuideTree, msa); SetCurrentAlignment(msa); if (0 != g_pstrComputeWeightsFileName.get()) { extern void OutWeights(const char *FileName, const MSA &msa); SetMSAWeightsMuscle(msa); OutWeights(g_pstrComputeWeightsFileName.get(), msa); return; } ValidateMuscleIds(msa); if (1 == g_uMaxIters.get() || 2 == uSeqCount) { //TextFile fileOut(g_pstrOutFileName.get(), true); //MHackEnd(msa); //msa.ToFile(fileOut); MuscleOutput(msa); return; } if (0 == g_pstrUseTreeFileName.get()) { g_bDiags.get() = g_bDiags2.get(); SetIter(2); if (g_bLow.get()) { if (0 != g_uMaxTreeRefineIters.get()) RefineTreeE(msa, v, GuideTree, ProgNodes); } else RefineTree(msa, GuideTree); const char *Tree2 = ValueOpt("Tree2"); if (0 != Tree2) { TextFile f(Tree2, true); 
GuideTree.ToFile(f); } } SetSeqWeightMethod(g_SeqWeight2.get()); SetMuscleTree(GuideTree); if (g_bAnchors.get()) RefineVert(msa, GuideTree, g_uMaxIters.get() - 2); else RefineHoriz(msa, GuideTree, g_uMaxIters.get() - 2, false, false); #if 0 // Refining by subfamilies is disabled as it didn't give better // results. I tried doing this before and after RefineHoriz. // Should get back to this as it seems like this should work. RefineSubfams(msa, GuideTree, g_uMaxIters.get() - 2); #endif ValidateMuscleIds(msa); ValidateMuscleIds(GuideTree); //TextFile fileOut(g_pstrOutFileName.get(), true); //MHackEnd(msa); //msa.ToFile(fileOut); MuscleOutput(msa); }
// Top-level driver for one alignment run, configured from the global
// option variables (g_...). In order, it:
//   1. reads the input sequences from the FASTA file and selects the
//      alphabet from g_SeqType;
//   2. optionally loads a user score matrix (looked up under the directory
//      named by the MUSCLE_MXPATH environment variable when that is set);
//   3. obtains a guide tree, either from the user-supplied tree file or via
//      TreeFromSeqVect;
//   4. runs ProgressiveAlign / ProgressiveAlignE;
//   5. optionally refines the tree (RefineTree / RefineTreeE) and then the
//      alignment (RefineVert / RefineHoriz);
//   6. emits the result with MuscleOutput.
//
// CVLocation is handed to the alignment via MSA::SetCompositeVector.
//
// Several early exits exist: a single input sequence is written straight
// out, the "Tree1" option with g_bClusterOnly stops after writing the tree,
// the compute-weights option stops after OutWeights, and MaxIters == 1 or
// exactly two sequences skip refinement.
void DoMuscle(CompositeVect*CVLocation)
{
    SetOutputFileName(g_pstrOutFileName);
    SetInputFileName(g_pstrInFileName);

    SetMaxIters(g_uMaxIters);
    SetSeqWeightMethod(g_SeqWeight1);

    TextFile fileIn(g_pstrInFileName);
    SeqVect v;
    v.FromFASTAFile(fileIn);
    const unsigned uSeqCount = v.Length();

    if (0 == uSeqCount)
        Quit("No sequences in input file");

    ALPHA Alpha = ALPHA_Undefined;
    switch (g_SeqType)
    {
    case SEQTYPE_Auto:
        Alpha = v.GuessAlpha();
        break;

    case SEQTYPE_Protein:
        Alpha = ALPHA_Amino;
        break;

    case SEQTYPE_DNA:
        Alpha = ALPHA_DNA;
        break;

    case SEQTYPE_RNA:
        Alpha = ALPHA_RNA;
        break;

    default:
        Quit("Invalid seq type");
    }
    SetAlpha(Alpha);
    v.FixAlpha();

    PTR_SCOREMATRIX UserMatrix = 0;
    if (0 != g_pstrMatrixFileName)
    {
        const char *FileName = g_pstrMatrixFileName;
        // Owned copy of "<MUSCLE_MXPATH>/<file>", only allocated when the
        // environment variable is set; released below.
        char *NewFileName = 0;
        const char *Path = getenv("MUSCLE_MXPATH");
        if (Path != 0)
        {
            // path + '/' + file name + terminating NUL
            size_t n = strlen(Path) + 1 + strlen(FileName) + 1;
            NewFileName = new char[n];
            sprintf(NewFileName, "%s/%s", Path, FileName);
            FileName = NewFileName;
        }
        {
            // Inner scope: the TextFile is destroyed before the name buffer
            // it was opened with is freed.
            TextFile File(FileName);
            UserMatrix = ReadMx(File);
        }
        delete[] NewFileName;
        g_Alpha = ALPHA_Amino;
        g_PPScore = PPSCORE_SP;
    }

    SetPPScore();

    // SetPPScore has run; now install the user matrix, if any.
    if (0 != UserMatrix)
        g_ptrScoreMatrix = UserMatrix;

    // Longest and total sequence length, for SetSeqStats.
    unsigned uMaxL = 0;
    unsigned uTotL = 0;
    for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
    {
        unsigned L = v.GetSeq(uSeqIndex).Length();
        uTotL += L;
        if (L > uMaxL)
            uMaxL = L;
    }

    SetIter(1);
    g_bDiags = g_bDiags1;
    SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);

    SetMuscleSeqVect(v);

    MSA::SetIdCount(uSeqCount);

    // Initialize sequence ids.
    // From this point on, ids must somehow propagate from here.
    for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
        v.SetSeqId(uSeqIndex, uSeqIndex);

    if (0 == uSeqCount)
        Quit("Input file '%s' has no sequences", g_pstrInFileName);
    if (1 == uSeqCount)
    {
        // Nothing to align: write the single sequence straight out.
        TextFile fileOut(g_pstrOutFileName, true);
        v.ToFile(fileOut);
        return;
    }

    if (uSeqCount > 1)
        MHackStart(v);

    // First iteration
    Tree GuideTree;
    if (0 != g_pstrUseTreeFileName)
    {
        // Discourage users...
        if (!g_bUseTreeNoWarn)
            fprintf(stderr, "%s", g_strUseTreeWarning);

        // Read tree from file
        TextFile TreeFile(g_pstrUseTreeFileName);
        GuideTree.FromFile(TreeFile);

        // Make sure tree is rooted
        if (!GuideTree.IsRooted())
            Quit("User tree must be rooted");

        if (GuideTree.GetLeafCount() != uSeqCount)
            Quit("User tree does not match input sequences");

        // Give every leaf the id of the input sequence with the same name.
        const unsigned uNodeCount = GuideTree.GetNodeCount();
        for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
        {
            if (!GuideTree.IsLeaf(uNodeIndex))
                continue;
            const char *LeafName = GuideTree.GetLeafName(uNodeIndex);
            unsigned uSeqIndex;
            bool SeqFound = v.FindName(LeafName, &uSeqIndex);
            if (!SeqFound)
                Quit("Label %s in tree does not match sequences", LeafName);
            unsigned uId = v.GetSeqIdFromName(LeafName);
            GuideTree.SetLeafId(uNodeIndex, uId);
        }
    }
    else
        TreeFromSeqVect(v, GuideTree, g_Cluster1, g_Distance1, g_Root1,
          g_pstrDistMxFileName1);

    const char *Tree1 = ValueOpt("Tree1");
    if (0 != Tree1)
    {
        TextFile f(Tree1, true);
        GuideTree.ToFile(f);
        if (g_bClusterOnly)
            return;
    }

    SetMuscleTree(GuideTree);
    ValidateMuscleIds(GuideTree);

    MSA msa;
    msa.SetCompositeVector(CVLocation);
    ProgNode *ProgNodes = 0;
    if (g_bLow)
        ProgNodes = ProgressiveAlignE(v, GuideTree, msa);
    else
        ProgressiveAlign(v, GuideTree, msa);
    SetCurrentAlignment(msa);

    if (0 != g_pstrComputeWeightsFileName)
    {
        extern void OutWeights(const char *FileName, const MSA &msa);
        SetMSAWeightsMuscle(msa);
        OutWeights(g_pstrComputeWeightsFileName, msa);
        return;
    }

    ValidateMuscleIds(msa);

    if (1 == g_uMaxIters || 2 == uSeqCount)
    {
        //TextFile fileOut(g_pstrOutFileName, true);
        //MHackEnd(msa);
        //msa.ToFile(fileOut);
        MuscleOutput(msa);
        return;
    }

    // Tree refinement is skipped when the user supplied the tree.
    if (0 == g_pstrUseTreeFileName)
    {
        g_bDiags = g_bDiags2;
        SetIter(2);

        if (g_bLow)
        {
            if (0 != g_uMaxTreeRefineIters)
                RefineTreeE(msa, v, GuideTree, ProgNodes);
        }
        else
            RefineTree(msa, GuideTree);

        const char *Tree2 = ValueOpt("Tree2");
        if (0 != Tree2)
        {
            TextFile f(Tree2, true);
            GuideTree.ToFile(f);
        }
    }

    SetSeqWeightMethod(g_SeqWeight2);
    SetMuscleTree(GuideTree);

    // Two iterations are accounted for above; pass on the remainder.
    if (g_bAnchors)
        RefineVert(msa, GuideTree, g_uMaxIters - 2);
    else
        RefineHoriz(msa, GuideTree, g_uMaxIters - 2, false, false);

#if 0
    // Refining by subfamilies is disabled as it didn't give better
    // results. I tried doing this before and after RefineHoriz.
    // Should get back to this as it seems like this should work.
    RefineSubfams(msa, GuideTree, g_uMaxIters - 2);
#endif

    ValidateMuscleIds(msa);
    ValidateMuscleIds(GuideTree);

    //TextFile fileOut(g_pstrOutFileName, true);
    //MHackEnd(msa);
    //msa.ToFile(fileOut);
    MuscleOutput(msa);
}
static void RootByMinAvgLeafDist(const Tree &tree, EdgeInfo **EIs, unsigned *ptruNode1, unsigned *ptruNode2, double *ptrdLength1, double *ptrdLength2) { const unsigned uNodeCount = tree.GetNodeCount(); const unsigned uLeafCount = tree.GetLeafCount(); unsigned uNode1 = NULL_NEIGHBOR; unsigned uNode2 = NULL_NEIGHBOR; double dMinHeight = VERY_LARGE_DOUBLE; double dBestLength1 = VERY_LARGE_DOUBLE; double dBestLength2 = VERY_LARGE_DOUBLE; for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) { const unsigned uNeighborCount = tree.GetNeighborCount(uNodeIndex); for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub) { const unsigned uNeighborIndex = tree.GetNeighbor(uNodeIndex, uSub); // Avoid visiting same edge a second time in reversed order. if (uNeighborIndex < uNodeIndex) continue; const unsigned uSubRev = tree.GetNeighborSubscript(uNeighborIndex, uNodeIndex); if (NULL_NEIGHBOR == uSubRev) Quit("RootByMinAvgLeafDist, internal error 1"); // Get info for edges Node1->Node2 and Node2->Node1 (reversed) const EdgeInfo &EI = EIs[uNodeIndex][uSub]; const EdgeInfo &EIRev = EIs[uNeighborIndex][uSubRev]; if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNeighborIndex || EIRev.m_uNode1 != uNeighborIndex || EIRev.m_uNode2 != uNodeIndex) Quit("RootByMinAvgLeafDist, internal error 2"); if (!EI.m_bSet) Quit("RootByMinAvgLeafDist, internal error 3"); if (uLeafCount != EI.m_uLeafCount + EIRev.m_uLeafCount) Quit("RootByMinAvgLeafDist, internal error 4"); const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNeighborIndex); if (dEdgeLength != tree.GetEdgeLength(uNeighborIndex, uNodeIndex)) Quit("RootByMinAvgLeafDist, internal error 5"); // Consider point p on edge 12 in tree (1=Node, 2=Neighbor). // // ----- ---- // | | // 1----p--2 // | | // ----- ---- // // Define: // ADLp = average distance to leaves to left of point p. // ADRp = average distance to leaves to right of point p. // L = edge length = distance 12 // x = distance 1p // So distance p2 = L - x. 
// Average distance from p to leaves on left of p is: // ADLp = ADL1 + x // Average distance from p to leaves on right of p is: // ADRp = ADR2 + (L - x) // To be a root, we require these two distances to be equal, // ADLp = ADRp // ADL1 + x = ADR2 + (L - x) // Solving for x, // x = (ADR2 - ADL1 + L)/2 // If 0 <= x <= L, we can place the root on edge 12. const double ADL1 = EI.m_dTotalDistToLeaves / EI.m_uLeafCount; const double ADR2 = EIRev.m_dTotalDistToLeaves / EIRev.m_uLeafCount; const double x = (ADR2 - ADL1 + dEdgeLength)/2.0; if (x >= 0 && x <= dEdgeLength) { const double dLength1 = x; const double dLength2 = dEdgeLength - x; const double dHeight1 = EI.m_dMaxDistToLeaf + dLength1; const double dHeight2 = EIRev.m_dMaxDistToLeaf + dLength2; const double dHeight = dHeight1 >= dHeight2 ? dHeight1 : dHeight2; #if TRACE Log("Candidate root Node1=%u Node2=%u Height=%g\n", uNodeIndex, uNeighborIndex, dHeight); #endif if (dHeight < dMinHeight) { uNode1 = uNodeIndex; uNode2 = uNeighborIndex; dBestLength1 = dLength1; dBestLength2 = dLength2; dMinHeight = dHeight; } } } } if (NULL_NEIGHBOR == uNode1 || NULL_NEIGHBOR == uNode2) Quit("RootByMinAvgLeafDist, internal error 6"); #if TRACE Log("Best root Node1=%u Node2=%u Length1=%g Length2=%g Height=%g\n", uNode1, uNode2, dBestLength1, dBestLength2, dMinHeight); #endif *ptruNode1 = uNode1; *ptruNode2 = uNode2; *ptrdLength1 = dBestLength1; *ptrdLength2 = dBestLength2; }
void FindRoot(const Tree &tree, unsigned *ptruNode1, unsigned *ptruNode2, double *ptrdLength1, double *ptrdLength2, ROOT RootMethod) { #if TRACE tree.LogMe(); #endif if (tree.IsRooted()) Quit("FindRoot: tree already rooted"); const unsigned uNodeCount = tree.GetNodeCount(); const unsigned uLeafCount = tree.GetLeafCount(); if (uNodeCount < 2) Quit("Root: don't support trees with < 2 edges"); EdgeInfo **EIs = new EdgeInfo *[uNodeCount]; for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) EIs[uNodeIndex] = new EdgeInfo[3]; EdgeList Edges; for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) if (tree.IsLeaf(uNodeIndex)) { unsigned uParent = tree.GetNeighbor1(uNodeIndex); Edges.Add(uParent, uNodeIndex); } #if TRACE Log("Edges: "); Edges.LogMe(); #endif // Main loop: iterate until all distances known double dAllMaxDist = -1e20; unsigned uMaxFrom = NULL_NEIGHBOR; unsigned uMaxTo = NULL_NEIGHBOR; for (;;) { EdgeList NextEdges; #if TRACE Log("\nTop of main loop\n"); Log("Edges: "); Edges.LogMe(); Log("MDs:\n"); ListEIs(EIs, uNodeCount); #endif // For all edges const unsigned uEdgeCount = Edges.GetCount(); if (0 == uEdgeCount) break; for (unsigned n = 0; n < uEdgeCount; ++n) { unsigned uNodeFrom; unsigned uNodeTo; Edges.GetEdge(n, &uNodeFrom, &uNodeTo); CalcInfo(tree, uNodeFrom, uNodeTo, EIs); #if TRACE Log("Edge %u -> %u\n", uNodeFrom, uNodeTo); #endif const unsigned uNeighborCount = tree.GetNeighborCount(uNodeFrom); for (unsigned i = 0; i < uNeighborCount; ++i) { const unsigned uNeighborIndex = tree.GetNeighbor(uNodeFrom, i); if (!Known(tree, EIs, uNeighborIndex, uNodeFrom) && AllKnownOut(tree, EIs, uNeighborIndex, uNodeFrom)) NextEdges.Add(uNeighborIndex, uNodeFrom); } } Edges.Copy(NextEdges); } #if TRACE ListEIs(EIs, uNodeCount); #endif switch (RootMethod) { case ROOT_MidLongestSpan: RootByMidLongestSpan(tree, EIs, ptruNode1, ptruNode2, ptrdLength1, ptrdLength2); break; case ROOT_MinAvgLeafDist: RootByMinAvgLeafDist(tree, EIs, ptruNode1, 
ptruNode2, ptrdLength1, ptrdLength2); break; default: Quit("Invalid RootMethod=%d", RootMethod); } for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) delete[] EIs[uNodeIndex]; delete[] EIs; }
void DiffTreesE(const Tree &NewTree, const Tree &OldTree, unsigned NewNodeIndexToOldNodeIndex[]) { #if TRACE Log("DiffTreesE NewTree:\n"); NewTree.LogMe(); Log("\n"); Log("OldTree:\n"); OldTree.LogMe(); #endif if (!NewTree.IsRooted() || !OldTree.IsRooted()) Quit("DiffTrees: requires rooted trees"); const unsigned uNodeCount = NewTree.GetNodeCount(); const unsigned uOldNodeCount = OldTree.GetNodeCount(); const unsigned uLeafCount = NewTree.GetLeafCount(); const unsigned uOldLeafCount = OldTree.GetLeafCount(); if (uNodeCount != uOldNodeCount || uLeafCount != uOldLeafCount) Quit("DiffTreesE: different node counts"); { unsigned *IdToOldNodeIndex = new unsigned[uNodeCount]; for (unsigned uOldNodeIndex = 0; uOldNodeIndex < uNodeCount; ++uOldNodeIndex) { if (OldTree.IsLeaf(uOldNodeIndex)) { unsigned Id = OldTree.GetLeafId(uOldNodeIndex); IdToOldNodeIndex[Id] = uOldNodeIndex; } } // Initialize NewNodeIndexToOldNodeIndex[] // All internal nodes are marked as changed, but may be updated later. for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex) { if (NewTree.IsLeaf(uNewNodeIndex)) { unsigned uId = NewTree.GetLeafId(uNewNodeIndex); assert(uId < uLeafCount); unsigned uOldNodeIndex = IdToOldNodeIndex[uId]; assert(uOldNodeIndex < uNodeCount); NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldNodeIndex; } else NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED; } delete[] IdToOldNodeIndex; } // Depth-first traversal of tree. // The order guarantees that a node is visited before // its parent is visited. for (unsigned uNewNodeIndex = NewTree.FirstDepthFirstNode(); NULL_NEIGHBOR != uNewNodeIndex; uNewNodeIndex = NewTree.NextDepthFirstNode(uNewNodeIndex)) { if (NewTree.IsLeaf(uNewNodeIndex)) continue; // If either child is changed, flag this node as changed and continue. 
unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex); unsigned uOldLeft = NewNodeIndexToOldNodeIndex[uNewLeft]; if (NODE_CHANGED == uOldLeft) { NewNodeIndexToOldNodeIndex[uNewLeft] = NODE_CHANGED; continue; } unsigned uNewRight = NewTree.GetRight(uNewNodeIndex); unsigned uOldRight = NewNodeIndexToOldNodeIndex[uNewRight]; if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewRight]) { NewNodeIndexToOldNodeIndex[uNewRight] = NODE_CHANGED; continue; } unsigned uOldParentLeft = OldTree.GetParent(uOldLeft); unsigned uOldParentRight = OldTree.GetParent(uOldRight); if (uOldParentLeft == uOldParentRight) NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldParentLeft; else NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED; } #if TRACE { Log("NewToOld "); for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex) { Log(" [%3u]=", uNewNodeIndex); if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewNodeIndex]) Log(" X"); else Log("%3u", NewNodeIndexToOldNodeIndex[uNewNodeIndex]); if ((uNewNodeIndex+1)%8 == 0) Log("\n "); } Log("\n"); } #endif #if DEBUG { for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex) { unsigned uOld = NewNodeIndexToOldNodeIndex[uNewNodeIndex]; if (NewTree.IsLeaf(uNewNodeIndex)) { if (uOld >= uNodeCount) { Log("NewNode=%u uOld=%u > uNodeCount=%u\n", uNewNodeIndex, uOld, uNodeCount); Quit("Diff check failed"); } unsigned uIdNew = NewTree.GetLeafId(uNewNodeIndex); unsigned uIdOld = OldTree.GetLeafId(uOld); if (uIdNew != uIdOld) { Log("NewNode=%u uOld=%u IdNew=%u IdOld=%u\n", uNewNodeIndex, uOld, uIdNew, uIdOld); Quit("Diff check failed"); } continue; } if (NODE_CHANGED == uOld) continue; unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex); unsigned uNewRight = NewTree.GetRight(uNewNodeIndex); unsigned uOldLeft = OldTree.GetLeft(uOld); unsigned uOldRight = OldTree.GetRight(uOld); unsigned uNewLeftPartner = NewNodeIndexToOldNodeIndex[uNewLeft]; unsigned uNewRightPartner = NewNodeIndexToOldNodeIndex[uNewRight]; bool 
bSameNotRotated = (uNewLeftPartner == uOldLeft && uNewRightPartner == uOldRight); bool bSameRotated = (uNewLeftPartner == uOldRight && uNewRightPartner == uOldLeft); if (!bSameNotRotated && !bSameRotated) { Log("NewNode=%u NewL=%u NewR=%u\n", uNewNodeIndex, uNewLeft, uNewRight); Log("OldNode=%u OldL=%u OldR=%u\n", uOld, uOldLeft, uOldRight); Log("NewLPartner=%u NewRPartner=%u\n", uNewLeftPartner, uNewRightPartner); Quit("Diff check failed"); } } } #endif }
void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs, unsigned IdToDiffsLeafNodeIndex[]) { #if TRACE Log("Tree1:\n"); Tree1.LogMe(); Log("\n"); Log("Tree2:\n"); Tree2.LogMe(); #endif if (!Tree1.IsRooted() || !Tree2.IsRooted()) Quit("DiffTrees: requires rooted trees"); const unsigned uNodeCount = Tree1.GetNodeCount(); const unsigned uNodeCount2 = Tree2.GetNodeCount(); const unsigned uLeafCount = Tree1.GetLeafCount(); const unsigned uLeafCount2 = Tree2.GetLeafCount(); assert(uLeafCount == uLeafCount2); if (uNodeCount != uNodeCount2) Quit("DiffTrees: different node counts"); // Allocate tables so we can convert tree node index to // and from the unique id with a O(1) lookup. unsigned *NodeIndexToId1 = new unsigned[uNodeCount]; unsigned *IdToNodeIndex2 = new unsigned[uNodeCount]; bool *bIsBachelor1 = new bool[uNodeCount]; bool *bIsDiff1 = new bool[uNodeCount]; for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) { NodeIndexToId1[uNodeIndex] = uNodeCount; bIsBachelor1[uNodeIndex] = false; bIsDiff1[uNodeIndex] = false; // Use uNodeCount as value meaning "not set". IdToNodeIndex2[uNodeIndex] = uNodeCount; } // Initialize node index <-> id lookup tables for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) { if (Tree1.IsLeaf(uNodeIndex)) { const unsigned uId = Tree1.GetLeafId(uNodeIndex); if (uId >= uNodeCount) Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)"); NodeIndexToId1[uNodeIndex] = uId; } if (Tree2.IsLeaf(uNodeIndex)) { const unsigned uId = Tree2.GetLeafId(uNodeIndex); if (uId >= uNodeCount) Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)"); IdToNodeIndex2[uId] = uNodeIndex; } } // Validity check. This verifies that the ids // pre-assigned to the leaves in Tree1 are unique // (note that the id<N check above does not rule // out two leaves having duplicate ids). 
for (unsigned uId = 0; uId < uLeafCount; ++uId) { unsigned uNodeIndex2 = IdToNodeIndex2[uId]; if (uNodeCount == uNodeIndex2) Quit("DiffTrees, check 2"); } // Ids assigned to internal nodes are N, N+1 ... // An internal node id uniquely identifies a set // of two or more leaves. unsigned uInternalNodeId = uLeafCount; // Depth-first traversal of tree. // The order guarantees that a node is visited before // its parent is visited. for (unsigned uNodeIndex1 = Tree1.FirstDepthFirstNode(); NULL_NEIGHBOR != uNodeIndex1; uNodeIndex1 = Tree1.NextDepthFirstNode(uNodeIndex1)) { #if TRACE Log("Main loop: Node1=%u IsLeaf=%d IsBachelor=%d\n", uNodeIndex1, Tree1.IsLeaf(uNodeIndex1), bIsBachelor1[uNodeIndex1]); #endif // Leaves are trivial; nothing to do. if (Tree1.IsLeaf(uNodeIndex1) || bIsBachelor1[uNodeIndex1]) continue; // If either child is a bachelor, flag // this node as a bachelor and continue. unsigned uLeft1 = Tree1.GetLeft(uNodeIndex1); if (bIsBachelor1[uLeft1]) { bIsBachelor1[uNodeIndex1] = true; continue; } unsigned uRight1 = Tree1.GetRight(uNodeIndex1); if (bIsBachelor1[uRight1]) { bIsBachelor1[uNodeIndex1] = true; continue; } // Both children are married. // Married nodes are guaranteed to have an id. unsigned uIdLeft = NodeIndexToId1[uLeft1]; unsigned uIdRight = NodeIndexToId1[uRight1]; if (uIdLeft == uNodeCount || uIdRight == uNodeCount) Quit("DiffTrees, check 5"); // uLeft2 is the spouse of uLeft1, and similarly for uRight2. unsigned uLeft2 = IdToNodeIndex2[uIdLeft]; unsigned uRight2 = IdToNodeIndex2[uIdRight]; if (uLeft2 == uNodeCount || uRight2 == uNodeCount) Quit("DiffTrees, check 6"); // If the spouses of uLeft1 and uRight1 have the same // parent, then this parent is the spouse of uNodeIndex1. // Otherwise, uNodeIndex1 is a diff. 
unsigned uParentLeft2 = Tree2.GetParent(uLeft2); unsigned uParentRight2 = Tree2.GetParent(uRight2); #if TRACE Log("L1=%u R1=%u L2=%u R2=%u PL2=%u PR2=%u\n", uLeft1, uRight1, uLeft2, uRight2, uParentLeft2, uParentRight2); #endif if (uParentLeft2 == uParentRight2) { NodeIndexToId1[uNodeIndex1] = uInternalNodeId; IdToNodeIndex2[uInternalNodeId] = uParentLeft2; ++uInternalNodeId; } else bIsBachelor1[uNodeIndex1] = true; } unsigned uDiffCount = 0; for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex) { if (bIsBachelor1[uNodeIndex]) continue; if (Tree1.IsRoot(uNodeIndex)) { // Special case: if no bachelors, consider the // root a diff. if (!bIsBachelor1[uNodeIndex]) bIsDiff1[uNodeIndex] = true; continue; } const unsigned uParent = Tree1.GetParent(uNodeIndex); if (bIsBachelor1[uParent]) { bIsDiff1[uNodeIndex] = true; ++uDiffCount; } } #if TRACE Log("Tree1:\n"); Log("Node Id Bach Diff Name\n"); Log("---- ---- ---- ---- ----\n"); for (unsigned n = 0; n < uNodeCount; ++n) { Log("%4u %4u %d %d", n, NodeIndexToId1[n], bIsBachelor1[n], bIsDiff1[n]); if (Tree1.IsLeaf(n)) Log(" %s", Tree1.GetLeafName(n)); Log("\n"); } Log("\n"); Log("Tree2:\n"); Log("Node Id Name\n"); Log("---- ---- ----\n"); for (unsigned n = 0; n < uNodeCount; ++n) { Log("%4u ", n); if (Tree2.IsLeaf(n)) Log(" %s", Tree2.GetLeafName(n)); Log("\n"); } #endif Diffs.CreateRooted(); const unsigned uDiffsRootIndex = Diffs.GetRootNodeIndex(); const unsigned uRootIndex1 = Tree1.GetRootNodeIndex(); for (unsigned n = 0; n < uLeafCount; ++n) IdToDiffsLeafNodeIndex[n] = uNodeCount; BuildDiffs(Tree1, uRootIndex1, bIsDiff1, Diffs, uDiffsRootIndex, IdToDiffsLeafNodeIndex); #if TRACE Log("\n"); Log("Diffs:\n"); Diffs.LogMe(); Log("\n"); Log("IdToDiffsLeafNodeIndex:"); for (unsigned n = 0; n < uLeafCount; ++n) { if (n%16 == 0) Log("\n"); else Log(" "); Log("%u=%u", n, IdToDiffsLeafNodeIndex[n]); } Log("\n"); #endif for (unsigned n = 0; n < uLeafCount; ++n) if (IdToDiffsLeafNodeIndex[n] == uNodeCount) 
Quit("TreeDiffs check 7"); delete[] NodeIndexToId1; delete[] IdToNodeIndex2; delete[] bIsBachelor1; delete[] bIsDiff1; }