Example #1
0
static unsigned SubFamRecurse(const Tree &tree, unsigned uNodeIndex, unsigned uMaxLeafCount,
  unsigned SubFams[], unsigned &uSubFamCount)
	{
	if (tree.IsLeaf(uNodeIndex))
		return 1;

	unsigned uLeft = tree.GetLeft(uNodeIndex);
	unsigned uRight = tree.GetRight(uNodeIndex);
	unsigned uLeftCount = SubFamRecurse(tree, uLeft, uMaxLeafCount, SubFams, uSubFamCount);
	unsigned uRightCount = SubFamRecurse(tree, uRight, uMaxLeafCount, SubFams, uSubFamCount);

	unsigned uLeafCount = uLeftCount + uRightCount;
	if (uLeftCount + uRightCount > uMaxLeafCount)
		{
		if (uLeftCount <= uMaxLeafCount)
			SubFams[uSubFamCount++] = uLeft;
		if (uRightCount <= uMaxLeafCount)
			SubFams[uSubFamCount++] = uRight;
		}
	else if (tree.IsRoot(uNodeIndex))
		{
		if (uSubFamCount != 0)
			Quit("Error in SubFamRecurse");
		SubFams[uSubFamCount++] = uNodeIndex;
		}

	return uLeafCount;
	}
Example #2
0
bool PhyEnumBiParts(const Tree &tree, PhyEnumEdgeState &ES,
  unsigned Leaves1[], unsigned *ptruCount1,
  unsigned Leaves2[], unsigned *ptruCount2)
	{
	bool bOk = PhyEnumEdges(tree, ES);
	if (!bOk)
		{
		*ptruCount1 = 0;
		*ptruCount2 = 0;
		return false;
		}

// Special case: in a rooted tree, both edges from the root
// give the same bipartition, so skip one of them.
	if (tree.IsRooted() && tree.IsRoot(ES.m_uNodeIndex2)
	  && tree.GetRight(ES.m_uNodeIndex2) == ES.m_uNodeIndex1)
		{
		bOk = PhyEnumEdges(tree, ES);
		if (!bOk)
			return false;
		}

	PhyGetLeaves(tree, ES.m_uNodeIndex1, ES.m_uNodeIndex2, Leaves1, ptruCount1);
	PhyGetLeaves(tree, ES.m_uNodeIndex2, ES.m_uNodeIndex1, Leaves2, ptruCount2);

	if (*ptruCount1 + *ptruCount2 != tree.GetLeafCount())
		Quit("PhyEnumBiParts %u + %u != %u",
		  *ptruCount1, *ptruCount2, tree.GetLeafCount());
#if	DEBUG
	{
	for (unsigned i = 0; i < *ptruCount1; ++i)
		{
		if (!tree.IsLeaf(Leaves1[i]))
			Quit("PhyEnumByParts: not leaf");
		for (unsigned j = 0; j < *ptruCount2; ++j)
			{
			if (!tree.IsLeaf(Leaves2[j]))
				Quit("PhyEnumByParts: not leaf");
			if (Leaves1[i] == Leaves2[j])
				Quit("PhyEnumByParts: dupe");
			}
		}
	}
#endif

	return true;
	}
Example #3
0
static void SetInFam(const Tree &tree, unsigned uNodeIndex, bool NodeInSubFam[])
	{
	if (tree.IsLeaf(uNodeIndex))
		return;
	unsigned uLeft = tree.GetLeft(uNodeIndex);
	unsigned uRight = tree.GetRight(uNodeIndex);
	NodeInSubFam[uLeft] = true;
	NodeInSubFam[uRight] = true;

	SetInFam(tree, uLeft, NodeInSubFam);
	SetInFam(tree, uRight, NodeInSubFam);
	}
Example #4
0
static void BuildDiffs(const Tree &tree, unsigned uTreeNodeIndex,
  const bool bIsDiff[], Tree &Diffs, unsigned uDiffsNodeIndex,
  unsigned IdToDiffsLeafNodeIndex[])
	{
#if	TRACE
	Log("BuildDiffs(TreeNode=%u IsDiff=%d IsLeaf=%d)\n",
	  uTreeNodeIndex, bIsDiff[uTreeNodeIndex], tree.IsLeaf(uTreeNodeIndex));
#endif
	if (bIsDiff[uTreeNodeIndex])
		{
		unsigned uLeafCount = tree.GetLeafCount();
		unsigned *Leaves = new unsigned[uLeafCount];
		GetLeaves(tree, uTreeNodeIndex, Leaves, &uLeafCount);
		for (unsigned n = 0; n < uLeafCount; ++n)
			{
			const unsigned uLeafNodeIndex = Leaves[n];
			const unsigned uId = tree.GetLeafId(uLeafNodeIndex);
			if (uId >= tree.GetLeafCount())
				Quit("BuildDiffs, id out of range");
			IdToDiffsLeafNodeIndex[uId] = uDiffsNodeIndex;
#if	TRACE
			Log("  Leaf id=%u DiffsNode=%u\n", uId, uDiffsNodeIndex);
#endif
			}
		delete[] Leaves;
		return;
		}

	if (tree.IsLeaf(uTreeNodeIndex))
		Quit("BuildDiffs: should never reach leaf");

	const unsigned uTreeLeft = tree.GetLeft(uTreeNodeIndex);
	const unsigned uTreeRight = tree.GetRight(uTreeNodeIndex);

	const unsigned uDiffsLeft = Diffs.AppendBranch(uDiffsNodeIndex);
	const unsigned uDiffsRight = uDiffsLeft + 1;

	BuildDiffs(tree, uTreeLeft, bIsDiff, Diffs, uDiffsLeft, IdToDiffsLeafNodeIndex);
	BuildDiffs(tree, uTreeRight, bIsDiff, Diffs, uDiffsRight, IdToDiffsLeafNodeIndex);
	}
Example #5
0
static void ClusterBySubfamCount_Iteration(const Tree &tree, unsigned Subfams[],
        unsigned uCount)
{
// Find highest child node of current set of subfamilies.
    double dHighestHeight = -1e20;
    int iParentSubscript = -1;

    for (int n = 0; n < (int) uCount; ++n)
    {
        const unsigned uNodeIndex = Subfams[n];
        if (tree.IsLeaf(uNodeIndex))
            continue;

        const unsigned uLeft = tree.GetLeft(uNodeIndex);
        const double dHeightLeft = tree.GetNodeHeight(uLeft);
        if (dHeightLeft > dHighestHeight)
        {
            dHighestHeight = dHeightLeft;
            iParentSubscript = n;
        }

        const unsigned uRight = tree.GetRight(uNodeIndex);
        const double dHeightRight = tree.GetNodeHeight(uRight);
        if (dHeightRight > dHighestHeight)
        {
            dHighestHeight = dHeightRight;
            iParentSubscript = n;
        }
    }

    if (-1 == iParentSubscript)
        Quit("CBSFCIter: failed to find highest child");

    const unsigned uNodeIndex = Subfams[iParentSubscript];
    const unsigned uLeft = tree.GetLeft(uNodeIndex);
    const unsigned uRight = tree.GetRight(uNodeIndex);

// Delete parent by replacing with left child
    Subfams[iParentSubscript] = uLeft;

// Append right child to list
    Subfams[uCount] = uRight;

#if	TRACE
    {
        Log("Iter %3u:", uCount);
        for (unsigned n = 0; n < uCount; ++n)
            Log(" %u", Subfams[n]);
        Log("\n");
    }
#endif
}
Example #6
0
void GetInternalNodesInHeightOrder(const Tree &tree, unsigned NodeIndexes[])
	{
	const unsigned uNodeCount = tree.GetNodeCount();
	if (uNodeCount < 3)
		Quit("GetInternalNodesInHeightOrder: %u nodes, none are internal",
		  uNodeCount);
	const unsigned uInternalNodeCount = (uNodeCount - 1)/2;
	double *Heights = new double[uInternalNodeCount];

	unsigned uIndex = 0;
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (tree.IsLeaf(uNodeIndex))
			continue;
		NodeIndexes[uIndex] = uNodeIndex;
		Heights[uIndex] = tree.GetNodeHeight(uNodeIndex);
		++uIndex;
		}
	if (uIndex != uInternalNodeCount)
		Quit("Internal error: GetInternalNodesInHeightOrder");

// Simple but slow bubble sort (probably don't care about speed here)
	bool bDone = false;
	while (!bDone)
		{
		bDone = true;
		for (unsigned i = 0; i < uInternalNodeCount - 1; ++i)
			{
			if (Heights[i] > Heights[i+1])
				{
				double dTmp = Heights[i];
				Heights[i] = Heights[i+1];
				Heights[i+1] = dTmp;

				unsigned uTmp = NodeIndexes[i];
				NodeIndexes[i] = NodeIndexes[i+1];
				NodeIndexes[i+1] = uTmp;
				bDone = false;
				}
			}
		}
#if	TRACE
	Log("Internal node index     Height\n");
	Log("-------------------   --------\n");
	//    1234567890123456789  123456789
	for (unsigned n = 0; n < uInternalNodeCount; ++n)
		Log("%19u  %9.3f\n", NodeIndexes[n], Heights[n]);
#endif
	delete[] Heights;
	}
Example #7
0
static unsigned GetNextNodeIndex(const Tree &tree, unsigned uPrevNodeIndex)
	{
	if (g_bStable)
		{
		const unsigned uNodeCount = tree.GetNodeCount();
		unsigned uNodeIndex = uPrevNodeIndex;
		for (;;)
			{
			++uNodeIndex;
			if (uNodeIndex >= uNodeCount)
				return NULL_NEIGHBOR;
			if (tree.IsLeaf(uNodeIndex))
				return uNodeIndex;
			}
		}
	unsigned uNodeIndex = uPrevNodeIndex;
	for (;;)
		{
		uNodeIndex = tree.NextDepthFirstNode(uNodeIndex);
		if (NULL_NEIGHBOR == uNodeIndex || tree.IsLeaf(uNodeIndex))
			return uNodeIndex;
		}
	}
Example #8
0
static void GetLeavesRecurse(const Tree &tree, unsigned uNodeIndex,
                             unsigned Leaves[], unsigned &uLeafCount /* in-out */)
{
    if (tree.IsLeaf(uNodeIndex))
    {
        Leaves[uLeafCount] = uNodeIndex;
        ++uLeafCount;
        return;
    }

    const unsigned uLeft = tree.GetLeft(uNodeIndex);
    const unsigned uRight = tree.GetRight(uNodeIndex);

    GetLeavesRecurse(tree, uLeft, Leaves, uLeafCount);
    GetLeavesRecurse(tree, uRight, Leaves, uLeafCount);
}
Example #9
0
static unsigned CountLeaves(const Tree &tree, unsigned uNodeIndex,
  unsigned LeavesUnderNode[])
	{
	if (tree.IsLeaf(uNodeIndex))
		{
		LeavesUnderNode[uNodeIndex] = 1;
		return 1;
		}

	const unsigned uLeft = tree.GetLeft(uNodeIndex);
	const unsigned uRight = tree.GetRight(uNodeIndex);
	const unsigned uRightCount = CountLeaves(tree, uRight, LeavesUnderNode);
	const unsigned uLeftCount = CountLeaves(tree, uLeft, LeavesUnderNode);
	const unsigned uCount = uRightCount + uLeftCount;
	LeavesUnderNode[uNodeIndex] = uCount;
	return uCount;
	}
Example #10
0
static void GetLeavesSubtree(const Tree &tree, unsigned uNodeIndex1,
  const unsigned uNodeIndex2, unsigned Leaves[], unsigned *ptruCount)
	{
	if (tree.IsLeaf(uNodeIndex1))
		{
		Leaves[*ptruCount] = uNodeIndex1;
		++(*ptruCount);
		return;
		}

	const unsigned uLeft = tree.GetFirstNeighbor(uNodeIndex1, uNodeIndex2);
	const unsigned uRight = tree.GetSecondNeighbor(uNodeIndex1, uNodeIndex2);
	if (NULL_NEIGHBOR != uLeft)
		GetLeavesSubtree(tree, uLeft, uNodeIndex1, Leaves, ptruCount);
	if (NULL_NEIGHBOR != uRight)
		GetLeavesSubtree(tree, uRight, uNodeIndex1, Leaves, ptruCount);
	}
Example #11
0
static void GetLeavesSubtreeExcluding(const Tree &tree, unsigned uNodeIndex,
  unsigned uExclude, unsigned Leaves[], unsigned *ptruCount)
	{
	if (uNodeIndex == uExclude)
		return;

	if (tree.IsLeaf(uNodeIndex))
		{
		Leaves[*ptruCount] = uNodeIndex;
		++(*ptruCount);
		return;
		}

	const unsigned uLeft = tree.GetLeft(uNodeIndex);
	const unsigned uRight = tree.GetRight(uNodeIndex);
	if (NULL_NEIGHBOR != uLeft)
		GetLeavesSubtreeExcluding(tree, uLeft, uExclude, Leaves, ptruCount);
	if (NULL_NEIGHBOR != uRight)
		GetLeavesSubtreeExcluding(tree, uRight, uExclude, Leaves, ptruCount);
	}
Example #12
0
static void CalcInfo(const Tree &tree, unsigned uNode1, unsigned uNode2, EdgeInfo **EIs)
	{
	const unsigned uNeighborIndex = tree.GetNeighborSubscript(uNode1, uNode2);
	EdgeInfo &EI = EIs[uNode1][uNeighborIndex];
	EI.m_uNode1 = uNode1;
	EI.m_uNode2 = uNode2;

	if (tree.IsLeaf(uNode2))
		{
		EI.m_dMaxDistToLeaf = 0;
		EI.m_dTotalDistToLeaves = 0;
		EI.m_uMaxStep = NULL_NEIGHBOR;
		EI.m_uMostDistantLeaf = uNode2;
		EI.m_uLeafCount = 1;
		EI.m_bSet = true;
		return;
		}

	double dMaxDistToLeaf = -1e29;
	double dTotalDistToLeaves = 0.0;
	unsigned uLeafCount = 0;
	unsigned uMostDistantLeaf = NULL_NEIGHBOR;
	unsigned uMaxStep = NULL_NEIGHBOR;

	const unsigned uNeighborCount = tree.GetNeighborCount(uNode2);
	for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)
		{
		const unsigned uNode3 = tree.GetNeighbor(uNode2, uSub);
		if (uNode3 == uNode1)
			continue;
		const EdgeInfo &EINext = EIs[uNode2][uSub];
		if (!EINext.m_bSet)
			Quit("CalcInfo: internal error, dist %u->%u not known",
				uNode2, uNode3);


		uLeafCount += EINext.m_uLeafCount;

		const double dEdgeLength = tree.GetEdgeLength(uNode2, uNode3);
		const double dTotalDist = EINext.m_dTotalDistToLeaves +
		  EINext.m_uLeafCount*dEdgeLength;
		dTotalDistToLeaves += dTotalDist;

		const double dDist = EINext.m_dMaxDistToLeaf + dEdgeLength;
		if (dDist > dMaxDistToLeaf)
			{
			dMaxDistToLeaf = dDist;
			uMostDistantLeaf = EINext.m_uMostDistantLeaf;
			uMaxStep = uNode3;
			}
		}
	if (NULL_NEIGHBOR == uMaxStep || NULL_NEIGHBOR == uMostDistantLeaf ||
	  0 == uLeafCount)
		Quit("CalcInfo: internal error 2");

	const double dThisDist = tree.GetEdgeLength(uNode1, uNode2);
	EI.m_dMaxDistToLeaf = dMaxDistToLeaf;
	EI.m_dTotalDistToLeaves = dTotalDistToLeaves;
	EI.m_uMaxStep = uMaxStep;
	EI.m_uMostDistantLeaf = uMostDistantLeaf;
	EI.m_uLeafCount = uLeafCount;
	EI.m_bSet = true;
	}
Example #13
0
static void RootByMidLongestSpan(const Tree &tree, EdgeInfo **EIs,
  unsigned *ptruNode1, unsigned *ptruNode2,
  double *ptrdLength1, double *ptrdLength2)
	{
	const unsigned uNodeCount = tree.GetNodeCount();

	unsigned uLeaf1 = NULL_NEIGHBOR;
	unsigned uMostDistantLeaf = NULL_NEIGHBOR;
	double dMaxDist = -VERY_LARGE_DOUBLE;
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (!tree.IsLeaf(uNodeIndex))
			continue;

		const unsigned uNode2 = tree.GetNeighbor1(uNodeIndex);
		if (NULL_NEIGHBOR == uNode2)
			Quit("RootByMidLongestSpan: internal error 0");
		const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNode2);
		const EdgeInfo &EI = EIs[uNodeIndex][0];
		if (!EI.m_bSet)
			Quit("RootByMidLongestSpan: internal error 1");
		if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNode2)
			Quit("RootByMidLongestSpan: internal error 2");
		const double dSpanLength = dEdgeLength + EI.m_dMaxDistToLeaf;
		if (dSpanLength > dMaxDist)
			{
			dMaxDist = dSpanLength;
			uLeaf1 = uNodeIndex;
			uMostDistantLeaf = EI.m_uMostDistantLeaf;
			}
		}
	
	if (NULL_NEIGHBOR == uLeaf1)
		Quit("RootByMidLongestSpan: internal error 3");

	const double dTreeHeight = dMaxDist/2.0;
	unsigned uNode1 = uLeaf1;
	unsigned uNode2 = tree.GetNeighbor1(uLeaf1);
	double dAccumSpanLength = 0;

#if	TRACE
	Log("RootByMidLongestSpan: span=%u", uLeaf1);
#endif

	for (;;)
		{
		const double dEdgeLength = tree.GetEdgeLength(uNode1, uNode2);
#if	TRACE
		Log("->%u(%g;%g)", uNode2, dEdgeLength, dAccumSpanLength);
#endif
		if (dAccumSpanLength + dEdgeLength >= dTreeHeight)
			{
			*ptruNode1 = uNode1;
			*ptruNode2 = uNode2;
			*ptrdLength1 = dTreeHeight - dAccumSpanLength;
			*ptrdLength2 = dEdgeLength - *ptrdLength1;
#if	TRACE
			{
			const EdgeInfo &EI = EIs[uLeaf1][0];
			Log("...\n");
			Log("Midpoint: Leaf1=%u Leaf2=%u Node1=%u Node2=%u Length1=%g Length2=%g\n",
			  uLeaf1, EI.m_uMostDistantLeaf, *ptruNode1, *ptruNode2, *ptrdLength1, *ptrdLength2);
			}
#endif
			return;
			}

		if (tree.IsLeaf(uNode2))
			Quit("RootByMidLongestSpan: internal error 4");

		dAccumSpanLength += dEdgeLength;
		const unsigned uSub = tree.GetNeighborSubscript(uNode1, uNode2);
		const EdgeInfo &EI = EIs[uNode1][uSub];
		if (!EI.m_bSet)
			Quit("RootByMidLongestSpan: internal error 5");

		uNode1 = uNode2;
		uNode2 = EI.m_uMaxStep;
		}
	}
Example #14
0
void DoMuscle(CompositeVect*CVLocation)
	{
	SetOutputFileName(g_pstrOutFileName);
	SetInputFileName(g_pstrInFileName);

	SetMaxIters(g_uMaxIters);
	SetSeqWeightMethod(g_SeqWeight1);

	TextFile fileIn(g_pstrInFileName);
	SeqVect v;
	v.FromFASTAFile(fileIn);
	const unsigned uSeqCount = v.Length();

	if (0 == uSeqCount)
		Quit("No sequences in input file");

	ALPHA Alpha = ALPHA_Undefined;
	switch (g_SeqType)
		{
	case SEQTYPE_Auto:
		Alpha = v.GuessAlpha();
		break;

	case SEQTYPE_Protein:
		Alpha = ALPHA_Amino;
		break;

	case SEQTYPE_DNA:
		Alpha = ALPHA_DNA;
		break;

	case SEQTYPE_RNA:
		Alpha = ALPHA_RNA;
		break;

	default:
		Quit("Invalid seq type");
		}
	SetAlpha(Alpha);
	v.FixAlpha();

	PTR_SCOREMATRIX UserMatrix = 0;
	if (0 != g_pstrMatrixFileName)
		{
		const char *FileName = g_pstrMatrixFileName;
		const char *Path = getenv("MUSCLE_MXPATH");
		if (Path != 0)
			{
			size_t n = strlen(Path) + 1 + strlen(FileName) + 1;
			char *NewFileName = new char[n];
			sprintf(NewFileName, "%s/%s", Path, FileName);
			FileName = NewFileName;
			}
		TextFile File(FileName);
		UserMatrix = ReadMx(File);
		g_Alpha = ALPHA_Amino;
		g_PPScore = PPSCORE_SP;
		}

	SetPPScore();

	if (0 != UserMatrix)
		g_ptrScoreMatrix = UserMatrix;

	unsigned uMaxL = 0;
	unsigned uTotL = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		unsigned L = v.GetSeq(uSeqIndex).Length();
		uTotL += L;
		if (L > uMaxL)
			uMaxL = L;
		}

	SetIter(1);
	g_bDiags = g_bDiags1;
	SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);

	SetMuscleSeqVect(v);

	MSA::SetIdCount(uSeqCount);

// Initialize sequence ids.
// From this point on, ids must somehow propogate from here.
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		v.SetSeqId(uSeqIndex, uSeqIndex);

	if (0 == uSeqCount)
		Quit("Input file '%s' has no sequences", g_pstrInFileName);
	if (1 == uSeqCount)
		{
		TextFile fileOut(g_pstrOutFileName, true);
		v.ToFile(fileOut);
		return;
		}

	if (uSeqCount > 1)
		MHackStart(v);

// First iteration
	Tree GuideTree;
	if (0 != g_pstrUseTreeFileName)
	{
	// Discourage users...
		if (!g_bUseTreeNoWarn)
			fprintf(stderr, "%s", g_strUseTreeWarning);

	// Read tree from file
		TextFile TreeFile(g_pstrUseTreeFileName);
		GuideTree.FromFile(TreeFile);

	// Make sure tree is rooted
		if (!GuideTree.IsRooted())
			Quit("User tree must be rooted");

		if (GuideTree.GetLeafCount() != uSeqCount)
			Quit("User tree does not match input sequences");

		const unsigned uNodeCount = GuideTree.GetNodeCount();
		for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
			{
			if (!GuideTree.IsLeaf(uNodeIndex))
				continue;
			const char *LeafName = GuideTree.GetLeafName(uNodeIndex);
			unsigned uSeqIndex;
			bool SeqFound = v.FindName(LeafName, &uSeqIndex);
			if (!SeqFound)
				Quit("Label %s in tree does not match sequences", LeafName);
			unsigned uId = v.GetSeqIdFromName(LeafName);
			GuideTree.SetLeafId(uNodeIndex, uId);
			}
		}
	else
		TreeFromSeqVect(v, GuideTree, g_Cluster1, g_Distance1, g_Root1,
		  g_pstrDistMxFileName1);

	const char *Tree1 = ValueOpt("Tree1");
	if (0 != Tree1)
		{
		TextFile f(Tree1, true);
		GuideTree.ToFile(f);
		if (g_bClusterOnly)
			return;
		}

	SetMuscleTree(GuideTree);
	ValidateMuscleIds(GuideTree);

	MSA msa;
	msa.SetCompositeVector(CVLocation);
	ProgNode *ProgNodes = 0;
	if (g_bLow)
		ProgNodes = ProgressiveAlignE(v, GuideTree, msa);
	else
		ProgressiveAlign(v, GuideTree, msa);
	SetCurrentAlignment(msa);

	if (0 != g_pstrComputeWeightsFileName)
		{
		extern void OutWeights(const char *FileName, const MSA &msa);
		SetMSAWeightsMuscle(msa);
		OutWeights(g_pstrComputeWeightsFileName, msa);
		return;
		}

	ValidateMuscleIds(msa);

	if (1 == g_uMaxIters || 2 == uSeqCount)
		{
		//TextFile fileOut(g_pstrOutFileName, true);
		//MHackEnd(msa);
		//msa.ToFile(fileOut);
		MuscleOutput(msa);
		return;
		}

	if (0 == g_pstrUseTreeFileName)
		{
		g_bDiags = g_bDiags2;
		SetIter(2);

		if (g_bLow)
			{
			if (0 != g_uMaxTreeRefineIters)
				RefineTreeE(msa, v, GuideTree, ProgNodes);
			}
		else
			RefineTree(msa, GuideTree);

		const char *Tree2 = ValueOpt("Tree2");
		if (0 != Tree2)
			{
			TextFile f(Tree2, true);
			GuideTree.ToFile(f);
			}
		}

	SetSeqWeightMethod(g_SeqWeight2);
	SetMuscleTree(GuideTree);

	if (g_bAnchors)
		RefineVert(msa, GuideTree, g_uMaxIters - 2);
	else
		RefineHoriz(msa, GuideTree, g_uMaxIters - 2, false, false);

#if	0
// Refining by subfamilies is disabled as it didn't give better
// results. I tried doing this before and after RefineHoriz.
// Should get back to this as it seems like this should work.
	RefineSubfams(msa, GuideTree, g_uMaxIters - 2);
#endif

	ValidateMuscleIds(msa);
	ValidateMuscleIds(GuideTree);

	//TextFile fileOut(g_pstrOutFileName, true);
	//MHackEnd(msa);
	//msa.ToFile(fileOut);
	MuscleOutput(msa);
	}
Example #15
0
void DiffTrees(const Tree &Tree1, const Tree &Tree2, Tree &Diffs,
  unsigned IdToDiffsLeafNodeIndex[])
	{
#if	TRACE
	Log("Tree1:\n");
	Tree1.LogMe();
	Log("\n");
	Log("Tree2:\n");
	Tree2.LogMe();
#endif

	if (!Tree1.IsRooted() || !Tree2.IsRooted())
		Quit("DiffTrees: requires rooted trees");

	const unsigned uNodeCount = Tree1.GetNodeCount();
	const unsigned uNodeCount2 = Tree2.GetNodeCount();
	
	const unsigned uLeafCount = Tree1.GetLeafCount();
	const unsigned uLeafCount2 = Tree2.GetLeafCount();
	assert(uLeafCount == uLeafCount2);

	if (uNodeCount != uNodeCount2)
		Quit("DiffTrees: different node counts");

// Allocate tables so we can convert tree node index to
// and from the unique id with a O(1) lookup.
	unsigned *NodeIndexToId1 = new unsigned[uNodeCount];
	unsigned *IdToNodeIndex2 = new unsigned[uNodeCount];

	bool *bIsBachelor1 = new bool[uNodeCount];
	bool *bIsDiff1 = new bool[uNodeCount];

	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		NodeIndexToId1[uNodeIndex] = uNodeCount;
		bIsBachelor1[uNodeIndex] = false;
		bIsDiff1[uNodeIndex] = false;

	// Use uNodeCount as value meaning "not set".
		IdToNodeIndex2[uNodeIndex] = uNodeCount;
		}

// Initialize node index <-> id lookup tables
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (Tree1.IsLeaf(uNodeIndex))
			{
			const unsigned uId = Tree1.GetLeafId(uNodeIndex);
			if (uId >= uNodeCount)
				Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
			NodeIndexToId1[uNodeIndex] = uId;
			}

		if (Tree2.IsLeaf(uNodeIndex))
			{
			const unsigned uId = Tree2.GetLeafId(uNodeIndex);
			if (uId >= uNodeCount)
				Quit("Diff trees requires existing leaf ids in range 0 .. (N-1)");
			IdToNodeIndex2[uId] = uNodeIndex;
			}
		}

// Validity check. This verifies that the ids
// pre-assigned to the leaves in Tree1 are unique
// (note that the id<N check above does not rule
// out two leaves having duplicate ids).
	for (unsigned uId = 0; uId < uLeafCount; ++uId)
		{
		unsigned uNodeIndex2 = IdToNodeIndex2[uId];
		if (uNodeCount == uNodeIndex2)
			Quit("DiffTrees, check 2");
		}

// Ids assigned to internal nodes are N, N+1 ...
// An internal node id uniquely identifies a set
// of two or more leaves.
	unsigned uInternalNodeId = uLeafCount;

// Depth-first traversal of tree.
// The order guarantees that a node is visited before
// its parent is visited.
	for (unsigned uNodeIndex1 = Tree1.FirstDepthFirstNode();
	  NULL_NEIGHBOR != uNodeIndex1;
	  uNodeIndex1 = Tree1.NextDepthFirstNode(uNodeIndex1))
		{
#if	TRACE
		Log("Main loop: Node1=%u IsLeaf=%d IsBachelor=%d\n",
		  uNodeIndex1,
		  Tree1.IsLeaf(uNodeIndex1),
		  bIsBachelor1[uNodeIndex1]);
#endif

	// Leaves are trivial; nothing to do.
		if (Tree1.IsLeaf(uNodeIndex1) || bIsBachelor1[uNodeIndex1])
			continue;

	// If either child is a bachelor, flag
	// this node as a bachelor and continue.
		unsigned uLeft1 = Tree1.GetLeft(uNodeIndex1);
		if (bIsBachelor1[uLeft1])
			{
			bIsBachelor1[uNodeIndex1] = true;
			continue;
			}

		unsigned uRight1 = Tree1.GetRight(uNodeIndex1);
		if (bIsBachelor1[uRight1])
			{
			bIsBachelor1[uNodeIndex1] = true;
			continue;
			}

	// Both children are married.
	// Married nodes are guaranteed to have an id.
		unsigned uIdLeft = NodeIndexToId1[uLeft1];
		unsigned uIdRight = NodeIndexToId1[uRight1];

		if (uIdLeft == uNodeCount || uIdRight == uNodeCount)
			Quit("DiffTrees, check 5");

	// uLeft2 is the spouse of uLeft1, and similarly for uRight2.
		unsigned uLeft2 = IdToNodeIndex2[uIdLeft];
		unsigned uRight2 = IdToNodeIndex2[uIdRight];

		if (uLeft2 == uNodeCount || uRight2 == uNodeCount)
			Quit("DiffTrees, check 6");

	// If the spouses of uLeft1 and uRight1 have the same
	// parent, then this parent is the spouse of uNodeIndex1.
	// Otherwise, uNodeIndex1 is a diff.
		unsigned uParentLeft2 = Tree2.GetParent(uLeft2);
		unsigned uParentRight2 = Tree2.GetParent(uRight2);

#if	TRACE
		Log("L1=%u R1=%u L2=%u R2=%u PL2=%u PR2=%u\n",
		  uLeft1,
		  uRight1,
		  uLeft2,
		  uRight2,
		  uParentLeft2,
		  uParentRight2);
#endif

		if (uParentLeft2 == uParentRight2)
			{
			NodeIndexToId1[uNodeIndex1] = uInternalNodeId;
			IdToNodeIndex2[uInternalNodeId] = uParentLeft2;
			++uInternalNodeId;
			}
		else
			bIsBachelor1[uNodeIndex1] = true;
		}

	unsigned uDiffCount = 0;
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (bIsBachelor1[uNodeIndex])
			continue;
		if (Tree1.IsRoot(uNodeIndex))
			{
		// Special case: if no bachelors, consider the
		// root a diff.
			if (!bIsBachelor1[uNodeIndex])
				bIsDiff1[uNodeIndex] = true;
			continue;
			}
		const unsigned uParent = Tree1.GetParent(uNodeIndex);
		if (bIsBachelor1[uParent])
			{
			bIsDiff1[uNodeIndex] = true;
			++uDiffCount;
			}
		}

#if	TRACE
	Log("Tree1:\n");
	Log("Node    Id  Bach  Diff  Name\n");
	Log("----  ----  ----  ----  ----\n");
	for (unsigned n = 0; n < uNodeCount; ++n)
		{
		Log("%4u  %4u     %d     %d",
		  n,
		  NodeIndexToId1[n],
		  bIsBachelor1[n],
		  bIsDiff1[n]);
		if (Tree1.IsLeaf(n))
			Log("  %s", Tree1.GetLeafName(n));
		Log("\n");
		}
	Log("\n");
	Log("Tree2:\n");
	Log("Node    Id              Name\n");
	Log("----  ----              ----\n");
	for (unsigned n = 0; n < uNodeCount; ++n)
		{
		Log("%4u                  ", n);
		if (Tree2.IsLeaf(n))
			Log("  %s", Tree2.GetLeafName(n));
		Log("\n");
		}
#endif

	Diffs.CreateRooted();
	const unsigned uDiffsRootIndex = Diffs.GetRootNodeIndex();
	const unsigned uRootIndex1 = Tree1.GetRootNodeIndex();

	for (unsigned n = 0; n < uLeafCount; ++n)
		IdToDiffsLeafNodeIndex[n] = uNodeCount;

	BuildDiffs(Tree1, uRootIndex1, bIsDiff1, Diffs, uDiffsRootIndex,
	  IdToDiffsLeafNodeIndex);

#if TRACE
	Log("\n");
	Log("Diffs:\n");
	Diffs.LogMe();
	Log("\n");
	Log("IdToDiffsLeafNodeIndex:");
	for (unsigned n = 0; n < uLeafCount; ++n)
		{
		if (n%16 == 0)
			Log("\n");
		else
			Log(" ");
		Log("%u=%u", n, IdToDiffsLeafNodeIndex[n]);
		}
	Log("\n");
#endif

	for (unsigned n = 0; n < uLeafCount; ++n)
		if (IdToDiffsLeafNodeIndex[n] == uNodeCount)
			Quit("TreeDiffs check 7");

	delete[] NodeIndexToId1;
	delete[] IdToNodeIndex2;

	delete[] bIsBachelor1;
	delete[] bIsDiff1;
	}
Example #16
0
static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],
  unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa)
	{
	const unsigned uNodeCount = tree.GetNodeCount();

	bool *Ready = new bool[uNodeCount];
	MSA **MSAs = new MSA *[uNodeCount];
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		Ready[uNodeIndex] = false;
		MSAs[uNodeIndex] = 0;
		}

	for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
		{
		unsigned uNodeIndex = Subfams[uSubfamIndex];
		Ready[uNodeIndex] = true;
		MSA *ptrMSA = new MSA;
	// TODO: Wasteful copy, needs re-design
		ptrMSA->Copy(SubfamMSAs[uSubfamIndex]);
		MSAs[uNodeIndex] = ptrMSA;
		}

	for (unsigned uNodeIndex = tree.FirstDepthFirstNode();
	  NULL_NEIGHBOR != uNodeIndex;
	  uNodeIndex = tree.NextDepthFirstNode(uNodeIndex))
		{
		if (tree.IsLeaf(uNodeIndex))
			continue;

		unsigned uRight = tree.GetRight(uNodeIndex);
		unsigned uLeft = tree.GetLeft(uNodeIndex);
		if (!Ready[uRight] || !Ready[uLeft])
			continue;

		MSA *ptrLeft = MSAs[uLeft];
		MSA *ptrRight = MSAs[uRight];
		assert(ptrLeft != 0 && ptrRight != 0);

		MSA *ptrParent = new MSA;

		PWPath Path;
		AlignTwoMSAs(*ptrLeft, *ptrRight, *ptrParent, Path);

		MSAs[uNodeIndex] = ptrParent;
		Ready[uNodeIndex] = true;
		Ready[uLeft] = false;
		Ready[uRight] = false;

		delete MSAs[uLeft];
		delete MSAs[uRight];
		MSAs[uLeft] = 0;
		MSAs[uRight] = 0;
		}

#if	DEBUG
	{
	unsigned uReadyCount = 0;
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (Ready[uNodeIndex])
			{
			assert(tree.IsRoot(uNodeIndex));
			++uReadyCount;
			assert(0 != MSAs[uNodeIndex]);
			}
		else
			assert(0 == MSAs[uNodeIndex]);
		}
	assert(1 == uReadyCount);
	}
#endif

	const unsigned uRoot = tree.GetRootNodeIndex();
	MSA *ptrRootAlignment = MSAs[uRoot];

	msa.Copy(*ptrRootAlignment);

	delete ptrRootAlignment;
    delete[] Ready;

#if	TRACE
	Log("After refine subfamilies, root alignment=\n");
	msa.LogMe();
#endif
	}
Example #17
0
void ProgressiveAlign(const SeqVect &v, const Tree &GuideTree, MSA &a)
	{
	assert(GuideTree.IsRooted());

#if	TRACE
	Log("GuideTree:\n");
	GuideTree.LogMe();
#endif

	const unsigned uSeqCount = v.Length();
	const unsigned uNodeCount = 2*uSeqCount - 1;

	ProgNode *ProgNodes = new ProgNode[uNodeCount];

	unsigned uJoin = 0;
	unsigned uTreeNodeIndex = GuideTree.FirstDepthFirstNode();
	SetProgressDesc("Align node");
	do
		{
		if (GuideTree.IsLeaf(uTreeNodeIndex))
			{
			if (uTreeNodeIndex >= uNodeCount)
				Quit("TreeNodeIndex=%u NodeCount=%u\n", uTreeNodeIndex, uNodeCount);
			ProgNode &Node = ProgNodes[uTreeNodeIndex];
			unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex);
			if (uId >= uSeqCount)
				Quit("Seq index out of range");
			const Seq &s = *(v[uId]);
			Node.m_MSA.FromSeq(s);
			Node.m_MSA.SetSeqId(0, uId);
			Node.m_uLength = Node.m_MSA.GetColCount();
			}
		else
			{
			Progress(uJoin, uSeqCount - 1);
			++uJoin;

			const unsigned uMergeNodeIndex = uTreeNodeIndex;
			ProgNode &Parent = ProgNodes[uMergeNodeIndex];

			const unsigned uLeft = GuideTree.GetLeft(uTreeNodeIndex);
			const unsigned uRight = GuideTree.GetRight(uTreeNodeIndex);

			ProgNode &Node1 = ProgNodes[uLeft];
			ProgNode &Node2 = ProgNodes[uRight];

			PWPath Path;
			AlignTwoMSAs(Node1.m_MSA, Node2.m_MSA, Parent.m_MSA, Path);
			Parent.m_uLength = Parent.m_MSA.GetColCount();

			Node1.m_MSA.Clear();
			Node2.m_MSA.Clear();
			}
		uTreeNodeIndex = GuideTree.NextDepthFirstNode(uTreeNodeIndex);
		}
	while (NULL_NEIGHBOR != uTreeNodeIndex);
	ProgressStepsDone();

	unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
	const ProgNode &RootProgNode = ProgNodes[uRootNodeIndex];
	a.Copy(RootProgNode.m_MSA);

	delete[] ProgNodes;
	ProgNodes = 0;
	}
Example #18
0
void DoMuscle()
	{
	SetOutputFileName(g_pstrOutFileName.get());
	SetInputFileName(g_pstrInFileName.get());

	SetMaxIters(g_uMaxIters.get());
	SetSeqWeightMethod(g_SeqWeight1.get());

	TextFile fileIn(g_pstrInFileName.get());
	SeqVect v;
	v.FromFASTAFile(fileIn);
	const unsigned uSeqCount = v.Length();

	if (0 == uSeqCount)
		Quit("No sequences in input file");

	ALPHA Alpha = ALPHA_Undefined;
	switch (g_SeqType.get())
		{
	case SEQTYPE_Auto:
		Alpha = v.GuessAlpha();
		break;

	case SEQTYPE_Protein:
		Alpha = ALPHA_Amino;
		break;

	case SEQTYPE_DNA:
		Alpha = ALPHA_DNA;
		break;

	case SEQTYPE_RNA:
		Alpha = ALPHA_RNA;
		break;

	default:
		Quit("Invalid seq type");
		}
	SetAlpha(Alpha);
	v.FixAlpha();

//
// AED 21/12/06: Moved matrix loading code inside the PP param function so it gets called for all alignment types
//
	SetPPScore();


	unsigned uMaxL = 0;
	unsigned uTotL = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		unsigned L = v.GetSeq(uSeqIndex).Length();
		uTotL += L;
		if (L > uMaxL)
			uMaxL = L;
		}

	SetIter(1);
	g_bDiags.get() = g_bDiags1.get();
	SetSeqStats(uSeqCount, uMaxL, uTotL/uSeqCount);

	SetMuscleSeqVect(v);

	MSA::SetIdCount(uSeqCount);

// Initialize sequence ids.
// From this point on, ids must somehow propogate from here.
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		v.SetSeqId(uSeqIndex, uSeqIndex);

	if (0 == uSeqCount)
		Quit("Input file '%s' has no sequences", g_pstrInFileName.get());
	if (1 == uSeqCount)
		{
		TextFile fileOut(g_pstrOutFileName.get(), true);
		v.ToFile(fileOut);
		return;
		}

	if (uSeqCount > 1)
		MHackStart(v);

// First iteration
	Tree GuideTree;
	if (0 != g_pstrUseTreeFileName.get())
		{
	// Discourage users...
		if (!g_bUseTreeNoWarn.get())
			fprintf(stderr, g_strUseTreeWarning);

	// Read tree from file
		TextFile TreeFile(g_pstrUseTreeFileName.get());
		GuideTree.FromFile(TreeFile);

	// Make sure tree is rooted
		if (!GuideTree.IsRooted())
			Quit("User tree must be rooted");

		if (GuideTree.GetLeafCount() != uSeqCount)
			Quit("User tree does not match input sequences");

		const unsigned uNodeCount = GuideTree.GetNodeCount();
		for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
			{
			if (!GuideTree.IsLeaf(uNodeIndex))
				continue;
			const char *LeafName = GuideTree.GetLeafName(uNodeIndex);
			unsigned uSeqIndex;
			bool SeqFound = v.FindName(LeafName, &uSeqIndex);
			if (!SeqFound)
				Quit("Label %s in tree does not match sequences", LeafName);
			unsigned uId = v.GetSeqIdFromName(LeafName);
			GuideTree.SetLeafId(uNodeIndex, uId);
			}
		}
	else
		TreeFromSeqVect(v, GuideTree, g_Cluster1.get(), g_Distance1.get(), g_Root1.get(),
		  g_pstrDistMxFileName1.get());

	const char *Tree1 = ValueOpt("Tree1");
	if (0 != Tree1)
		{
		TextFile f(Tree1, true);
		GuideTree.ToFile(f);
		if (g_bClusterOnly.get())
			return;
		}

	SetMuscleTree(GuideTree);
	ValidateMuscleIds(GuideTree);

	MSA msa;
	ProgNode *ProgNodes = 0;
	if (g_bLow.get())
		ProgNodes = ProgressiveAlignE(v, GuideTree, msa);
	else
		ProgressiveAlign(v, GuideTree, msa);
	SetCurrentAlignment(msa);

	if (0 != g_pstrComputeWeightsFileName.get())
		{
		extern void OutWeights(const char *FileName, const MSA &msa);
		SetMSAWeightsMuscle(msa);
		OutWeights(g_pstrComputeWeightsFileName.get(), msa);
		return;
		}

	ValidateMuscleIds(msa);

	if (1 == g_uMaxIters.get() || 2 == uSeqCount)
		{
		//TextFile fileOut(g_pstrOutFileName.get(), true);
		//MHackEnd(msa);
		//msa.ToFile(fileOut);
		MuscleOutput(msa);
		return;
		}

	if (0 == g_pstrUseTreeFileName.get())
		{
		g_bDiags.get() = g_bDiags2.get();
		SetIter(2);

		if (g_bLow.get())
			{
			if (0 != g_uMaxTreeRefineIters.get())
				RefineTreeE(msa, v, GuideTree, ProgNodes);
			}
		else
			RefineTree(msa, GuideTree);

		const char *Tree2 = ValueOpt("Tree2");
		if (0 != Tree2)
			{
			TextFile f(Tree2, true);
			GuideTree.ToFile(f);
			}
		}

	SetSeqWeightMethod(g_SeqWeight2.get());
	SetMuscleTree(GuideTree);

	if (g_bAnchors.get())
		RefineVert(msa, GuideTree, g_uMaxIters.get() - 2);
	else
		RefineHoriz(msa, GuideTree, g_uMaxIters.get() - 2, false, false);

#if	0
// Refining by subfamilies is disabled as it didn't give better
// results. I tried doing this before and after RefineHoriz.
// Should get back to this as it seems like this should work.
	RefineSubfams(msa, GuideTree, g_uMaxIters.get() - 2);
#endif

	ValidateMuscleIds(msa);
	ValidateMuscleIds(GuideTree);

	//TextFile fileOut(g_pstrOutFileName.get(), true);
	//MHackEnd(msa);
	//msa.ToFile(fileOut);
	MuscleOutput(msa);
	}
Example #19
0
void DiffTreesE(const Tree &NewTree, const Tree &OldTree,
  unsigned NewNodeIndexToOldNodeIndex[])
	{
#if	TRACE
	Log("DiffTreesE NewTree:\n");
	NewTree.LogMe();
	Log("\n");
	Log("OldTree:\n");
	OldTree.LogMe();
#endif

	if (!NewTree.IsRooted() || !OldTree.IsRooted())
		Quit("DiffTrees: requires rooted trees");

	const unsigned uNodeCount = NewTree.GetNodeCount();
	const unsigned uOldNodeCount = OldTree.GetNodeCount();
	const unsigned uLeafCount = NewTree.GetLeafCount();
	const unsigned uOldLeafCount = OldTree.GetLeafCount();
	if (uNodeCount != uOldNodeCount || uLeafCount != uOldLeafCount)
		Quit("DiffTreesE: different node counts");

	{
	unsigned *IdToOldNodeIndex = new unsigned[uNodeCount];
	for (unsigned uOldNodeIndex = 0; uOldNodeIndex < uNodeCount; ++uOldNodeIndex)
		{
		if (OldTree.IsLeaf(uOldNodeIndex))
			{
			unsigned Id = OldTree.GetLeafId(uOldNodeIndex);
			IdToOldNodeIndex[Id] = uOldNodeIndex;
			}
		}

// Initialize NewNodeIndexToOldNodeIndex[]
// All internal nodes are marked as changed, but may be updated later.
	for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
		{
		if (NewTree.IsLeaf(uNewNodeIndex))
			{
			unsigned uId = NewTree.GetLeafId(uNewNodeIndex);
			assert(uId < uLeafCount);

			unsigned uOldNodeIndex = IdToOldNodeIndex[uId];
			assert(uOldNodeIndex < uNodeCount);

			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldNodeIndex;
			}
		else
			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
		}
	delete[] IdToOldNodeIndex;
	}

// Depth-first traversal of tree.
// The order guarantees that a node is visited before
// its parent is visited.
	for (unsigned uNewNodeIndex = NewTree.FirstDepthFirstNode();
	  NULL_NEIGHBOR != uNewNodeIndex;
	  uNewNodeIndex = NewTree.NextDepthFirstNode(uNewNodeIndex))
		{
		if (NewTree.IsLeaf(uNewNodeIndex))
			continue;

	// If either child is changed, flag this node as changed and continue.
		unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
		unsigned uOldLeft = NewNodeIndexToOldNodeIndex[uNewLeft];
		if (NODE_CHANGED == uOldLeft)
			{
			NewNodeIndexToOldNodeIndex[uNewLeft] = NODE_CHANGED;
			continue;
			}

		unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);
		unsigned uOldRight = NewNodeIndexToOldNodeIndex[uNewRight];
		if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewRight])
			{
			NewNodeIndexToOldNodeIndex[uNewRight] = NODE_CHANGED;
			continue;
			}

		unsigned uOldParentLeft = OldTree.GetParent(uOldLeft);
		unsigned uOldParentRight = OldTree.GetParent(uOldRight);
		if (uOldParentLeft == uOldParentRight)
			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = uOldParentLeft;
		else
			NewNodeIndexToOldNodeIndex[uNewNodeIndex] = NODE_CHANGED;
		}

#if TRACE
	{
	Log("NewToOld ");
	for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
		{
		Log(" [%3u]=", uNewNodeIndex);
		if (NODE_CHANGED == NewNodeIndexToOldNodeIndex[uNewNodeIndex])
			Log("  X");
		else
			Log("%3u", NewNodeIndexToOldNodeIndex[uNewNodeIndex]);
		if ((uNewNodeIndex+1)%8 == 0)
			Log("\n         ");
		}
	Log("\n");
	}
#endif

#if	DEBUG
	{
	for (unsigned uNewNodeIndex = 0; uNewNodeIndex < uNodeCount; ++uNewNodeIndex)
		{
		unsigned uOld = NewNodeIndexToOldNodeIndex[uNewNodeIndex];
		if (NewTree.IsLeaf(uNewNodeIndex))
			{
			if (uOld >= uNodeCount)
				{
				Log("NewNode=%u uOld=%u > uNodeCount=%u\n",
				  uNewNodeIndex, uOld, uNodeCount);
				Quit("Diff check failed");
				}
			unsigned uIdNew = NewTree.GetLeafId(uNewNodeIndex);
			unsigned uIdOld = OldTree.GetLeafId(uOld);
			if (uIdNew != uIdOld)
				{
				Log("NewNode=%u uOld=%u IdNew=%u IdOld=%u\n",
				  uNewNodeIndex, uOld, uIdNew, uIdOld);
				Quit("Diff check failed");
				}
			continue;
			}

		if (NODE_CHANGED == uOld)
			continue;

		unsigned uNewLeft = NewTree.GetLeft(uNewNodeIndex);
		unsigned uNewRight = NewTree.GetRight(uNewNodeIndex);

		unsigned uOldLeft = OldTree.GetLeft(uOld);
		unsigned uOldRight = OldTree.GetRight(uOld);

		unsigned uNewLeftPartner = NewNodeIndexToOldNodeIndex[uNewLeft];
		unsigned uNewRightPartner = NewNodeIndexToOldNodeIndex[uNewRight];

		bool bSameNotRotated = (uNewLeftPartner == uOldLeft && uNewRightPartner == uOldRight);
		bool bSameRotated = (uNewLeftPartner == uOldRight && uNewRightPartner == uOldLeft);
		if (!bSameNotRotated && !bSameRotated)
			{
			Log("NewNode=%u NewL=%u NewR=%u\n", uNewNodeIndex, uNewLeft, uNewRight);
			Log("OldNode=%u OldL=%u OldR=%u\n", uOld, uOldLeft, uOldRight);
			Log("NewLPartner=%u NewRPartner=%u\n", uNewLeftPartner, uNewRightPartner);
			Quit("Diff check failed");
			}
		}
	}
#endif
	}
Example #20
0
void CalcClustalWWeights(const Tree &tree, WEIGHT Weights[])
	{
#if	TRACE
	Log("CalcClustalWWeights\n");
	tree.LogMe();
#endif

	const unsigned uLeafCount = tree.GetLeafCount();
	if (0 == uLeafCount)
		return;
	else if (1 == uLeafCount)
		{
		Weights[0] = (WEIGHT) 1.0;
		return;
		}
	else if (2 == uLeafCount)
		{
		Weights[0] = (WEIGHT) 0.5;
		Weights[1] = (WEIGHT) 0.5;
		return;
		}

	if (!tree.IsRooted())
		Quit("CalcClustalWWeights requires rooted tree");

	const unsigned uNodeCount = tree.GetNodeCount();
	unsigned *LeavesUnderNode = new unsigned[uNodeCount];
	memset(LeavesUnderNode, 0, uNodeCount*sizeof(unsigned));

	const unsigned uRootNodeIndex = tree.GetRootNodeIndex();
	unsigned uLeavesUnderRoot = CountLeaves(tree, uRootNodeIndex, LeavesUnderNode);
	if (uLeavesUnderRoot != uLeafCount)
		Quit("WeightsFromTreee: Internal error, root count %u %u",
		  uLeavesUnderRoot, uLeafCount);

#if	TRACE
	Log("Node  Leaves    Length  Strength\n");
	Log("----  ------  --------  --------\n");
	//    1234  123456  12345678  12345678
#endif

	double *Strengths = new double[uNodeCount];
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (tree.IsRoot(uNodeIndex))
			{
			Strengths[uNodeIndex] = 0.0;
			continue;
			}
		const unsigned uParent = tree.GetParent(uNodeIndex);
		const double dLength = tree.GetEdgeLength(uNodeIndex, uParent);
		const unsigned uLeaves = LeavesUnderNode[uNodeIndex];
		const double dStrength = dLength / (double) uLeaves;
		Strengths[uNodeIndex] = dStrength;
#if	TRACE
		Log("%4u  %6u  %8g  %8g\n", uNodeIndex, uLeaves, dLength, dStrength);
#endif
		}

#if	TRACE
	Log("\n");
	Log("                 Seq  Path..Weight\n");
	Log("--------------------  ------------\n");
#endif
	for (unsigned n = 0; n < uLeafCount; ++n)
		{
		const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
#if	TRACE
		Log("%20.20s  %4u ", tree.GetLeafName(uLeafNodeIndex), uLeafNodeIndex);
#endif
		if (!tree.IsLeaf(uLeafNodeIndex))
			Quit("CalcClustalWWeights: leaf");

		double dWeight = 0;
		unsigned uNode = uLeafNodeIndex;
		while (!tree.IsRoot(uNode))
			{
			dWeight += Strengths[uNode];
			uNode = tree.GetParent(uNode);
#if	TRACE
			Log("->%u(%g)", uNode, Strengths[uNode]);
#endif
			}
		if (dWeight < 0.0001)
			{
#if	TRACE
			Log("zero->one");
#endif
			dWeight = 1.0;
			}
		Weights[n] = (WEIGHT) dWeight;
#if	TRACE
		Log(" = %g\n", dWeight);
#endif
		}

	delete[] Strengths;
	delete[] LeavesUnderNode;

	Normalize(Weights, uLeafCount);
	}
Example #21
0
void FindRoot(const Tree &tree, unsigned *ptruNode1, unsigned *ptruNode2,
  double *ptrdLength1, double *ptrdLength2,
  ROOT RootMethod)
	{
#if	TRACE
	tree.LogMe();
#endif
	if (tree.IsRooted())
		Quit("FindRoot: tree already rooted");

	const unsigned uNodeCount = tree.GetNodeCount();
	const unsigned uLeafCount = tree.GetLeafCount();

	if (uNodeCount < 2)
		Quit("Root: don't support trees with < 2 edges");

	EdgeInfo **EIs = new EdgeInfo *[uNodeCount];
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		EIs[uNodeIndex] = new EdgeInfo[3];

	EdgeList Edges;
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		if (tree.IsLeaf(uNodeIndex))
			{
			unsigned uParent = tree.GetNeighbor1(uNodeIndex);
			Edges.Add(uParent, uNodeIndex);
			}

#if	TRACE
	Log("Edges: ");
	Edges.LogMe();
#endif

// Main loop: iterate until all distances known
	double dAllMaxDist = -1e20;
	unsigned uMaxFrom = NULL_NEIGHBOR;
	unsigned uMaxTo = NULL_NEIGHBOR;
	for (;;)
		{
		EdgeList NextEdges;

#if	TRACE
		Log("\nTop of main loop\n");
		Log("Edges: ");
		Edges.LogMe();
		Log("MDs:\n");
		ListEIs(EIs, uNodeCount);
#endif

	// For all edges
		const unsigned uEdgeCount = Edges.GetCount();
		if (0 == uEdgeCount)
			break;
		for (unsigned n = 0; n < uEdgeCount; ++n)
			{
			unsigned uNodeFrom;
			unsigned uNodeTo;
			Edges.GetEdge(n, &uNodeFrom, &uNodeTo);

			CalcInfo(tree, uNodeFrom, uNodeTo, EIs);
#if	TRACE
			Log("Edge %u -> %u\n", uNodeFrom, uNodeTo);
#endif
			const unsigned uNeighborCount = tree.GetNeighborCount(uNodeFrom);
			for (unsigned i = 0; i < uNeighborCount; ++i)
				{
				const unsigned uNeighborIndex = tree.GetNeighbor(uNodeFrom, i);
				if (!Known(tree, EIs, uNeighborIndex, uNodeFrom) &&
				  AllKnownOut(tree, EIs, uNeighborIndex, uNodeFrom))
					NextEdges.Add(uNeighborIndex, uNodeFrom);
				}
			}
		Edges.Copy(NextEdges);
		}

#if	TRACE
	ListEIs(EIs, uNodeCount);
#endif

	switch (RootMethod)
		{
	case ROOT_MidLongestSpan:
		RootByMidLongestSpan(tree, EIs, ptruNode1, ptruNode2,
		  ptrdLength1, ptrdLength2);
		break;

	case ROOT_MinAvgLeafDist:
		RootByMinAvgLeafDist(tree, EIs, ptruNode1, ptruNode2,
		  ptrdLength1, ptrdLength2);
		break;

	default:
		Quit("Invalid RootMethod=%d", RootMethod);
		}

	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		delete[] EIs[uNodeIndex];
	delete[] EIs;
	}