Ejemplo n.º 1
0
static void LogGap(unsigned uStart, unsigned uEnd, unsigned uGapLength,
  bool bNTerm, bool bCTerm)
	{
	Log("%16.16s  ", "");
	for (unsigned i = 0; i < uStart; ++i)
		Log(" ");
	unsigned uMyLength = 0;
	for (unsigned i = uStart; i <= uEnd; ++i)
		{
		bool bGap1 = g_ptrMSA1.get()->IsGap(g_uSeqIndex1.get(), i);
		bool bGap2 = g_ptrMSA2.get()->IsGap(g_uSeqIndex2.get(), i);
		if (!bGap1 && !bGap2)
			Quit("Error -- neither gapping");
		if (bGap1 && bGap2)
			Log(".");
		else
			{
			++uMyLength;
			Log("-");
			}
		}
	SCORE s = GapPenalty(uGapLength, bNTerm || bCTerm);
	Log(" L=%d N%d C%d s=%.3g", uGapLength, bNTerm, bCTerm, s);
	Log("\n");
	if (uMyLength != uGapLength)
		Quit("Lengths differ");

	}
Ejemplo n.º 2
0
static const char *LocalScoreToStr(SCORE s)
	{
	static TLS<char[16]> str;
	if (s < -100000)
		return "     *";
	sprintf(str.get(), "%6.1f", s);
	return str.get();
	}
Ejemplo n.º 3
0
static inline unsigned TriangleSubscript(unsigned uIndex1, unsigned uIndex2)
	{
#if	DEBUG
	if (uIndex1 >= g_uLeafCount.get() || uIndex2 >= g_uLeafCount.get())
		Quit("TriangleSubscript(%u,%u) %u", uIndex1, uIndex2, g_uLeafCount.get());
#endif
	unsigned v;
	if (uIndex1 >= uIndex2)
		v = uIndex2 + (uIndex1*(uIndex1 - 1))/2;
	else
		v = uIndex1 + (uIndex2*(uIndex2 - 1))/2;
	assert(v < (g_uLeafCount.get()*(g_uLeafCount.get() - 1))/2);
	return v;
	}
Ejemplo n.º 4
0
void SaveCurrentAlignment()
	{
    extern TLS<MSA *>ptrBestMSA;
	static TLS<bool> bCalled(false);
	if (bCalled.get())
		{
		fprintf(stderr,
		  "\nRecursive call to SaveCurrentAlignment, giving up attempt to save.\n");
		exit(EXIT_FatalError);
		}

	if (0 == ptrBestMSA.get())
		{
		fprintf(stderr, "\nAlignment not completed, cannot save.\n");
		Log("Alignment not completed, cannot save.\n");
		exit(EXIT_FatalError);
		}

	if (0 == pstrOutputFileName.get())
		{
		fprintf(stderr, "\nOutput file name not specified, cannot save.\n");
		exit(EXIT_FatalError);
		}

	fprintf(stderr, "\nSaving current alignment ...\n");

	TextFile fileOut(pstrOutputFileName.get(), true);
	ptrBestMSA.get()->ToFASTAFile(fileOut);

	fprintf(stderr, "Current alignment saved to \"%s\".\n", pstrOutputFileName.get());
	Log("Current alignment saved to \"%s\".\n", pstrOutputFileName.get());
	}
Ejemplo n.º 5
0
static bool _indication_proc(Instance* cimple_inst, void* client_data)
{
    TRACE;

    // This function is called by the CIMPLE Indication_Handler<> in order to
    // deliver a single indication.

    Adapter* adapter = (Adapter*)client_data;

    // If this is the final call, just return.

    if (cimple_inst == 0)
        return false;

    // Convert CIMPLE instance to CMPI instance:

    CMPIInstance* cmpi_inst = 0;

    CMPIrc rc = make_cmpi_instance(adapter->broker,
        cimple_inst, _INDICATIONS_NAMESPACE, 0, cmpi_inst);

    // Deliver the indication (we cannot do anything about failures).

    if (rc == CMPI_RC_OK)
    {
        // Grab the CMPI context from thread-specific-data.

        const CMPIContext* context = (const CMPIContext*)_context_tls.get();

        // Deliver the indication:

        CBDeliverIndication(
            adapter->broker, context, _INDICATIONS_NAMESPACE, cmpi_inst);
    }

    // Keep them coming!
    return true;
}
Ejemplo n.º 6
0
SCORE NWDASimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB, PWPath &Path)
	{
	assert(uLengthB > 0 && uLengthA > 0);

	const unsigned uPrefixCountA = uLengthA + 1;
	const unsigned uPrefixCountB = uLengthB + 1;

// Allocate DP matrices
	const size_t LM = uPrefixCountA*uPrefixCountB;
	SCORE *DPL_ = new SCORE[LM];
	SCORE *DPM_ = new SCORE[LM];
	SCORE *DPD_ = new SCORE[LM];
	SCORE *DPE_ = new SCORE[LM];
	SCORE *DPI_ = new SCORE[LM];
	SCORE *DPJ_ = new SCORE[LM];

	char *TBM_ = new char[LM];
	char *TBD_ = new char[LM];
	char *TBE_ = new char[LM];
	char *TBI_ = new char[LM];
	char *TBJ_ = new char[LM];

	memset(TBM_, '?', LM);
	memset(TBD_, '?', LM);
	memset(TBE_, '?', LM);
	memset(TBI_, '?', LM);
	memset(TBJ_, '?', LM);

	DPM(0, 0) = 0;
	DPD(0, 0) = MINUS_INFINITY;
	DPE(0, 0) = MINUS_INFINITY;
	DPI(0, 0) = MINUS_INFINITY;
	DPJ(0, 0) = MINUS_INFINITY;

	DPM(1, 0) = MINUS_INFINITY;
	DPD(1, 0) = PA[0].m_scoreGapOpen;
	DPE(1, 0) = PA[0].m_scoreGapOpen2;
	TBD(1, 0) = 'D';
	TBE(1, 0) = 'E';
	DPI(1, 0) = MINUS_INFINITY;
	DPJ(1, 0) = MINUS_INFINITY;

	DPM(0, 1) = MINUS_INFINITY;
	DPD(0, 1) = MINUS_INFINITY;
	DPE(0, 1) = MINUS_INFINITY;
	DPI(0, 1) = PB[0].m_scoreGapOpen;
	DPJ(0, 1) = PB[0].m_scoreGapOpen2;
	TBI(0, 1) = 'I';
	TBJ(0, 1) = 'J';

// Empty prefix of B is special case
	for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
		{
		DPM(uPrefixLengthA, 0) = MINUS_INFINITY;

		DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) + g_scoreGapExtend.get();
		DPE(uPrefixLengthA, 0) = DPE(uPrefixLengthA - 1, 0) + g_scoreGapExtend2.get();

		TBD(uPrefixLengthA, 0) = 'D';
		TBE(uPrefixLengthA, 0) = 'E';

		DPI(uPrefixLengthA, 0) = MINUS_INFINITY;
		DPJ(uPrefixLengthA, 0) = MINUS_INFINITY;
		}

// Empty prefix of A is special case
	for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
		{
		DPM(0, uPrefixLengthB) = MINUS_INFINITY;

		DPD(0, uPrefixLengthB) = MINUS_INFINITY;
		DPE(0, uPrefixLengthB) = MINUS_INFINITY;

		DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) + g_scoreGapExtend.get();
		DPJ(0, uPrefixLengthB) = DPJ(0, uPrefixLengthB - 1) + g_scoreGapExtend2.get();

		TBI(0, uPrefixLengthB) = 'I';
		TBJ(0, uPrefixLengthB) = 'J';
		}

// Special case to agree with NWFast, no D-I transitions so...
	DPD(uLengthA, 0) = MINUS_INFINITY;
	DPE(uLengthA, 0) = MINUS_INFINITY;
//	DPI(0, uLengthB) = MINUS_INFINITY;
//	DPJ(0, uLengthB) = MINUS_INFINITY;

// ============
// Main DP loop
// ============
	SCORE scoreGapCloseB = MINUS_INFINITY;
	SCORE scoreGapClose2B = MINUS_INFINITY;
	for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)
		{
		const ProfPos &PPB = PB[uPrefixLengthB - 1];

		SCORE scoreGapCloseA = MINUS_INFINITY;
		SCORE scoreGapClose2A = MINUS_INFINITY;
		for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)
			{
			const ProfPos &PPA = PA[uPrefixLengthA - 1];

			{
		// Match M=LetterA+LetterB
			SCORE scoreLL = ScoreProfPos2(PPA, PPB);
			DPL(uPrefixLengthA, uPrefixLengthB) = scoreLL;

			SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1);
			SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseA;
			SCORE scoreEM = DPE(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapClose2A;
			SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseB;
			SCORE scoreJM = DPJ(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapClose2B;

			SCORE scoreBest;
			if (scoreMM >= scoreDM && scoreMM >= scoreEM && scoreMM >= scoreIM && scoreMM >= scoreJM)
				{
				scoreBest = scoreMM;
				TBM(uPrefixLengthA, uPrefixLengthB) = 'M';
				}
			else if (scoreDM >= scoreMM && scoreDM >= scoreEM && scoreDM >= scoreIM && scoreDM >= scoreJM)
				{
				scoreBest = scoreDM;
				TBM(uPrefixLengthA, uPrefixLengthB) = 'D';
				}
			else if (scoreEM >= scoreMM && scoreEM >= scoreDM && scoreEM >= scoreIM && scoreEM >= scoreJM)
				{
				scoreBest = scoreEM;
				TBM(uPrefixLengthA, uPrefixLengthB) = 'E';
				}
			else if (scoreIM >= scoreMM && scoreIM >= scoreDM && scoreIM >= scoreEM && scoreIM >= scoreJM)
				{
				scoreBest = scoreIM;
				TBM(uPrefixLengthA, uPrefixLengthB) = 'I';
				}
			else
				{
				assert(scoreJM >= scoreMM && scoreJM >= scoreDM && scoreJM >= scoreEM && scoreJM >= scoreIM);
				scoreBest = scoreJM;
				TBM(uPrefixLengthA, uPrefixLengthB) = 'J';
				}
			DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL;
			}

			{
		// Delete D=LetterA+GapB
			SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) +
			  PA[uPrefixLengthA-1].m_scoreGapOpen;
			SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) + g_scoreGapExtend.get();

			SCORE scoreBest;
			if (scoreMD >= scoreDD)
				{
				scoreBest = scoreMD;
				TBD(uPrefixLengthA, uPrefixLengthB) = 'M';
				}
			else
				{
				assert(scoreDD >= scoreMD);
				scoreBest = scoreDD;
				TBD(uPrefixLengthA, uPrefixLengthB) = 'D';
				}
			DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;
			}

			{
		// Delete E=LetterA+GapB
			SCORE scoreME = DPM(uPrefixLengthA-1, uPrefixLengthB) +
			  PA[uPrefixLengthA-1].m_scoreGapOpen2;
			SCORE scoreEE = DPE(uPrefixLengthA-1, uPrefixLengthB) + g_scoreGapExtend2.get();

			SCORE scoreBest;
			if (scoreME >= scoreEE)
				{
				scoreBest = scoreME;
				TBE(uPrefixLengthA, uPrefixLengthB) = 'M';
				}
			else
				{
				assert(scoreEE >= scoreME);
				scoreBest = scoreEE;
				TBE(uPrefixLengthA, uPrefixLengthB) = 'E';
				}
			DPE(uPrefixLengthA, uPrefixLengthB) = scoreBest;
			}

		// Insert I=GapA+LetterB
			{
			SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) +
			  PB[uPrefixLengthB - 1].m_scoreGapOpen;
			SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) + g_scoreGapExtend.get();

			SCORE scoreBest;
			if (scoreMI >= scoreII)
				{
				scoreBest = scoreMI;
				TBI(uPrefixLengthA, uPrefixLengthB) = 'M';
				}
			else 
				{
				assert(scoreII > scoreMI);
				scoreBest = scoreII;
				TBI(uPrefixLengthA, uPrefixLengthB) = 'I';
				}
			DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;
			}

		// Insert J=GapA+LetterB
			{
			SCORE scoreMJ = DPM(uPrefixLengthA, uPrefixLengthB-1) +
			  PB[uPrefixLengthB - 1].m_scoreGapOpen2;
			SCORE scoreJJ = DPJ(uPrefixLengthA, uPrefixLengthB-1) + g_scoreGapExtend2.get();

			SCORE scoreBest;
			if (scoreMJ >= scoreJJ)
				{
				scoreBest = scoreMJ;
				TBJ(uPrefixLengthA, uPrefixLengthB) = 'M';
				}
			else 
				{
				assert(scoreJJ > scoreMJ);
				scoreBest = scoreJJ;
				TBJ(uPrefixLengthA, uPrefixLengthB) = 'J';
				}
			DPJ(uPrefixLengthA, uPrefixLengthB) = scoreBest;
			}

			scoreGapCloseA = PPA.m_scoreGapClose;
			scoreGapClose2A = PPA.m_scoreGapClose2;
			}
		scoreGapCloseB = PPB.m_scoreGapClose;
		scoreGapClose2B = PPB.m_scoreGapClose2;
		}

#if TRACE
	Log("\n");
	Log("DA Simple DPL:\n");
	ListDP(DPL_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple DPM:\n");
	ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple DPD:\n");
	ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple DPE:\n");
	ListDP(DPE_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple DPI:\n");
	ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple DPJ:\n");
	ListDP(DPJ_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple TBM:\n");
	ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple TBD:\n");
	ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple TBE:\n");
	ListTB(TBE_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple TBI:\n");
	ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB);
	Log("\n");
	Log("DA Simple TBJ:\n");
	ListTB(TBJ_, PA, PB, uPrefixCountA, uPrefixCountB);
#endif

// Trace-back
// ==========
	Path.Clear();

// Find last edge
	SCORE M = DPM(uLengthA, uLengthB);
	SCORE D = DPD(uLengthA, uLengthB) + PA[uLengthA-1].m_scoreGapClose;
	SCORE E = DPE(uLengthA, uLengthB) + PA[uLengthA-1].m_scoreGapClose2;
	SCORE I = DPI(uLengthA, uLengthB) + PB[uLengthB-1].m_scoreGapClose;
	SCORE J = DPJ(uLengthA, uLengthB) + PB[uLengthB-1].m_scoreGapClose2;
	char cEdgeType = '?';

	SCORE BestScore = M;
	cEdgeType = 'M';
	if (D > BestScore)
		{
		cEdgeType = 'D';
		BestScore = D;
		}
	if (E > BestScore)
		{
		cEdgeType = 'E';
		BestScore = E;
		}
	if (I > BestScore)
		{
		cEdgeType = 'I';
		BestScore = I;
		}
	if (J > BestScore)
		{
		cEdgeType = 'J';
		BestScore = J;
		}

#if	TRACE
	Log("DA Simple: MAB=%.4g DAB=%.4g EAB=%.4g IAB=%.4g JAB=%.4g best=%c\n",
	  M, D, E, I, J, cEdgeType);
#endif

	unsigned PLA = uLengthA;
	unsigned PLB = uLengthB;
	for (;;)
		{
		PWEdge Edge;
		Edge.cType = XlatEdgeType(cEdgeType);
		Edge.uPrefixLengthA = PLA;
		Edge.uPrefixLengthB = PLB;
#if	TRACE
		Log("Prepend %c%d.%d\n", Edge.cType, PLA, PLB);
#endif
		Path.PrependEdge(Edge);

		switch (cEdgeType)
			{
		case 'M':
			assert(PLA > 0);
			assert(PLB > 0);
			cEdgeType = TBM(PLA, PLB);
			--PLA;
			--PLB;
			break;

		case 'D':
			assert(PLA > 0);
			cEdgeType = TBD(PLA, PLB);
			--PLA;
			break;

		case 'E':
			assert(PLA > 0);
			cEdgeType = TBE(PLA, PLB);
			--PLA;
			break;

		case 'I':
			assert(PLB > 0);
			cEdgeType = TBI(PLA, PLB);
			--PLB;
			break;
		
		case 'J':
			assert(PLB > 0);
			cEdgeType = TBJ(PLA, PLB);
			--PLB;
			break;

		default:
			Quit("Invalid edge %c", cEdgeType);
			}
		if (0 == PLA && 0 == PLB)
			break;
		}
	Path.Validate();

//	SCORE Score = TraceBack(PA, uLengthA, PB, uLengthB, DPM_, DPD_, DPI_, Path);

#if	TRACE
	SCORE scorePath = FastScorePath2(PA, uLengthA, PB, uLengthB, Path);
	Path.LogMe();
	Log("Score = %s Path = %s\n", LocalScoreToStr(BestScore), LocalScoreToStr(scorePath));
#endif

	if (g_bKeepSimpleDP.get())
		{
		g_DPM.get() = DPM_;
		g_DPD.get() = DPD_;
		g_DPE.get() = DPE_;
		g_DPI.get() = DPI_;
		g_DPJ.get() = DPJ_;

		g_TBM.get() = TBM_;
		g_TBD.get() = TBD_;
		g_TBE.get() = TBE_;
		g_TBI.get() = TBI_;
		g_TBJ.get() = TBJ_;
		}
	else
		{
		delete[] DPM_;
		delete[] DPD_;
		delete[] DPE_;
		delete[] DPI_;
		delete[] DPJ_;

		delete[] TBM_;
		delete[] TBD_;
		delete[] TBE_;
		delete[] TBI_;
		delete[] TBJ_;
		}

	return BestScore;
	}
Ejemplo n.º 7
0
static void ListState()
	{
	Log("Dist matrix\n");
	Log("     ");
	for (unsigned i = 0; i < g_uLeafCount.get(); ++i)
		{
		if (uInsane == g_uNodeIndex.get()[i])
			continue;
		Log("  %5u", g_uNodeIndex.get()[i]);
		}
	Log("\n");

	for (unsigned i = 0; i < g_uLeafCount.get(); ++i)
		{
		if (uInsane == g_uNodeIndex.get()[i])
			continue;
		Log("%5u  ", g_uNodeIndex.get()[i]);
		for (unsigned j = 0; j < g_uLeafCount.get(); ++j)
			{
			if (uInsane == g_uNodeIndex.get()[j])
				continue;
			if (i == j)
				Log("       ");
			else
				{
				unsigned v = TriangleSubscript(i, j);
				Log("%5.2g  ", g_Dist.get()[v]);
				}
			}
		Log("\n");
		}

	Log("\n");
	Log("    i   Node   NrNb      Dist\n");
	Log("-----  -----  -----  --------\n");
	for (unsigned i = 0; i < g_uLeafCount.get(); ++i)
		{
		if (uInsane == g_uNodeIndex.get()[i])
			continue;
		Log("%5u  %5u  %5u  %8.3f\n",
		  i,
		  g_uNodeIndex.get()[i],
		  g_uNearestNeighbor.get()[i],
		  g_MinDist.get()[i]);
		}

	Log("\n");
	Log(" Node      L      R  Height  LLength  RLength\n");
	Log("-----  -----  -----  ------  -------  -------\n");
	for (unsigned i = 0; i <= g_uInternalNodeIndex.get(); ++i)
		Log("%5u  %5u  %5u  %6.2g  %6.2g  %6.2g\n",
		  i,
		  g_uLeft.get()[i],
		  g_uRight.get()[i],
		  g_Height.get()[i],
		  g_LeftLength.get()[i],
		  g_RightLength.get()[i]);
	}
Ejemplo n.º 8
0
void UPGMA2(const DistCalc &DC, Tree &tree, LINKAGE Linkage)
	{
	g_uLeafCount.get() = DC.GetCount();

	g_uTriangleSize.get() = (g_uLeafCount.get()*(g_uLeafCount.get() - 1))/2;
	g_uInternalNodeCount.get() = g_uLeafCount.get() - 1;

	g_Dist.get() = new dist_t[g_uTriangleSize.get()];

	g_uNodeIndex.get() = new unsigned[g_uLeafCount.get()];
	g_uNearestNeighbor.get() = new unsigned[g_uLeafCount.get()];
	g_MinDist.get() = new dist_t[g_uLeafCount.get()];
	unsigned *Ids = new unsigned [g_uLeafCount.get()];
	char **Names = new char *[g_uLeafCount.get()];

	g_uLeft.get() = new unsigned[g_uInternalNodeCount.get()];
	g_uRight.get() = new unsigned[g_uInternalNodeCount.get()];
	g_Height.get() = new dist_t[g_uInternalNodeCount.get()];
	g_LeftLength.get() = new dist_t[g_uInternalNodeCount.get()];
	g_RightLength.get() = new dist_t[g_uInternalNodeCount.get()];

	for (unsigned i = 0; i < g_uLeafCount.get(); ++i)
		{
		g_MinDist.get()[i] = BIG_DIST;
		g_uNodeIndex.get()[i] = i;
		g_uNearestNeighbor.get()[i] = uInsane;
		Ids[i] = DC.GetId(i);
		Names[i] = strsave(DC.GetName(i));
		}

	for (unsigned i = 0; i < g_uInternalNodeCount.get(); ++i)
		{
		g_uLeft.get()[i] = uInsane;
		g_uRight.get()[i] = uInsane;
		g_LeftLength.get()[i] = BIG_DIST;
		g_RightLength.get()[i] = BIG_DIST;
		g_Height.get()[i] = BIG_DIST;
		}

// Compute initial NxN triangular distance matrix.
// Store minimum distance for each full (not triangular) row.
// Loop from 1, not 0, because "row" is 0, 1 ... i-1,
// so nothing to do when i=0.
	for (unsigned i = 1; i < g_uLeafCount.get(); ++i)
		{
		dist_t *Row = g_Dist.get() + TriangleSubscript(i, 0);
		DC.CalcDistRange(i, Row);
		for (unsigned j = 0; j < i; ++j)
			{
			const dist_t d = Row[j];
			if (d < g_MinDist.get()[i])
				{
				g_MinDist.get()[i] = d;
				g_uNearestNeighbor.get()[i] = j;
				}
			if (d < g_MinDist.get()[j])
				{
				g_MinDist.get()[j] = d;
				g_uNearestNeighbor.get()[j] = i;
				}
			}
		}

#if	TRACE
	Log("Initial state:\n");
	ListState();
#endif

	for (g_uInternalNodeIndex.get() = 0; g_uInternalNodeIndex.get() < g_uLeafCount.get() - 1;
	  ++g_uInternalNodeIndex.get())
		{
#if	TRACE
		Log("\n");
		Log("Internal node index %5u\n", g_uInternalNodeIndex.get());
		Log("-------------------------\n");
#endif

	// Find nearest neighbors
		unsigned Lmin = uInsane;
		unsigned Rmin = uInsane;
		dist_t dtMinDist = BIG_DIST;
		for (unsigned j = 0; j < g_uLeafCount.get(); ++j)
			{
			if (uInsane == g_uNodeIndex.get()[j])
				continue;

			dist_t d = g_MinDist.get()[j];
			if (d < dtMinDist)
				{
				dtMinDist = d;
				Lmin = j;
				Rmin = g_uNearestNeighbor.get()[j];
				assert(uInsane != Rmin);
				assert(uInsane != g_uNodeIndex.get()[Rmin]);
				}
			}

		assert(Lmin != uInsane);
		assert(Rmin != uInsane);
		assert(dtMinDist != BIG_DIST);

#if	TRACE
		Log("Nearest neighbors Lmin %u[=%u] Rmin %u[=%u] dist %.3g\n",
		  Lmin,
		  g_uNodeIndex.get()[Lmin],
		  Rmin,
		  g_uNodeIndex.get()[Rmin],
		  dtMinDist);
#endif

	// Compute distances to new node
	// New node overwrites row currently assigned to Lmin
		dist_t dtNewMinDist = BIG_DIST;
		unsigned uNewNearestNeighbor = uInsane;
		for (unsigned j = 0; j < g_uLeafCount.get(); ++j)
			{
			if (j == Lmin || j == Rmin)
				continue;
			if (uInsane == g_uNodeIndex.get()[j])
				continue;

			const unsigned vL = TriangleSubscript(Lmin, j);
			const unsigned vR = TriangleSubscript(Rmin, j);
			const dist_t dL = g_Dist.get()[vL];
			const dist_t dR = g_Dist.get()[vR];
			dist_t dtNewDist;

			switch (Linkage)
				{
			case LINKAGE_Avg:
				dtNewDist = AVG(dL, dR);
				break;

			case LINKAGE_Min:
				dtNewDist = MIN(dL, dR);
				break;

			case LINKAGE_Max:
				dtNewDist = MAX(dL, dR);
				break;

			case LINKAGE_Biased:
				dtNewDist = g_dSUEFF.get()*AVG(dL, dR) + (1 - g_dSUEFF.get())*MIN(dL, dR);
				break;

			default:
				Quit("UPGMA2: Invalid LINKAGE_%u", Linkage);
				}

		// Nasty special case.
		// If nearest neighbor of j is Lmin or Rmin, then make the new
		// node (which overwrites the row currently occupied by Lmin)
		// the nearest neighbor. This situation can occur when there are
		// equal distances in the matrix. If we don't make this fix,
		// the nearest neighbor pointer for j would become invalid.
		// (We don't need to test for == Lmin, because in that case
		// the net change needed is zero due to the change in row
		// numbering).
			if (g_uNearestNeighbor.get()[j] == Rmin)
				g_uNearestNeighbor.get()[j] = Lmin;

#if	TRACE
			Log("New dist to %u = (%u/%.3g + %u/%.3g)/2 = %.3g\n",
			  j, Lmin, dL, Rmin, dR, dtNewDist);
#endif
			g_Dist.get()[vL] = dtNewDist;
			if (dtNewDist < dtNewMinDist)
				{
				dtNewMinDist = dtNewDist;
				uNewNearestNeighbor = j;
				}
			}

		assert(g_uInternalNodeIndex.get() < g_uLeafCount.get() - 1 || BIG_DIST != dtNewMinDist);
		assert(g_uInternalNodeIndex.get() < g_uLeafCount.get() - 1 || uInsane != uNewNearestNeighbor);

		const unsigned v = TriangleSubscript(Lmin, Rmin);
		const dist_t dLR = g_Dist.get()[v];
		const dist_t dHeightNew = dLR/2;
		const unsigned uLeft = g_uNodeIndex.get()[Lmin];
		const unsigned uRight = g_uNodeIndex.get()[Rmin];
		const dist_t HeightLeft =
		  uLeft < g_uLeafCount.get() ? 0 : g_Height.get()[uLeft - g_uLeafCount.get()];
		const dist_t HeightRight =
		  uRight < g_uLeafCount.get() ? 0 : g_Height.get()[uRight - g_uLeafCount.get()];

		g_uLeft.get()[g_uInternalNodeIndex.get()] = uLeft;
		g_uRight.get()[g_uInternalNodeIndex.get()] = uRight;
		g_LeftLength.get()[g_uInternalNodeIndex.get()] = dHeightNew - HeightLeft;
		g_RightLength.get()[g_uInternalNodeIndex.get()] = dHeightNew - HeightRight;
		g_Height.get()[g_uInternalNodeIndex.get()] = dHeightNew;

	// Row for left child overwritten by row for new node
		g_uNodeIndex.get()[Lmin] = g_uLeafCount.get() + g_uInternalNodeIndex.get();
		g_uNearestNeighbor.get()[Lmin] = uNewNearestNeighbor;
		g_MinDist.get()[Lmin] = dtNewMinDist;

	// Delete row for right child
		g_uNodeIndex.get()[Rmin] = uInsane;

#if	TRACE
		Log("\nInternalNodeIndex=%u Lmin=%u Rmin=%u\n",
		  g_uInternalNodeIndex.get(), Lmin, Rmin);
		ListState();
#endif
		}

	unsigned uRoot = g_uLeafCount.get() - 2;
	tree.Create(g_uLeafCount.get(), uRoot, g_uLeft.get(), g_uRight.get(), g_LeftLength.get(), g_RightLength.get(),
	  Ids, Names);

#if	TRACE
	tree.LogMe();
#endif

	delete[] g_Dist.get();

	delete[] g_uNodeIndex.get();
	delete[] g_uNearestNeighbor.get();
	delete[] g_MinDist.get();
	delete[] g_Height.get();

	delete[] g_uLeft.get();
	delete[] g_uRight.get();
	delete[] g_LeftLength.get();
	delete[] g_RightLength.get();
	
	for (unsigned i = 0; i < g_uLeafCount.get(); ++i)
		free(Names[i]);
	delete[] Names;
	delete[] Ids;
	}
Ejemplo n.º 9
0
// WARNING: Sequences MUST be stripped of gaps and upper case!
void DistKmer20_3(const SeqVect &v, DistFunc &DF)
	{
	const unsigned uSeqCount = v.Length();

	DF.SetCount(uSeqCount);
	if (0 == uSeqCount)
		return;
	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
		{
		DF.SetDist(uSeq1, uSeq1, 0);
		for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
			DF.SetDist(uSeq1, uSeq2, 0);
		}

	const unsigned uTripleArrayBytes = TRIPLE_COUNT*sizeof(TripleCount);
	TripleCounts.get() = (TripleCount *) malloc(uTripleArrayBytes);
	if (0 == TripleCounts.get())
		Quit("Not enough memory (TripleCounts)");
	memset(TripleCounts.get(), 0, uTripleArrayBytes);

	for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)
		{
		TripleCount &tc = *(TripleCounts.get() + uWord);
		const unsigned uBytes = uSeqCount*sizeof(short);
		tc.m_Counts = (unsigned short *) malloc(uBytes);
		memset(tc.m_Counts, 0, uBytes);
		}

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		Seq &s = *(v[uSeqIndex]);
		const unsigned uSeqLength = s.Length();
		for (unsigned uPos = 0; uPos < uSeqLength - 2; ++uPos)
			{
			const unsigned uLetter1 = CharToLetterEx(s[uPos]);
			if (uLetter1 >= 20)
				continue;
			const unsigned uLetter2 = CharToLetterEx(s[uPos+1]);
			if (uLetter2 >= 20)
				continue;
			const unsigned uLetter3 = CharToLetterEx(s[uPos+2]);
			if (uLetter3 >= 20)
				continue;

			const unsigned uWord = uLetter1 + uLetter2*20 + uLetter3*20*20;
			assert(uWord < TRIPLE_COUNT);

			TripleCount &tc = *(TripleCounts.get() + uWord);
			const unsigned uOldCount = tc.m_Counts[uSeqIndex];
			if (0 == uOldCount)
				++(tc.m_uSeqCount);

			++(tc.m_Counts[uSeqIndex]);
			}
		}

#if TRACE
	{
	Log("TripleCounts\n");
	unsigned uGrandTotal = 0;
	for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)
		{
		const TripleCount &tc = *(TripleCounts.get() + uWord);
		if (0 == tc.m_uSeqCount)
			continue;

		const unsigned uLetter3 = uWord/(20*20);
		const unsigned uLetter2 = (uWord - uLetter3*20*20)/20;
		const unsigned uLetter1 = uWord%20;
		Log("Word %6u %c%c%c   %6u",
		  uWord,
		  LetterToCharAmino(uLetter1),
		  LetterToCharAmino(uLetter2),
		  LetterToCharAmino(uLetter3),
		  tc.m_uSeqCount);

		unsigned uSeqCountWithThisWord = 0;
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			{
			const unsigned uCount = tc.m_Counts[uSeqIndex];
			if (uCount > 0)
				{
				++uSeqCountWithThisWord;
				Log(" %u=%u", uSeqIndex, uCount);
				uGrandTotal += uCount;
				}
			}
		if (uSeqCountWithThisWord != tc.m_uSeqCount)
			Log(" *** SQ ERROR *** %u %u", tc.m_uSeqCount, uSeqCountWithThisWord);
		Log("\n");
		}
	
	unsigned uTotalBySeqLength = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		Seq &s = *(v[uSeqIndex]);
		const unsigned uSeqLength = s.Length();
		uTotalBySeqLength += uSeqLength - 2;
		}
	if (uGrandTotal != uTotalBySeqLength)
		Log("*** TOTALS DISAGREE *** %u %u\n", uGrandTotal, uTotalBySeqLength);
	}
#endif

	const unsigned uSeqListBytes = uSeqCount*sizeof(unsigned);
	unsigned short *SeqList = (unsigned short *) malloc(uSeqListBytes);

	for (unsigned uWord = 0; uWord < TRIPLE_COUNT; ++uWord)
		{
		const TripleCount &tc = *(TripleCounts.get() + uWord);
		if (0 == tc.m_uSeqCount)
			continue;

		unsigned uSeqCountFound = 0;
		memset(SeqList, 0, uSeqListBytes);

		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			{
			if (tc.m_Counts[uSeqIndex] > 0)
				{
				SeqList[uSeqCountFound] = uSeqIndex;
				++uSeqCountFound;
				if (uSeqCountFound == tc.m_uSeqCount)
					break;
				}
			}
		assert(uSeqCountFound == tc.m_uSeqCount);

		for (unsigned uSeq1 = 0; uSeq1 < uSeqCountFound; ++uSeq1)
			{
			const unsigned uSeqIndex1 = SeqList[uSeq1];
			const unsigned uCount1 = tc.m_Counts[uSeqIndex1];
			for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
				{
				const unsigned uSeqIndex2 = SeqList[uSeq2];
				const unsigned uCount2 = tc.m_Counts[uSeqIndex2];
				const unsigned uMinCount = uCount1 < uCount2 ? uCount1 : uCount2;
				const double d = DF.GetDist(uSeqIndex1, uSeqIndex2);
				DF.SetDist(uSeqIndex1, uSeqIndex2, (float) (d + uMinCount));
				}
			}
		}
	delete[] SeqList;
	free(TripleCounts.get());

	unsigned uDone = 0;
	const unsigned uTotal = (uSeqCount*(uSeqCount - 1))/2;
	for (unsigned uSeq1 = 0; uSeq1 < uSeqCount; ++uSeq1)
		{
		DF.SetDist(uSeq1, uSeq1, 0.0);

		const Seq &s1 = *(v[uSeq1]);
		const unsigned uLength1 = s1.Length();

		for (unsigned uSeq2 = 0; uSeq2 < uSeq1; ++uSeq2)
			{
			const Seq &s2 = *(v[uSeq2]);
			const unsigned uLength2 = s2.Length();
			unsigned uMinLength = uLength1 < uLength2 ? uLength1 : uLength2;
			if (uMinLength < 3)
				{
				DF.SetDist(uSeq1, uSeq2, 1.0);
				continue;
				}

			const double dTripleCount = DF.GetDist(uSeq1, uSeq2);
			if (dTripleCount == 0)
				{
				DF.SetDist(uSeq1, uSeq2, 1.0);
				continue;
				}
			double dNormalizedTripletScore = dTripleCount/(uMinLength - 2);
			//double dEstimatedPairwiseIdentity = exp(0.3912*log(dNormalizedTripletScore));
			//if (dEstimatedPairwiseIdentity > 1)
			//	dEstimatedPairwiseIdentity = 1;
//			DF.SetDist(uSeq1, uSeq2, (float) (1.0 - dEstimatedPairwiseIdentity));
			DF.SetDist(uSeq1, uSeq2, (float) dNormalizedTripletScore);

#if	TRACE
			{
			Log("%s - %s  Triplet count = %g  Lengths %u, %u Estimated pwid = %g\n",
			  s1.GetName(), s2.GetName(), dTripleCount, uLength1, uLength2,
			  dEstimatedPairwiseIdentity);
			}
#endif
			if (uDone%1000 == 0)
				Progress(uDone, uTotal);
			}
		}
	ProgressStepsDone();
	}
Ejemplo n.º 10
0
static SCORE ScoreSeqPair(const MSA &msa1, unsigned uSeqIndex1,
  const MSA &msa2, unsigned uSeqIndex2, SCORE *ptrLetters, SCORE *ptrGaps)
	{
	g_ptrMSA1.get() = &msa1;
	g_ptrMSA2.get() = &msa2;
	g_uSeqIndex1.get() = uSeqIndex1;
	g_uSeqIndex2.get() = uSeqIndex2;

	const unsigned uColCount = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount != uColCount2)
		Quit("ScoreSeqPair, different lengths");

#if	TRACE
	Log("ScoreSeqPair\n");
	Log("%16.16s  ", msa1.GetSeqName(uSeqIndex1));
	for (unsigned i = 0; i < uColCount; ++i)
		Log("%c", msa1.GetChar(uSeqIndex1, i));
	Log("\n");
	Log("%16.16s  ", msa2.GetSeqName(uSeqIndex2));
	for (unsigned i = 0; i < uColCount; ++i)
		Log("%c", msa1.GetChar(uSeqIndex2, i));
	Log("\n");
#endif

	SCORE scoreTotal = 0;

// Substitution scores
	unsigned uFirstLetter1 = uInsane;
	unsigned uFirstLetter2 = uInsane;
	unsigned uLastLetter1 = uInsane;
	unsigned uLastLetter2 = uInsane;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
		bool bWildcard1 = msa1.IsWildcard(uSeqIndex1, uColIndex);
		bool bWildcard2 = msa2.IsWildcard(uSeqIndex2, uColIndex);

		if (!bGap1)
			{
			if (uInsane == uFirstLetter1)
				uFirstLetter1 = uColIndex;
			uLastLetter1 = uColIndex;
			}
		if (!bGap2)
			{
			if (uInsane == uFirstLetter2)
				uFirstLetter2 = uColIndex;
			uLastLetter2 = uColIndex;
			}

		if (bGap1 || bGap2 || bWildcard1 || bWildcard2)
			continue;

		unsigned uLetter1 = msa1.GetLetter(uSeqIndex1, uColIndex);
		unsigned uLetter2 = msa2.GetLetter(uSeqIndex2, uColIndex);

		SCORE scoreMatch = (*g_ptrScoreMatrix.get())[uLetter1][uLetter2];
		scoreTotal += scoreMatch;
#if	TRACE
		Log("%c <-> %c = %7.1f  %10.1f\n",
		  msa1.GetChar(uSeqIndex1, uColIndex),
		  msa2.GetChar(uSeqIndex2, uColIndex),
		  scoreMatch,
		  scoreTotal);
#endif
		}
	
	*ptrLetters = scoreTotal;

// Gap penalties
	unsigned uGapLength = uInsane;
	unsigned uGapStartCol = uInsane;
	bool bGapping1 = false;
	bool bGapping2 = false;

	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);

		if (bGap1 && bGap2)
			continue;

		if (bGapping1)
			{
			if (bGap1)
				++uGapLength;
			else
				{
				bGapping1 = false;
				bool bNTerm = (uFirstLetter2 == uGapStartCol);
				bool bCTerm = (uLastLetter2 + 1 == uColIndex);
				SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm);
				scoreTotal += scoreGap;
#if	TRACE
				LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm);
				Log("GAP         %7.1f  %10.1f\n",
				  scoreGap,
				  scoreTotal);
#endif
				}
			continue;
			}
		else
			{
			if (bGap1)
				{
				uGapStartCol = uColIndex;
				bGapping1 = true;
				uGapLength = 1;
				continue;
				}
			}

		if (bGapping2)
			{
			if (bGap2)
				++uGapLength;
			else
				{
				bGapping2 = false;
				bool bNTerm = (uFirstLetter1 == uGapStartCol);
				bool bCTerm = (uLastLetter1 + 1 == uColIndex);
				SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm);
				scoreTotal += scoreGap;
#if	TRACE
				LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm);
				Log("GAP         %7.1f  %10.1f\n",
				  scoreGap,
				  scoreTotal);
#endif
				}
			}
		else
			{
			if (bGap2)
				{
				uGapStartCol = uColIndex;
				bGapping2 = true;
				uGapLength = 1;
				}
			}
		}

	if (bGapping1 || bGapping2)
		{
		SCORE scoreGap = GapPenalty(uGapLength, true);
		scoreTotal += scoreGap;
#if	TRACE
		LogGap(uGapStartCol, uColCount - 1, uGapLength, false, true);
		Log("GAP         %7.1f  %10.1f\n",
		  scoreGap,
		  scoreTotal);
#endif
		}
	*ptrGaps = scoreTotal - *ptrLetters;
	return scoreTotal;
	}
Ejemplo n.º 11
0
void SetCurrentAlignment(MSA &msa)
	{
    extern TLS<MSA *>ptrBestMSA;
	ptrBestMSA.get() = &msa;
	}
Ejemplo n.º 12
0
void SetOutputFileName(const char *out)
	{
	pstrOutputFileName.get() = out;
	}