コード例 #1
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
void MSAFromColRange(const MSA &msaIn, unsigned uFromColIndex, unsigned uColCount,
  MSA &msaOut)
	{
	const unsigned uSeqCount = msaIn.GetSeqCount();
	const unsigned uInColCount = msaIn.GetColCount();

	if (uFromColIndex + uColCount - 1 > uInColCount)
		Quit("MSAFromColRange, out of bounds");

	msaOut.SetSize(uSeqCount, uColCount);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const char *ptrName = msaIn.GetSeqName(uSeqIndex);
		unsigned uId = msaIn.GetSeqId(uSeqIndex);
		msaOut.SetSeqName(uSeqIndex, ptrName);
		msaOut.SetSeqId(uSeqIndex, uId);

		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msaIn.GetChar(uSeqIndex, uFromColIndex + uColIndex);
			msaOut.SetChar(uSeqIndex, uColIndex, c);
			}
		}
	}
コード例 #2
0
ファイル: seqvect.cpp プロジェクト: cran/muscle
void SeqVect::PadToMSA(MSA &msa)
	{
	unsigned uSeqCount = Length();
	if (0 == uSeqCount)
		{
		msa.Clear();
		return;
		}

	unsigned uLongestSeqLength = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		Seq *ptrSeq = at(uSeqIndex);
		unsigned uColCount = ptrSeq->Length();
		if (uColCount > uLongestSeqLength)
			uLongestSeqLength = uColCount;
		}
	msa.SetSize(uSeqCount, uLongestSeqLength);
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		Seq *ptrSeq = at(uSeqIndex);
		msa.SetSeqName(uSeqIndex, ptrSeq->GetName());
		unsigned uColCount = ptrSeq->Length();
		unsigned uColIndex;
		for (uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			char c = ptrSeq->at(uColIndex);
			msa.SetChar(uSeqIndex, uColIndex, c);
			}
		while (uColIndex < uLongestSeqLength)
			msa.SetChar(uSeqIndex, uColIndex++, '.');
		}
	}
コード例 #3
0
ファイル: estring.cpp プロジェクト: Unode/ext_apps
unsigned EstringOp(const short es[], const Seq &sIn, MSA &a)
	{
	unsigned uSymbols;
	unsigned uIndels;
	EstringCounts(es, &uSymbols, &uIndels);
	assert(sIn.Length() == uSymbols);

	unsigned uColCount = uSymbols + uIndels;

	a.Clear();
	a.SetSize(1, uColCount);

	a.SetSeqName(0, sIn.GetName());
	a.SetSeqId(0, sIn.GetId());

	unsigned p = 0;
	unsigned uColIndex = 0;
	for (;;)
		{
		int n = *es++;
		if (0 == n)
			break;
		if (n > 0)
			for (int i = 0; i < n; ++i)
				{
				char c = sIn[p++];
				a.SetChar(0, uColIndex++, c);
				}
		else
			for (int i = 0; i < -n; ++i)
				a.SetChar(0, uColIndex++, '-');
		}
	assert(uColIndex == uColCount);
	return uColCount;
	}
コード例 #4
0
ファイル: seq.cpp プロジェクト: bigmuscle/bigmuscle
void Seq::ExtractUngapped(MSA &msa) const
	{
	msa.Clear();
	unsigned uColCount = Length();
	msa.SetSize(1, 1);
	unsigned uUngappedPos = 0;
	for (unsigned n = 0; n < uColCount; ++n)
		{
		char c = at(n);
		if (!IsGapChar(c))
			msa.SetChar(0, uUngappedPos++, c);
		}
	msa.SetSeqName(0, m_ptrName);
	}
コード例 #5
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
void MSAFromSeqRange(const MSA &msaIn, unsigned uFromSeqIndex, unsigned uSeqCount,
  MSA &msaOut)
	{
	const unsigned uColCount = msaIn.GetColCount();
	msaOut.SetSize(uSeqCount, uColCount);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		const char *ptrName = msaIn.GetSeqName(uFromSeqIndex + uSeqIndex);
		msaOut.SetSeqName(uSeqIndex, ptrName);

		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msaIn.GetChar(uFromSeqIndex + uSeqIndex, uColIndex);
			msaOut.SetChar(uSeqIndex, uColIndex, c);
			}
		}
	}
コード例 #6
0
ファイル: stabilize.cpp プロジェクト: bigmuscle/bigmuscle
void Stabilize(const MSA &msa, MSA &msaStable)
	{
	const unsigned uSeqCount = msa.GetSeqCount();
	const unsigned uColCount = msa.GetColCount();

	msaStable.SetSize(uSeqCount, uColCount);
	for (unsigned uId = 0; uId < uSeqCount; ++uId)
		{
		const unsigned uSeqIndex = msa.GetSeqIndex(uId);
		msaStable.SetSeqName(uId, msa.GetSeqName(uSeqIndex));
		msaStable.SetSeqId(uSeqIndex, uId);
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msa.GetChar(uSeqIndex, uColIndex);
			msaStable.SetChar(uId, uColIndex, c);
			}
		}
	}
コード例 #7
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
void MSAFromSeqSubset(const MSA &msaIn, const unsigned uSeqIndexes[], unsigned uSeqCount,
  MSA &msaOut)
	{
	const unsigned uColCount = msaIn.GetColCount();
	msaOut.SetSize(uSeqCount, uColCount);
	for (unsigned uSeqIndexOut = 0; uSeqIndexOut < uSeqCount; ++uSeqIndexOut)
		{
		unsigned uSeqIndexIn = uSeqIndexes[uSeqIndexOut];
		const char *ptrName = msaIn.GetSeqName(uSeqIndexIn);
		unsigned uId = msaIn.GetSeqId(uSeqIndexIn);
		msaOut.SetSeqName(uSeqIndexOut, ptrName);
		msaOut.SetSeqId(uSeqIndexOut, uId);
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msaIn.GetChar(uSeqIndexIn, uColIndex);
			msaOut.SetChar(uSeqIndexOut, uColIndex, c);
			}
		}
	}
コード例 #8
0
ファイル: msa2.cpp プロジェクト: bigmuscle/bigmuscle
// "Catenate" two MSAs (by bad analogy with UNIX cat command).
// msa1 and msa2 must have same sequence names, but possibly
// in a different order.
// msaCat is the combined alignment produce by appending
// sequences in msa2 to sequences in msa1.
void MSACat(const MSA &msa1, const MSA &msa2, MSA &msaCat)
	{
	const unsigned uSeqCount = msa1.GetSeqCount();

	const unsigned uColCount1 = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	const unsigned uColCountCat = uColCount1 + uColCount2;

	msaCat.SetSize(uSeqCount, uColCountCat);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		for (unsigned uColIndex = 0; uColIndex < uColCount1; ++uColIndex)
			{
			const char c = msa1.GetChar(uSeqIndex, uColIndex);
			msaCat.SetChar(uSeqIndex, uColIndex, c);
			}

		const char *ptrSeqName = msa1.GetSeqName(uSeqIndex);
		unsigned uSeqIndex2;
		msaCat.SetSeqName(uSeqIndex, ptrSeqName);
		bool bFound = msa2.GetSeqIndex(ptrSeqName, &uSeqIndex2);
		if (bFound)
			{
			for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
				{
				const char c = msa2.GetChar(uSeqIndex2, uColIndex);
				msaCat.SetChar(uSeqIndex, uColCount1 + uColIndex, c);
				}
			}
		else
			{
			for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
				msaCat.SetChar(uSeqIndex, uColCount1 + uColIndex, '-');
			}
		}
	}
コード例 #9
0
ファイル: anchoredpp.cpp プロジェクト: Wyss/mauve-py
/* a version of ScoreSeqPairGaps that computes a per-residue score */
SCORE ScoreSeqPairGaps(const MSA &msa1, unsigned uSeqIndex1,
  const MSA &msa2, unsigned uSeqIndex2, SCORE MatchScore[] )
	{
	const unsigned uColCount = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount != uColCount2)
		Quit("ScoreSeqPairGaps, different lengths");

#if	TRACE_SEQPAIR
	{
	Log("\n");
	Log("ScoreSeqPairGaps\n");
	MSA msaTmp;
	msaTmp.SetSize(2, uColCount);
	msaTmp.CopySeq(0, msa1, uSeqIndex1);
	msaTmp.CopySeq(1, msa2, uSeqIndex2);
	msaTmp.LogMe();
	}
#endif

	SCORE scoreGaps = 0;
	bool bGapping1 = false;
	bool bGapping2 = false;

	unsigned uColStart = 0;
	bool bLeftTermGap = false;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bLeftTermGap = true;
			uColStart = uColIndex;
			break;
			}
		}

	unsigned uColEnd = uColCount - 1;
	bool bRightTermGap = false;
	for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bRightTermGap = true;
			uColEnd = (unsigned) iColIndex;
			break;
			}
		}

#if	TRACE_SEQPAIR
	Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
#endif

	unsigned gap_left_col = 0;
	SCORE cur_gap_score = 0;
	for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);

		if (bGap1 && bGap2)
			continue;

		if (bGap1)
			{
			if (!bGapping1)
				{
#if	TRACE_SEQPAIR
				Log("Gap open seq 1 col %d\n", uColIndex);
#endif
				gap_left_col = uColIndex;
				if (uColIndex == uColStart)
					{
					scoreGaps += TermGapScore(true);
					cur_gap_score += TermGapScore(true);
				}else{
					scoreGaps += g_scoreGapOpen.get();
					cur_gap_score += g_scoreGapOpen.get();
					}
				bGapping1 = true;
				}
			else
				{
				scoreGaps += g_scoreGapExtend.get();
				cur_gap_score += g_scoreGapExtend.get();
				}
			continue;
			}

		else if (bGap2)
			{
			if (!bGapping2)
				{
#if	TRACE_SEQPAIR
				Log("Gap open seq 2 col %d\n", uColIndex);
#endif
				gap_left_col = uColIndex;
				if (uColIndex == uColStart)
					{
					scoreGaps += TermGapScore(true);
					cur_gap_score += TermGapScore(true);
				}else{
					scoreGaps += g_scoreGapOpen.get();
					cur_gap_score += g_scoreGapOpen.get();
					}
				bGapping2 = true;
				}
			else
				{
				scoreGaps += g_scoreGapExtend.get();
				cur_gap_score += g_scoreGapExtend.get();
				}
			continue;
			}

		if( MatchScore != NULL && (bGapping1 || bGapping2) )
		{
			// spread the total gap penalty evenly across all columns
			SCORE per_site_penalty = cur_gap_score / (uColIndex-gap_left_col);
			for( unsigned uGapIndex = gap_left_col; uGapIndex < uColIndex; ++uGapIndex )
				{
				MatchScore[uGapIndex] = per_site_penalty;
				}
			gap_left_col = uInsane;
			cur_gap_score = 0;
		}
		bGapping1 = false;
		bGapping2 = false;
		}

	if (bGapping1 || bGapping2)
		{
		scoreGaps -= g_scoreGapOpen.get();
		scoreGaps += TermGapScore(true);
		cur_gap_score -= g_scoreGapOpen.get();
		cur_gap_score += TermGapScore(true);

		if( MatchScore != NULL )
			{
			// spread the total gap penalty evenly across all columns
			SCORE per_site_penalty = cur_gap_score / (uColCount-gap_left_col);
			for( unsigned uGapIndex = gap_left_col; uGapIndex < uColCount; ++uGapIndex )
				{
				MatchScore[uGapIndex] = per_site_penalty;
				}
			}
		}
	return scoreGaps;
	}
コード例 #10
0
ファイル: refinevertP.cpp プロジェクト: ggrekhov/ugene
    // Return true if any changes made
    bool RefineTask::RefineVertP(MSA* msaIn, unsigned uIters) 
    {
        bool bAnyChanges = false;
        const unsigned uColCountIn = msaIn->GetColCount();
        const unsigned uSeqCountIn = msaIn->GetSeqCount();

        if (uColCountIn < 3 || uSeqCountIn < 3)
            return false;

        unsigned *AnchorCols = new unsigned[uColCountIn];
        unsigned uAnchorColCount;
        SetMSAWeightsMuscle(*msaIn);
        FindAnchorCols(*msaIn, AnchorCols, &uAnchorColCount);

        const unsigned uRangeCount = uAnchorColCount + 1;
        Range *Ranges = new Range[uRangeCount];

        ColsToRanges(AnchorCols, uAnchorColCount, uColCountIn, Ranges);
        ListVertSavings(uColCountIn, uAnchorColCount, Ranges, uRangeCount);

        delete[] AnchorCols;

        MSA msaOut;
        msaOut.SetSize(uSeqCountIn, 0);

        for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCountIn ; ++uSeqIndex)
        {
            const char *ptrName = msaIn->GetSeqName(uSeqIndex);
            unsigned uId = msaIn->GetSeqId(uSeqIndex);
            msaOut.SetSeqName(uSeqIndex, ptrName);
            msaOut.SetSeqId(uSeqIndex, uId);
        }


        MuscleContext* ctx = getMuscleContext();
        workpool->uRangeCount = uRangeCount;

        for (unsigned uRangeIndex = 0; uRangeIndex < uRangeCount && !ctx->isCanceled(); ++uRangeIndex)
        {
            workpool->uRangeIndex = uRangeIndex;
            MSA msaRange;
            const Range &r = Ranges[uRangeIndex];

            const unsigned uFromColIndex = r.m_uBestColLeft;
            const unsigned uRangeColCount = r.m_uBestColRight - uFromColIndex;

            if (0 == uRangeColCount)
                continue;
            else if (1 == uRangeColCount)
            {
                MSAFromColRange(*msaIn, uFromColIndex, 1, msaRange);
                MSAAppend(msaOut, msaRange);
                continue;
            }
            MSAFromColRange(*msaIn, uFromColIndex, uRangeColCount, msaRange);
            bool &bLockLeft = workpool->bLockLeft;
            bool &bLockRight = workpool->bLockRight;
            bLockLeft = (0 != uRangeIndex);
            bLockRight = (uRangeCount - 1 != uRangeIndex);
            bool bAnyChangesThisBlock = RefineHorizP(&msaRange, uIters, bLockLeft, bLockRight);
            bAnyChanges = (bAnyChanges || bAnyChangesThisBlock);

            MSAAppend(msaOut, msaRange);

        }

        delete[] Ranges;
        if (ctx->isCanceled()) {
            throw MuscleException("Canceled");
        }

#if	DEBUG
        // Sanity check
        AssertMSAEqIgnoreCaseAndGaps(*msaIn, msaOut);
#endif

        if (bAnyChanges) {
            msaIn->Copy(msaOut);
        }
        return bAnyChanges;
    }
コード例 #11
0
ファイル: refinevert.cpp プロジェクト: bigmuscle/bigmuscle
// Return true if any changes made
bool RefineVert(MSA &msaIn, const Tree &tree, unsigned uIters)
	{
	bool bAnyChanges = false;

	const unsigned uColCountIn = msaIn.GetColCount();
	const unsigned uSeqCountIn = msaIn.GetSeqCount();

	if (uColCountIn < 3 || uSeqCountIn < 3)
		return false;

	unsigned *AnchorCols = new unsigned[uColCountIn];
	unsigned uAnchorColCount;
	SetMSAWeightsMuscle(msaIn);
	FindAnchorCols(msaIn, AnchorCols, &uAnchorColCount);

	const unsigned uRangeCount = uAnchorColCount + 1;
	Range *Ranges = new Range[uRangeCount];

#if	TRACE
	Log("%u ranges\n", uRangeCount);
#endif

	ColsToRanges(AnchorCols, uAnchorColCount, uColCountIn, Ranges);
	ListVertSavings(uColCountIn, uAnchorColCount, Ranges, uRangeCount);

#if	TRACE
	{
	Log("Anchor cols: ");
	for (unsigned i = 0; i < uAnchorColCount; ++i)
		Log(" %u", AnchorCols[i]);
	Log("\n");

	Log("Ranges:\n");
	for (unsigned i = 0; i < uRangeCount; ++i)
		Log("%4u - %4u\n", Ranges[i].m_uBestColLeft, Ranges[i].m_uBestColRight);
	}
#endif

	delete[] AnchorCols;

	MSA msaOut;
	msaOut.SetSize(uSeqCountIn, 0);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCountIn; ++uSeqIndex)
		{
		const char *ptrName = msaIn.GetSeqName(uSeqIndex);
		unsigned uId = msaIn.GetSeqId(uSeqIndex);
		msaOut.SetSeqName(uSeqIndex, ptrName);
		msaOut.SetSeqId(uSeqIndex, uId);
		}

	for (unsigned uRangeIndex = 0; uRangeIndex < uRangeCount; ++uRangeIndex)
		{
		MSA msaRange;

		const Range &r = Ranges[uRangeIndex];

		const unsigned uFromColIndex = r.m_uBestColLeft;
		const unsigned uRangeColCount = r.m_uBestColRight - uFromColIndex;

		if (0 == uRangeColCount)
			continue;
		else if (1 == uRangeColCount)
			{
			MSAFromColRange(msaIn, uFromColIndex, 1, msaRange);
			MSAAppend(msaOut, msaRange);
			continue;
			}
		MSAFromColRange(msaIn, uFromColIndex, uRangeColCount, msaRange);

#if	TRACE
		Log("\n-------------\n");
		Log("Range %u - %u count=%u\n", r.m_uBestColLeft, r.m_uBestColRight, uRangeColCount);
		Log("Before:\n");
		msaRange.LogMe();
#endif

		bool bLockLeft = (0 != uRangeIndex);
		bool bLockRight = (uRangeCount - 1 != uRangeIndex);
		bool bAnyChangesThisBlock = RefineHoriz(msaRange, tree, uIters, bLockLeft, bLockRight);
		bAnyChanges = (bAnyChanges || bAnyChangesThisBlock);

#if	TRACE
		Log("After:\n");
		msaRange.LogMe();
#endif

		MSAAppend(msaOut, msaRange);

#if	TRACE
		Log("msaOut after Cat:\n");
		msaOut.LogMe();
#endif
		}

#if	DEBUG
// Sanity check
	AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);
#endif

	delete[] Ranges;
	if (bAnyChanges)
		msaIn.Copy(msaOut);
	return bAnyChanges;
	}
コード例 #12
0
ファイル: refinew.cpp プロジェクト: bigmuscle/bigmuscle
void RefineW(const MSA &msaIn, MSA &msaOut)
	{
	const unsigned uSeqCount = msaIn.GetSeqCount();
	const unsigned uColCount = msaIn.GetColCount();

// Reserve same nr seqs, 20% more cols
	const unsigned uReserveColCount = (uColCount*120)/100;
	msaOut.SetSize(uSeqCount, uReserveColCount);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		msaOut.SetSeqName(uSeqIndex, msaIn.GetSeqName(uSeqIndex));
		msaOut.SetSeqId(uSeqIndex, msaIn.GetSeqId(uSeqIndex));
		}

	const unsigned uWindowCount = (uColCount + g_uRefineWindow - 1)/g_uRefineWindow;
	if (0 == g_uWindowTo)
		g_uWindowTo = uWindowCount - 1;

#if	MEMDEBUG
	_CrtSetBreakAlloc(1560);
#endif

	if (g_uWindowOffset > 0)
		{
		MSA msaTmp;
		MSAFromColRange(msaIn, 0, g_uWindowOffset, msaOut);
		}

	fprintf(stderr, "\n");
	for (unsigned uWindowIndex = g_uWindowFrom; uWindowIndex <= g_uWindowTo; ++uWindowIndex)
		{
		fprintf(stderr, "Window %d of %d    \r", uWindowIndex, uWindowCount);
		const unsigned uColFrom = g_uWindowOffset + uWindowIndex*g_uRefineWindow;
		unsigned uColTo = uColFrom + g_uRefineWindow - 1;
		if (uColTo >= uColCount)
			uColTo = uColCount - 1;
		assert(uColTo >= uColFrom);

		SeqVect v;
		SeqVectFromMSACols(msaIn, uColFrom, uColTo, v);

#if	MEMDEBUG
		_CrtMemState s1;
		_CrtMemCheckpoint(&s1);
#endif

		MSA msaTmp;
		MUSCLE(v, msaTmp);
		AppendMSA(msaOut, msaTmp);
		if (uWindowIndex == g_uSaveWindow)
			{
			MSA msaInTmp;
			unsigned uOutCols = msaOut.GetColCount();
			unsigned un = uColTo - uColFrom + 1;
			MSAFromColRange(msaIn, uColFrom, un, msaInTmp);

			char fn[256];
			sprintf(fn, "win%d_inaln.tmp", uWindowIndex);
			TextFile fIn(fn, true);
			msaInTmp.ToFile(fIn);

			sprintf(fn, "win%d_inseqs.tmp", uWindowIndex);
			TextFile fv(fn, true);
			v.ToFile(fv);

			sprintf(fn, "win%d_outaln.tmp", uWindowIndex);
			TextFile fOut(fn, true);
			msaTmp.ToFile(fOut);
			}

#if	MEMDEBUG
		void FreeDPMemSPN();
		FreeDPMemSPN();

		_CrtMemState s2;
		_CrtMemCheckpoint(&s2);

		_CrtMemState s;
		_CrtMemDifference(&s, &s1, &s2);

		_CrtMemDumpStatistics(&s);
		_CrtMemDumpAllObjectsSince(&s1);
		exit(1);
#endif
//#if	DEBUG
//		AssertMSAEqIgnoreCaseAndGaps(msaInTmp, msaTmp);
//#endif
		}
	fprintf(stderr, "\n");

//	AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);//@@uncomment!
	}
コード例 #13
0
ファイル: makerootmsa.cpp プロジェクト: Unode/ext_apps
void MakeRootMSA(const SeqVect &v, const Tree &GuideTree, ProgNode Nodes[],
  MSA &a)
	{
#if	TRACE
	Log("MakeRootMSA Tree=");
	GuideTree.LogMe();
#endif
	const unsigned uSeqCount = v.GetSeqCount();
	unsigned uColCount = uInsane;
	unsigned uSeqIndex = 0;
	const unsigned uTreeNodeCount = GuideTree.GetNodeCount();
	const unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
	const PWPath &RootPath = Nodes[uRootNodeIndex].m_Path;
	const unsigned uRootColCount = RootPath.GetEdgeCount();
	const unsigned uEstringSize = uRootColCount + 1;
	short *Estring1 = new short[uEstringSize];
	short *Estring2 = new short[uEstringSize];
	SetProgressDesc("Root alignment");

	unsigned uTreeNodeIndex = GetFirstNodeIndex(GuideTree);
	do
		{
		Progress(uSeqIndex, uSeqCount);

		unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex);
		const Seq &s = *(v[uId]);

		Seq sRootE;
		short *es = MakeRootSeqE(s, GuideTree, uTreeNodeIndex, Nodes, sRootE,
		  Estring1, Estring2);
		Nodes[uTreeNodeIndex].m_EstringL = EstringNewCopy(es);

#if	VALIDATE
		Seq sRoot;
		MakeRootSeq(s, GuideTree, uTreeNodeIndex, Nodes, sRoot);
		if (!sRoot.Eq(sRootE))
			{
			Log("sRoot=");
			sRoot.LogMe();
			Log("sRootE=");
			sRootE.LogMe();
			Quit("Root seqs differ");
			}
#if	TRACE
		Log("MakeRootSeq=\n");
		sRoot.LogMe();
#endif
#endif

		if (uInsane == uColCount)
			{
			uColCount = sRootE.Length();
			a.SetSize(uSeqCount, uColCount);
			}
		else
			{
			assert(uColCount == sRootE.Length());
			}
		a.SetSeqName(uSeqIndex, s.GetName());
		a.SetSeqId(uSeqIndex, uId);
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			a.SetChar(uSeqIndex, uColIndex, sRootE[uColIndex]);
		++uSeqIndex;

		uTreeNodeIndex = GetNextNodeIndex(GuideTree, uTreeNodeIndex);
		}
	while (NULL_NEIGHBOR != uTreeNodeIndex);

	delete[] Estring1;
	delete[] Estring2;

	ProgressStepsDone();
	assert(uSeqIndex == uSeqCount);
	}
コード例 #14
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
SCORE ScoreSeqPairLetters(const MSA &msa1, unsigned uSeqIndex1,
  const MSA &msa2, unsigned uSeqIndex2)
	{
	const unsigned uColCount = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount != uColCount2)
		Quit("ScoreSeqPairLetters, different lengths");

#if	TRACE_SEQPAIR
	{
	Log("\n");
	Log("ScoreSeqPairLetters\n");
	MSA msaTmp;
	msaTmp.SetSize(2, uColCount);
	msaTmp.CopySeq(0, msa1, uSeqIndex1);
	msaTmp.CopySeq(1, msa2, uSeqIndex2);
	msaTmp.LogMe();
	}
#endif

	SCORE scoreLetters = 0;
	SCORE scoreGaps = 0;
	bool bGapping1 = false;
	bool bGapping2 = false;

	unsigned uColStart = 0;
	bool bLeftTermGap = false;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bLeftTermGap = true;
			uColStart = uColIndex;
			break;
			}
		}

	unsigned uColEnd = uColCount - 1;
	bool bRightTermGap = false;
	for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bRightTermGap = true;
			uColEnd = (unsigned) iColIndex;
			break;
			}
		}

#if	TRACE_SEQPAIR
	Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
#endif

	for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
		{
		unsigned uLetter1 = msa1.GetLetterEx(uSeqIndex1, uColIndex);
		if (uLetter1 >= g_AlphaSize)
			continue;
		unsigned uLetter2 = msa2.GetLetterEx(uSeqIndex2, uColIndex);
		if (uLetter2 >= g_AlphaSize)
			continue;

		SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2];
		scoreLetters += scoreMatch;
		}
	return scoreLetters;
	}
コード例 #15
0
ファイル: objscore2.cpp プロジェクト: bigmuscle/bigmuscle
SCORE ScoreSeqPairGaps(const MSA &msa1, unsigned uSeqIndex1,
  const MSA &msa2, unsigned uSeqIndex2)
	{
	const unsigned uColCount = msa1.GetColCount();
	const unsigned uColCount2 = msa2.GetColCount();
	if (uColCount != uColCount2)
		Quit("ScoreSeqPairGaps, different lengths");

#if	TRACE_SEQPAIR
	{
	Log("\n");
	Log("ScoreSeqPairGaps\n");
	MSA msaTmp;
	msaTmp.SetSize(2, uColCount);
	msaTmp.CopySeq(0, msa1, uSeqIndex1);
	msaTmp.CopySeq(1, msa2, uSeqIndex2);
	msaTmp.LogMe();
	}
#endif

	SCORE scoreGaps = 0;
	bool bGapping1 = false;
	bool bGapping2 = false;

	unsigned uColStart = 0;
	bool bLeftTermGap = false;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bLeftTermGap = true;
			uColStart = uColIndex;
			break;
			}
		}

	unsigned uColEnd = uColCount - 1;
	bool bRightTermGap = false;
	for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
		if (!bGap1 || !bGap2)
			{
			if (bGap1 || bGap2)
				bRightTermGap = true;
			uColEnd = (unsigned) iColIndex;
			break;
			}
		}

#if	TRACE_SEQPAIR
	Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
#endif

	for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
		{
		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);

		if (bGap1 && bGap2)
			continue;

		if (bGap1)
			{
			if (!bGapping1)
				{
#if	TRACE_SEQPAIR
				Log("Gap open seq 1 col %d\n", uColIndex);
#endif
				if (uColIndex == uColStart)
					scoreGaps += TermGapScore(true);
				else
					scoreGaps += g_scoreGapOpen;
				bGapping1 = true;
				}
			else
				scoreGaps += g_scoreGapExtend;
			continue;
			}

		else if (bGap2)
			{
			if (!bGapping2)
				{
#if	TRACE_SEQPAIR
				Log("Gap open seq 2 col %d\n", uColIndex);
#endif
				if (uColIndex == uColStart)
					scoreGaps += TermGapScore(true);
				else
					scoreGaps += g_scoreGapOpen;
				bGapping2 = true;
				}
			else
				scoreGaps += g_scoreGapExtend;
			continue;
			}

		bGapping1 = false;
		bGapping2 = false;
		}

	if (bGapping1 || bGapping2)
		{
		scoreGaps -= g_scoreGapOpen;
		scoreGaps += TermGapScore(true);
		}
	return scoreGaps;
	}
コード例 #16
0
ファイル: refinew.cpp プロジェクト: Wyss/mauve-py
void RefineW(const MSA &msaIn, MSA &msaOut)
	{
	const unsigned uSeqCount = msaIn.GetSeqCount();
	const unsigned uColCount = msaIn.GetColCount();

// Reserve same nr seqs, 20% more cols
	const unsigned uReserveColCount = (uColCount*120)/100;
	msaOut.SetSize(uSeqCount, uReserveColCount);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		msaOut.SetSeqName(uSeqIndex, msaIn.GetSeqName(uSeqIndex));
		msaOut.SetSeqId(uSeqIndex, msaIn.GetSeqId(uSeqIndex));
		}

	const unsigned uWindowCount = (uColCount + g_uRefineWindow.get() - 1)/g_uRefineWindow.get();
	if (0 == g_uWindowTo.get())
		g_uWindowTo.get() = uWindowCount - 1;

#if	MEMDEBUG
	_CrtSetBreakAlloc(1560);
#endif

	if (g_uWindowOffset.get() > 0)
		{
		MSA msaTmp;
		MSAFromColRange(msaIn, 0, g_uWindowOffset.get(), msaOut);
		}

	if (!g_bQuiet.get())
		fprintf(stderr, "\n");
	for (unsigned uWindowIndex = g_uWindowFrom.get(); uWindowIndex <= g_uWindowTo.get(); ++uWindowIndex)
		{
		if (!g_bQuiet.get())
			fprintf(stderr, "Window %d of %d    \r", uWindowIndex, uWindowCount);
		const unsigned uColFrom = g_uWindowOffset.get() + uWindowIndex*g_uRefineWindow.get();
		unsigned uColTo = uColFrom + g_uRefineWindow.get() - 1;
		if (uColTo >= uColCount)
			uColTo = uColCount - 1;
		assert(uColTo >= uColFrom);

		SeqVect v;
		SeqVectFromMSACols(msaIn, uColFrom, uColTo, v);

#if	MEMDEBUG
		_CrtMemState s1;
		_CrtMemCheckpoint(&s1);
#endif
		// Begin AED 5/20/06
		// remove any empty seqs in this window
		std::vector< size_t > empty_seqs;
		SeqVect vr;
		for( size_t seqI = 0; seqI < v.size(); ++seqI )
		{
			if( v[seqI]->size() == 0 )
				empty_seqs.push_back(seqI);
			else
				vr.push_back(v[seqI]);
		}
		std::vector< unsigned > seqid_map( vr.size() );
		for( size_t seqI = 0; seqI < vr.size(); ++seqI )
		{
			seqid_map[seqI] = vr[seqI]->GetId();
			vr[seqI]->SetId(seqI);
		}

		MSA msaTmp;
		if( vr.size() > 1 )
			MUSCLE(vr, msaTmp);

		// remap the seqids to their original state
		for( size_t seqI = 0; seqI < vr.size(); ++seqI )
			vr[seqI]->SetId(seqid_map[seqI]);

		// merge empty seqs back in
		{
			const unsigned uSeqCount = msaOut.GetSeqCount();

			const unsigned uColCount1 = msaOut.GetColCount();
			const unsigned uColCount2 = vr.size() > 1 ? msaTmp.GetColCount() : vr[0]->size();
			const unsigned uColCountCat = uColCount1 + uColCount2;
			for( unsigned seqI = 0; seqI < vr.size(); ++seqI )
			{
				unsigned uSeqIndex = msaOut.GetSeqIndex(seqid_map[seqI]);
				if( vr.size() > 1 )
				{
					unsigned uSeqIndex2 = msaTmp.GetSeqIndex(seqI);
					for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
					{
						const char c = msaTmp.GetChar(uSeqIndex2, uColIndex);
						msaOut.SetChar(uSeqIndex, uColCount1 + uColIndex, c);
					}
				}else{
					for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
					{
						const char c = vr[0]->GetChar(uColIndex);
						msaOut.SetChar(uSeqIndex, uColCount1 + uColIndex, c);
					}
				}
			}
			for( unsigned seqI = 0; seqI < empty_seqs.size(); ++seqI )
			{
				unsigned uSeqId2 = v[empty_seqs[seqI]]->GetId();
				unsigned uSeqIndex = msaOut.GetSeqIndex(uSeqId2);
				for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)
				{
					msaOut.SetChar(uSeqIndex, uColCount1 + uColIndex, '-');
				}
			}
			vr.clear();
		}
//		AppendMSA(msaOut, msaTmp);
		// end AED 5/20/06

		if (uWindowIndex == g_uSaveWindow.get())
			{
			MSA msaInTmp;
			unsigned uOutCols = msaOut.GetColCount();
			unsigned un = uColTo - uColFrom + 1;
			MSAFromColRange(msaIn, uColFrom, un, msaInTmp);

			char fn[256];
			sprintf(fn, "win%d_inaln.tmp", uWindowIndex);
			TextFile fIn(fn, true);
			msaInTmp.ToFile(fIn);

			sprintf(fn, "win%d_inseqs.tmp", uWindowIndex);
			TextFile fv(fn, true);
			v.ToFile(fv);

			sprintf(fn, "win%d_outaln.tmp", uWindowIndex);
			TextFile fOut(fn, true);
			msaTmp.ToFile(fOut);
			}

#if	MEMDEBUG
		void FreeDPMemSPN();
		FreeDPMemSPN();

		_CrtMemState s2;
		_CrtMemCheckpoint(&s2);

		_CrtMemState s;
		_CrtMemDifference(&s, &s1, &s2);

		_CrtMemDumpStatistics(&s);
		_CrtMemDumpAllObjectsSince(&s1);
		exit(1);
#endif
//#if	DEBUG
//		AssertMSAEqIgnoreCaseAndGaps(msaInTmp, msaTmp);
//#endif
		}
	if (!g_bQuiet.get())
		fprintf(stderr, "\n");

//	AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);//@@uncomment!
	}
コード例 #17
0
ファイル: anchoredpp.cpp プロジェクト: Wyss/mauve-py
// Return true if any changes made
void AnchoredProfileProfile(MSA &msa1, MSA &msa2, MSA &msaOut)
	{

	const unsigned uColCountIn = msa1.GetColCount();
	const unsigned uSeqCountIn = msa1.GetSeqCount() + msa2.GetSeqCount();

	unsigned *AnchorCols = new unsigned[uColCountIn];
	unsigned uAnchorColCount;

	PrepareMSAforScoring(msa1);
	PrepareMSAforScoring(msa2);
	FindAnchorColsPP(msa1, msa2, AnchorCols, &uAnchorColCount);

	const unsigned uRangeCount = uAnchorColCount + 1;
	Range *Ranges = new Range[uRangeCount];

#if	TRACE
	Log("%u ranges\n", uRangeCount);
#endif

	ColsToRanges(AnchorCols, uAnchorColCount, uColCountIn, Ranges);
	ListVertSavings(uColCountIn, uAnchorColCount, Ranges, uRangeCount);

#if	TRACE
	{
	Log("Anchor cols: ");
	for (unsigned i = 0; i < uAnchorColCount; ++i)
		Log(" %u", AnchorCols[i]);
	Log("\n");

	Log("Ranges:\n");
	for (unsigned i = 0; i < uRangeCount; ++i)
		Log("%4u - %4u\n", Ranges[i].m_uBestColLeft, Ranges[i].m_uBestColRight);
	}
#endif

	delete[] AnchorCols;

	msaOut.SetSize(uSeqCountIn, 0);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCountIn; ++uSeqIndex)
		{
		const char *ptrName;
		unsigned uId;
		if( uSeqIndex < msa1.GetSeqCount() )
			{
			msa1.SetSeqId(uSeqIndex, uSeqIndex);
			ptrName = msa1.GetSeqName(uSeqIndex);
			}
		else
			{
			msa2.SetSeqId(uSeqIndex-msa1.GetSeqCount(), uSeqIndex);
			ptrName = msa2.GetSeqName(uSeqIndex-msa1.GetSeqCount());
			}
		msaOut.SetSeqName(uSeqIndex, ptrName);
		msaOut.SetSeqId(uSeqIndex, uSeqIndex);
		}

	for (unsigned uRangeIndex = 0; uRangeIndex < uRangeCount; ++uRangeIndex)
		{
		MSA msaRange1;
		MSA msaRange2;
		MSA msaRangeOut;

		const Range &r = Ranges[uRangeIndex];

		const unsigned uFromColIndex = r.m_uBestColLeft;
		const unsigned uRangeColCount = r.m_uBestColRight - uFromColIndex;

		if (0 == uRangeColCount)
			continue;
/*		else if (1 == uRangeColCount)
			{
			MSAFromColRange(msaIn, uFromColIndex, 1, msaRange);
			MSAAppend(msaOut, msaRange);
			continue;
			}
*/
		MSAFromColRange(msa1, uFromColIndex, uRangeColCount, msaRange1);
		MSAFromColRange(msa2, uFromColIndex, uRangeColCount, msaRange2);
		StripGapColumns(msaRange1);
		StripGapColumns(msaRange2);

#if	TRACE
		Log("\n-------------\n");
		Log("Range %u - %u count=%u\n", r.m_uBestColLeft, r.m_uBestColRight, uRangeColCount);
		Log("Before:\n");
		msaRange1.LogMe();
		msaRange2.LogMe();
#endif

		ProfileProfile(msaRange1, msaRange2, msaRangeOut);

#if	TRACE
		Log("After:\n");
		msaRangeOut.LogMe();
#endif
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCountIn; ++uSeqIndex)
			msaRangeOut.SetSeqId(uSeqIndex, uSeqIndex);

		MSAAppend(msaOut, msaRangeOut);

#if	TRACE
		Log("msaOut after Cat:\n");
		msaOut.LogMe();
#endif
		}

	delete[] Ranges;
	}