Example #1
0
// Fills msa with one row per sequence in this vector. Every row is as wide
// as the longest sequence; shorter sequences are padded on the right with
// '.'. If the vector holds no sequences, msa is cleared and nothing else
// happens.
void SeqVect::PadToMSA(MSA &msa)
	{
	const unsigned uRowCount = Length();
	if (uRowCount == 0)
		{
		msa.Clear();
		return;
		}

	// First pass: the longest sequence determines the column count.
	unsigned uWidth = 0;
	for (unsigned uRow = 0; uRow < uRowCount; ++uRow)
		{
		const unsigned uLen = at(uRow)->Length();
		if (uWidth < uLen)
			uWidth = uLen;
		}
	msa.SetSize(uRowCount, uWidth);

	// Second pass: copy the name and the characters of each sequence;
	// columns beyond the end of a sequence receive the pad character.
	for (unsigned uRow = 0; uRow < uRowCount; ++uRow)
		{
		Seq *ptrRowSeq = at(uRow);
		msa.SetSeqName(uRow, ptrRowSeq->GetName());
		const unsigned uLen = ptrRowSeq->Length();
		for (unsigned uCol = 0; uCol < uWidth; ++uCol)
			{
			const char c = (uCol < uLen) ? ptrRowSeq->at(uCol) : '.';
			msa.SetChar(uRow, uCol, c);
			}
		}
	}
// Score for pairing position i of seqA with position j of seqB: the VTML_SP
// entry selected by the two letters, plus g_scoreCenter.
// Both indexes must be inside their sequence (checked by assert only).
static SCORE Subst(const Seq &seqA, const Seq &seqB, unsigned i, unsigned j)
	{
	assert(i < seqA.Length());
	assert(j < seqB.Length());

	const unsigned uRow = seqA.GetLetter(i);
	const unsigned uCol = seqB.GetLetter(j);
	return VTML_SP[uRow][uCol] + g_scoreCenter;
	}
Example #3
0
void Seq::CopyReversed(const Seq &rhs)
	{
	clear();
	const unsigned uLength = rhs.Length();
	const unsigned uBase = rhs.Length() - 1;
	for (unsigned uColIndex = 0; uColIndex < uLength; ++uColIndex)
		push_back(rhs.at(uBase - uColIndex));
	const char *ptrName = rhs.GetName();
	size_t n = strlen(ptrName) + 1;
	m_ptrName = new char[n];
	strcpy(m_ptrName, ptrName);
	}
Example #4
0
// Rebuilds sv from the rows of ma.
//
// Each row's core data is loaded into a new Seq (named after the row) and
// all gap characters are stripped. Rows that still contain characters are
// appended to sv and their id is copied from ctx->input_uIds (indexed by row
// position) to ctx->tmp_uIds (indexed by position in sv). Rows that consist
// of gaps only are dropped, and their temporary Seq is deleted rather than
// leaked. When fixAlpha is true, sv.FixAlpha() is applied to the result.
void convertMAlignment2SecVect(SeqVect& sv, const MultipleSequenceAlignment& ma, bool fixAlpha) {
    sv.Clear();

    MuscleContext *ctx = getMuscleContext();
    ctx->fillUidsVectors(ma->getNumRows());

    unsigned rowIndex = 0;   // position of the row inside ma
    unsigned keptCount = 0;  // number of sequences stored in sv so far
    foreach(const MultipleSequenceAlignmentRow& row, ma->getMsaRows()) {
        Seq *ptrSeq = new Seq();
        QByteArray name = row->getName().toLocal8Bit();
        ptrSeq->FromString(row->getCore().constData(), name.constData());
        //stripping gaps, original Seq::StripGaps fails on MSVC9
        ptrSeq->erase(std::remove(ptrSeq->begin(), ptrSeq->end(), U2Msa::GAP_CHAR), ptrSeq->end());
        if (ptrSeq->Length() != 0) {
            ctx->tmp_uIds[keptCount] = ctx->input_uIds[rowIndex];
            sv.push_back(ptrSeq);
            ++keptCount;
        } else {
            // Nothing takes ownership of an empty sequence; free it here.
            delete ptrSeq;
        }
        ++rowIndex;
    }
    if (fixAlpha) {
        sv.FixAlpha();
    }
}
Example #5
0
// Applies the edit string es to sIn and stores the result as the single row
// of alignment a (which is cleared and resized first). es is a
// zero-terminated list of runs: a positive entry n copies the next n
// characters of sIn, a negative entry -n emits n '-' characters.
// The row takes its name and id from sIn. Returns the number of columns.
unsigned EstringOp(const short es[], const Seq &sIn, MSA &a)
	{
	unsigned uSymbols;
	unsigned uIndels;
	EstringCounts(es, &uSymbols, &uIndels);
	assert(sIn.Length() == uSymbols);

	const unsigned uWidth = uSymbols + uIndels;

	a.Clear();
	a.SetSize(1, uWidth);

	a.SetSeqName(0, sIn.GetName());
	a.SetSeqId(0, sIn.GetId());

	unsigned uSrcPos = 0;	// next unread character of sIn
	unsigned uCol = 0;		// next column of a to fill
	for (const short *ptrRun = es; *ptrRun != 0; ++ptrRun)
		{
		const int iRun = *ptrRun;
		if (iRun > 0)
			{
			for (int k = iRun; k > 0; --k)
				{
				const char c = sIn[uSrcPos++];
				a.SetChar(0, uCol++, c);
				}
			}
		else
			{
			for (int k = iRun; k < 0; ++k)
				a.SetChar(0, uCol++, '-');
			}
		}
	assert(uCol == uWidth);
	return uWidth;
	}
Example #6
0
// Applies the edit string es to sIn and writes the result to sOut, which is
// cleared and given sIn's name. es is a zero-terminated list of runs: a
// positive entry n appends the next n characters of sIn, a negative entry -n
// appends n '-' characters.
void EstringOp(const short es[], const Seq &sIn, Seq &sOut)
	{
#if	DEBUG
	unsigned uSymbols;
	unsigned uIndels;
	EstringCounts(es, &uSymbols, &uIndels);
	assert(sIn.Length() == uSymbols);
#endif
	sOut.Clear();
	sOut.SetName(sIn.GetName());

	int iSrcPos = 0;	// next unread character of sIn
	for (const short *ptrRun = es; *ptrRun != 0; ++ptrRun)
		{
		const int iRun = *ptrRun;
		if (iRun > 0)
			{
			for (int k = iRun; k > 0; --k)
				{
				const char c = sIn[iSrcPos++];
				sOut.push_back(c);
				}
			}
		else
			{
			for (int k = iRun; k < 0; ++k)
				sOut.push_back('-');
			}
		}
	}
Example #7
0
// Position-by-position comparison of this sequence with s, ignoring letter
// case. Returns false if the lengths differ. A position where this sequence
// has a gap (per IsGap) matches only if s also has a gap there; otherwise
// the two characters must be equal after upper-casing.
bool Seq::EqIgnoreCase(const Seq &s) const
	{
	const unsigned n = Length();
	if (n != s.Length())
		return false;

	for (unsigned i = 0; i < n; ++i)
		{
		const char c1 = at(i);
		const char c2 = s.at(i);
		if (IsGap(c1))
			{
			if (!IsGap(c2))
				return false;
			continue;
			}
		// toupper is only defined for values representable as unsigned char
		// (or EOF), so convert first; a plain char may be negative.
		if (toupper(static_cast<unsigned char>(c1)) !=
		  toupper(static_cast<unsigned char>(c2)))
			return false;
		}
	return true;
	}
// For every position i of s, points Row[i] at the VTML_SP row selected by the
// letter at that position. Letter codes of 20 or more are mapped to the AX_X
// row. Row must have room for s.Length() entries.
static void RowFromSeq(const Seq &s, SCORE *Row[])
	{
	const unsigned uCount = s.Length();
	for (unsigned uPos = 0; uPos != uCount; ++uPos)
		{
		const char c = s.GetChar(uPos);
		unsigned uLetter = CharToLetter(c);
		if (uLetter >= 20)
			uLetter = AX_X;
		Row[uPos] = VTML_SP[uLetter];
		}
	}
// Writes the letter code of every position of s into Letters. Codes of 20 or
// more are replaced by AX_X. Letters must have room for s.Length() entries.
static void LettersFromSeq(const Seq &s, unsigned Letters[])
	{
	const unsigned uCount = s.Length();
	for (unsigned uPos = 0; uPos != uCount; ++uPos)
		{
		const char c = s.GetChar(uPos);
		unsigned uLetter = CharToLetter(c);
		if (uLetter >= 20)
			uLetter = AX_X;
		Letters[uPos] = uLetter;
		}
	}
Example #10
0
// Exact comparison: true when s has the same length as this sequence and the
// same character at every position (case and gap characters included).
bool Seq::Eq(const Seq &s) const
	{
	const unsigned uLen = Length();
	if (uLen != s.Length())
		return false;

	// Advance while characters agree; reaching the end means no mismatch.
	unsigned uPos = 0;
	while (uPos < uLen && at(uPos) == s.at(uPos))
		++uPos;
	return uPos == uLen;
	}
Example #11
0
static void PathSeq(const Seq &s, const PWPath &Path, bool bRight, Seq &sOut)
	{
	short *esA;
	short *esB;
	PathToEstrings(Path, &esA, &esB);

	const unsigned uSeqLength = s.Length();
	const unsigned uEdgeCount = Path.GetEdgeCount();

	sOut.Clear();
	sOut.SetName(s.GetName());
	unsigned uPos = 0;
	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
		char cType = Edge.cType;
		if (bRight)
			{
			if (cType == 'I')
				cType = 'D';
			else if (cType == 'D')
				cType = 'I';
			}
		switch (cType)
			{
		case 'M':
			sOut.AppendChar(s[uPos++]);
			break;
		case 'D':
			sOut.AppendChar('-');
			break;
		case 'I':
			sOut.AppendChar(s[uPos++]);
			break;
		default:
			Quit("PathSeq, invalid edge type %c", cType);
			}
		}
	}
// Dynamic-programming alignment of seqA against seqB (per its name, a global
// sequence-to-sequence alignment).
//
// All working storage comes from the DPM structure after the call to
// AllocDPMem(uLengthA, uLengthB): three M row buffers (MPrev/MCurr/MWork),
// three D row buffers (DPrev/DCurr/DWork), one score row pointer per position
// of A (MxRowA, filled by RowFromSeq), the letters of B (LettersB, filled by
// LettersFromSeq), the per-column delete start rows (uDeletePos) and the
// TraceBack matrix.
//
// Values written to TraceBack in this function are 0, a positive number
// (current row minus the row recorded in uDeletePos) or a negative number
// (recorded insert start column minus the current column). The matrix is
// handed to TraceBackToPath, which produces Path. The return value is the
// best of the three scores computed for the final cell.
//
// NOTE(review): MxRowA[0] and LettersB[0] are read unconditionally below —
// presumably callers never pass an empty sequence; confirm.
SCORE GlobalAlignSS(const Seq &seqA, const Seq &seqB, PWPath &Path)
	{
	const unsigned uLengthA = seqA.Length();
	const unsigned uLengthB = seqB.Length();
	const unsigned uPrefixCountA = uLengthA + 1;
	const unsigned uPrefixCountB = uLengthB + 1;

	AllocDPMem(uLengthA, uLengthB);

	// Local aliases for the shared DP buffers.
	SCORE *MPrev = DPM.MPrev;
	SCORE *MCurr = DPM.MCurr;
	SCORE *MWork = DPM.MWork;

	SCORE *DPrev = DPM.DPrev;
	SCORE *DCurr = DPM.DCurr;
	SCORE *DWork = DPM.DWork;
	SCORE **MxRowA = DPM.MxRowA;
	unsigned *LettersB = DPM.LettersB;

	// MxRowA[i] -> substitution row for letter i of A; LettersB[j] -> letter
	// code of position j of B. Together MxRowA[i][LettersB[j]] is the
	// substitution score for the pair (i, j).
	RowFromSeq(seqA, MxRowA);
	LettersFromSeq(seqB, LettersB);

	unsigned *uDeletePos = DPM.uDeletePos;

	int **TraceBack = DPM.TraceBack;

#if	DEBUG
	// Debug builds start from an all-zero traceback matrix.
	for (unsigned i = 0; i < uPrefixCountA; ++i)
		memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));
#endif

// Special case for i=0
	TraceBack[0][0] = 0;
	MPrev[0] = MxRowA[0][LettersB[0]];

// D(0,0) is -infinity (requires I->D).
	DPrev[0] = MINUS_INFINITY;

	for (unsigned j = 1; j < uLengthB; ++j)
		{
		unsigned uLetterB = LettersB[j];

	// Only way to get M(0, j) looks like this:
	//		A	----X
	//		B	XXXXX
	//			0   j
	// So gap-open at j=0, gap-close at j-1.
		MPrev[j] = MxRowA[0][uLetterB] + g_scoreGapOpen/2; // term gaps half
		TraceBack[0][j] = -(int) j;

	// Assume no D->I transitions, then can't be a delete if only
	// one letter from A.
		DPrev[j] = MINUS_INFINITY;
		}

	// Main recurrence: one pass per remaining row of A. IPrev_j_1 carries the
	// best insert score along the current row.
	SCORE IPrev_j_1;
	for (unsigned i = 1; i < uLengthA; ++i)
		{
		// Preload MCurr[j] with the substitution score for (i, j); the best
		// predecessor score is added to it further down.
		SCORE *ptrMCurr_j = MCurr;
		memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));

		const SCORE *RowA = MxRowA[i];
		const SCORE *ptrRowA = MxRowA[i];
		const SCORE *ptrMCurrEnd = ptrMCurr_j + uLengthB;
		unsigned *ptrLettersB = LettersB;
		for (; ptrMCurr_j != ptrMCurrEnd; ++ptrMCurr_j)
			{
			*ptrMCurr_j = RowA[*ptrLettersB];
			++ptrLettersB;
			}

		unsigned *ptrDeletePos = uDeletePos;

	// Special case for j=0
	// Only way to get M(i, 0) looks like this:
	//			0   i
	//		A	XXXXX
	//		B	----X
	// So gap-open at i=0, gap-close at i-1.
		ptrMCurr_j = MCurr;
		assert(ptrMCurr_j == &(MCurr[0]));
		*ptrMCurr_j += g_scoreGapOpen/2;	// term gaps half

		++ptrMCurr_j;

		int *ptrTraceBack_ij = TraceBack[i];
		*ptrTraceBack_ij++ = (int) i;

		// D for column 0: keep the previous row's D value unless opening a
		// new delete from M(i-1, 0) scores higher; in that case remember row
		// i as the delete start for this column.
		SCORE *ptrMPrev_j = MPrev;
		SCORE *ptrDPrev = DPrev;
		SCORE d = *ptrDPrev;
		SCORE DNew = *ptrMPrev_j + g_scoreGapOpen;
		if (DNew > d)
			{
			d = DNew;
			*ptrDeletePos = i;
			}

		SCORE *ptrDCurr = DCurr;

		assert(ptrDCurr == &(DCurr[0]));
		*ptrDCurr = d;

	// Can't have an insert if no letters from B
		IPrev_j_1 = MINUS_INFINITY;

		unsigned uInsertPos;

		for (unsigned j = 1; j < uLengthB; ++j)
			{
		// Here, MPrev_j is preserved from previous
		// iteration so with current i,j is M[i-1][j-1]
			SCORE MPrev_j = *ptrMPrev_j;
			// Running best insert score; uInsertPos remembers the column
			// where that best insert was opened.
			SCORE INew = MPrev_j + g_scoreGapOpen;
			if (INew > IPrev_j_1)
				{
				IPrev_j_1 = INew;
				uInsertPos = j;
				}

			// Pick the best predecessor among M, D and I.
			// NOTE(review): when M wins, TraceBack[i][j] is not written here.
			// Outside DEBUG builds it keeps whatever value the buffer already
			// held — confirm AllocDPMem (or the caller) zeroes these rows.
			SCORE scoreMax = MPrev_j;

			assert(ptrDPrev == &(DPrev[j-1]));
			SCORE scoreD = *ptrDPrev++;
			if (scoreD > scoreMax)
				{
				scoreMax = scoreD;
				assert(ptrDeletePos == &(uDeletePos[j-1]));
				*ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
				assert(*ptrTraceBack_ij > 0);
				}
			++ptrDeletePos;

			SCORE scoreI = IPrev_j_1;
			if (scoreI > scoreMax)
				{
				scoreMax = scoreI;
				*ptrTraceBack_ij = (int) uInsertPos - (int) j;
				assert(*ptrTraceBack_ij < 0);
				}

			// MCurr[j] already holds the substitution score; add the best
			// predecessor score to finish M(i, j).
			*ptrMCurr_j += scoreMax;
			assert(ptrMCurr_j == &(MCurr[j]));
			++ptrMCurr_j;

			// D for column j, computed the same way as for column 0 above.
			MPrev_j = *(++ptrMPrev_j);
			assert(ptrDPrev == &(DPrev[j]));
			SCORE d = *ptrDPrev;
			SCORE DNew = MPrev_j + g_scoreGapOpen;
			if (DNew > d)
				{
				d = DNew;
				assert(ptrDeletePos == &uDeletePos[j]);
				*ptrDeletePos = i;
				}
			assert(ptrDCurr + 1 == &(DCurr[j]));
			*(++ptrDCurr) = d;

			++ptrTraceBack_ij;
			}

		// Presumably cycles the three row buffers so that the row just
		// computed is read as "Prev" in the next iteration — Rotate is not
		// visible here; confirm.
		Rotate(MPrev, MCurr, MWork);
		Rotate(DPrev, DCurr, DWork);
		}

// Special case for i=uLengthA
	SCORE IPrev = MINUS_INFINITY;

	unsigned uInsertPos;

	// Best value of MPrev[j-1] over j = 1 .. uLengthB-1, and the j achieving
	// it. uInsertPos stays unset when uLengthB <= 1; IPrev is then still
	// MINUS_INFINITY.
	for (unsigned j = 1; j < uLengthB; ++j)
		{
		SCORE INew = MPrev[j-1];
		if (INew > IPrev)
			{
			uInsertPos = j;
			IPrev = INew;
			}
		}

// Special case for i=uLengthA, j=uLengthB
	SCORE scoreMax = MPrev[uLengthB-1];
	int iTraceBack = 0;

	SCORE scoreD = DPrev[uLengthB-1] - g_scoreGapOpen/2;	// term gaps half
	if (scoreD > scoreMax)
		{
		scoreMax = scoreD;
		iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
		}

	SCORE scoreI = IPrev - g_scoreGapOpen/2;
	if (scoreI > scoreMax)
		{
		scoreMax = scoreI;
		iTraceBack = (int) uInsertPos - (int) uLengthB;
		}

	TraceBack[uLengthA][uLengthB] = iTraceBack;

	// Convert the traceback matrix into the caller's Path.
	TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);

	return scoreMax;
	}
Example #13
0
void MakeRootMSA(const SeqVect &v, const Tree &GuideTree, ProgNode Nodes[],
  MSA &a)
	{
#if	TRACE
	Log("MakeRootMSA Tree=");
	GuideTree.LogMe();
#endif
	const unsigned uSeqCount = v.GetSeqCount();
	unsigned uColCount = uInsane;
	unsigned uSeqIndex = 0;
	const unsigned uTreeNodeCount = GuideTree.GetNodeCount();
	const unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex();
	const PWPath &RootPath = Nodes[uRootNodeIndex].m_Path;
	const unsigned uRootColCount = RootPath.GetEdgeCount();
	const unsigned uEstringSize = uRootColCount + 1;
	short *Estring1 = new short[uEstringSize];
	short *Estring2 = new short[uEstringSize];
	SetProgressDesc("Root alignment");

	unsigned uTreeNodeIndex = GetFirstNodeIndex(GuideTree);
	do
		{
		Progress(uSeqIndex, uSeqCount);

		unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex);
		const Seq &s = *(v[uId]);

		Seq sRootE;
		short *es = MakeRootSeqE(s, GuideTree, uTreeNodeIndex, Nodes, sRootE,
		  Estring1, Estring2);
		Nodes[uTreeNodeIndex].m_EstringL = EstringNewCopy(es);

#if	VALIDATE
		Seq sRoot;
		MakeRootSeq(s, GuideTree, uTreeNodeIndex, Nodes, sRoot);
		if (!sRoot.Eq(sRootE))
			{
			Log("sRoot=");
			sRoot.LogMe();
			Log("sRootE=");
			sRootE.LogMe();
			Quit("Root seqs differ");
			}
#if	TRACE
		Log("MakeRootSeq=\n");
		sRoot.LogMe();
#endif
#endif

		if (uInsane == uColCount)
			{
			uColCount = sRootE.Length();
			a.SetSize(uSeqCount, uColCount);
			}
		else
			{
			assert(uColCount == sRootE.Length());
			}
		a.SetSeqName(uSeqIndex, s.GetName());
		a.SetSeqId(uSeqIndex, uId);
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			a.SetChar(uSeqIndex, uColIndex, sRootE[uColIndex]);
		++uSeqIndex;

		uTreeNodeIndex = GetNextNodeIndex(GuideTree, uTreeNodeIndex);
		}
	while (NULL_NEIGHBOR != uTreeNodeIndex);

	delete[] Estring1;
	delete[] Estring2;

	ProgressStepsDone();
	assert(uSeqIndex == uSeqCount);
	}
Example #14
0
bool Seq::EqIgnoreCaseAndGaps(const Seq &s) const
	{
	const unsigned uThisLength = Length();
	const unsigned uOtherLength = s.Length();
	
	unsigned uThisPos = 0;
	unsigned uOtherPos = 0;

	int cThis;
	int cOther;
	for (;;)
		{
		if (uThisPos == uThisLength && uOtherPos == uOtherLength)
			break;

	// Set cThis to next non-gap character in this string
	// or -1 if end-of-string.
		for (;;)
			{
			if (uThisPos == uThisLength)
				{
				cThis = -1;
				break;
				}
			else
				{
				cThis = at(uThisPos);
				++uThisPos;
				if (!IsGapChar(cThis))
					{
					cThis = toupper(cThis);
					break;
					}
				}
			}

	// Set cOther to next non-gap character in s
	// or -1 if end-of-string.
		for (;;)
			{
			if (uOtherPos == uOtherLength)
				{
				cOther = -1;
				break;
				}
			else
				{
				cOther = s.at(uOtherPos);
				++uOtherPos;
				if (!IsGapChar(cOther))
					{
					cOther = toupper(cOther);
					break;
					}
				}
			}

	// Compare characters are corresponding ungapped position
		if (cThis != cOther)
			return false;
		}
	return true;
	}