// Copies every sequence held by this vector into msa, one row per sequence.
// Rows shorter than the longest sequence are right-padded with '.' so that
// all rows end up with the same number of columns. An empty vector simply
// clears msa.
void SeqVect::PadToMSA(MSA &msa)
{
    const unsigned uRowCount = Length();
    if (uRowCount == 0)
    {
        msa.Clear();
        return;
    }

    // First pass: the alignment width is the length of the longest sequence.
    unsigned uWidth = 0;
    for (unsigned uRow = 0; uRow < uRowCount; ++uRow)
    {
        const unsigned uLen = at(uRow)->Length();
        if (uLen > uWidth)
            uWidth = uLen;
    }

    msa.SetSize(uRowCount, uWidth);

    // Second pass: name each row, then write its residues followed by padding.
    for (unsigned uRow = 0; uRow < uRowCount; ++uRow)
    {
        Seq *ptrRowSeq = at(uRow);
        msa.SetSeqName(uRow, ptrRowSeq->GetName());

        const unsigned uLen = ptrRowSeq->Length();
        for (unsigned uCol = 0; uCol < uWidth; ++uCol)
        {
            const char c = (uCol < uLen) ? ptrRowSeq->at(uCol) : '.';
            msa.SetChar(uRow, uCol, c);
        }
    }
}
// Substitution score for position i of seqA against position j of seqB:
// the VTML_SP entry for the two letters, offset by g_scoreCenter.
// Both positions must be inside their sequences (asserted).
static SCORE Subst(const Seq &seqA, const Seq &seqB, unsigned i, unsigned j)
{
    assert(i < seqA.Length());
    assert(j < seqB.Length());

    const unsigned uRow = seqA.GetLetter(i);
    const unsigned uCol = seqB.GetLetter(j);
    return VTML_SP[uRow][uCol] + g_scoreCenter;
}
// Replaces the contents of this sequence with the characters of rhs in
// reverse order and stores a private copy of rhs's name.
//
// rhs: sequence to mirror. rhs.GetName() must be a valid NUL-terminated
//      string, because it is measured with strlen.
void Seq::CopyReversed(const Seq &rhs)
{
    clear();

    // Walk rhs back to front. The source index is formed inside the loop as
    // uLength - 1 - k, so nothing wraps around when rhs is empty.
    const unsigned uLength = rhs.Length();
    for (unsigned k = 0; k < uLength; ++k)
        push_back(rhs.at(uLength - 1 - k));

    // Build the new name first, so the source string is still intact even if
    // rhs.GetName() happens to point at the buffer this object holds.
    const char *ptrName = rhs.GetName();
    const size_t n = strlen(ptrName) + 1;
    char *ptrNewName = new char[n];
    strcpy(ptrNewName, ptrName);

    // Release the name held so far; overwriting the pointer without this
    // would leak the old buffer on every call.
    // NOTE(review): assumes m_ptrName is either null or a new[]-allocated
    // buffer owned by this object -- confirm against the Seq constructor.
    delete[] m_ptrName;
    m_ptrName = ptrNewName;
}
// Rebuilds sv from the rows of ma.
//
// Each row is converted to a Seq (core bytes plus the row name in the local
// 8-bit encoding) and its gap characters are removed. Rows that still have
// residues are appended to sv, and the matching entry of ctx->input_uIds is
// recorded in ctx->tmp_uIds at the sequence's position in sv. Rows that are
// empty after gap removal are skipped.
//
// sv:       receives the ungapped sequences; cleared first.
// ma:       source alignment.
// fixAlpha: when true, sv.FixAlpha() is called after all rows are processed.
void convertMAlignment2SecVect(SeqVect& sv, const MultipleSequenceAlignment& ma, bool fixAlpha) {
    sv.Clear();

    MuscleContext *ctx = getMuscleContext();
    ctx->fillUidsVectors(ma->getNumRows());

    unsigned i = 0;          // index of the current row in ma
    unsigned seq_count = 0;  // number of sequences appended to sv so far
    foreach(const MultipleSequenceAlignmentRow& row, ma->getMsaRows()) {
        Seq *ptrSeq = new Seq();
        QByteArray name = row->getName().toLocal8Bit();
        ptrSeq->FromString(row->getCore().constData(), name.constData());

        //stripping gaps, original Seq::StripGaps fails on MSVC9
        Seq::iterator newEnd = std::remove(ptrSeq->begin(), ptrSeq->end(), U2Msa::GAP_CHAR);
        ptrSeq->erase(newEnd, ptrSeq->end());

        if (ptrSeq->Length() != 0) {
            ctx->tmp_uIds[seq_count] = ctx->input_uIds[i];
            sv.push_back(ptrSeq);
            seq_count++;
        } else {
            // The sequence is not handed to sv, so nobody else holds this
            // pointer; free it here instead of leaking it.
            delete ptrSeq;
        }
        i++;
    }

    if (fixAlpha) {
        sv.FixAlpha();
    }
}
unsigned EstringOp(const short es[], const Seq &sIn, MSA &a) { unsigned uSymbols; unsigned uIndels; EstringCounts(es, &uSymbols, &uIndels); assert(sIn.Length() == uSymbols); unsigned uColCount = uSymbols + uIndels; a.Clear(); a.SetSize(1, uColCount); a.SetSeqName(0, sIn.GetName()); a.SetSeqId(0, sIn.GetId()); unsigned p = 0; unsigned uColIndex = 0; for (;;) { int n = *es++; if (0 == n) break; if (n > 0) for (int i = 0; i < n; ++i) { char c = sIn[p++]; a.SetChar(0, uColIndex++, c); } else for (int i = 0; i < -n; ++i) a.SetChar(0, uColIndex++, '-'); } assert(uColIndex == uColCount); return uColCount; }
void EstringOp(const short es[], const Seq &sIn, Seq &sOut) { #if DEBUG unsigned uSymbols; unsigned uIndels; EstringCounts(es, &uSymbols, &uIndels); assert(sIn.Length() == uSymbols); #endif sOut.Clear(); sOut.SetName(sIn.GetName()); int p = 0; for (;;) { int n = *es++; if (0 == n) break; if (n > 0) for (int i = 0; i < n; ++i) { char c = sIn[p++]; sOut.push_back(c); } else for (int i = 0; i < -n; ++i) sOut.push_back('-'); } }
bool Seq::EqIgnoreCase(const Seq &s) const { const unsigned n = Length(); if (n != s.Length()) { return false; } for (unsigned i = 0; i < n; ++i) { const char c1 = at(i); const char c2 = s.at(i); if (IsGap(c1)) { if (!IsGap(c2)) return false; } else { if (toupper(c1) != toupper(c2)) { return false; } } } return true; }
// For every position of s, stores in Row[i] the VTML_SP row belonging to the
// letter at that position. Letter codes of 20 or above are mapped to the
// AX_X row. Row must have room for s.Length() entries.
static void RowFromSeq(const Seq &s, SCORE *Row[])
{
    const unsigned uCount = s.Length();
    for (unsigned uPos = 0; uPos < uCount; ++uPos)
    {
        const char c = s.GetChar(uPos);
        const unsigned uLetter = CharToLetter(c);
        const bool bKnownLetter = uLetter < 20;
        if (!bKnownLetter)
        {
            Row[uPos] = VTML_SP[AX_X];
            continue;
        }
        Row[uPos] = VTML_SP[uLetter];
    }
}
// Converts each character of s to its letter code and writes it to Letters.
// Codes of 20 or above are replaced by AX_X. Letters must have room for
// s.Length() entries.
static void LettersFromSeq(const Seq &s, unsigned Letters[])
{
    const unsigned uCount = s.Length();
    unsigned uPos = 0;
    while (uPos < uCount)
    {
        const char c = s.GetChar(uPos);
        unsigned uCode = CharToLetter(c);
        if (!(uCode < 20))
            uCode = AX_X;
        Letters[uPos] = uCode;
        ++uPos;
    }
}
bool Seq::Eq(const Seq &s) const { const unsigned n = Length(); if (n != s.Length()) return false; for (unsigned i = 0; i < n; ++i) { const char c1 = at(i); const char c2 = s.at(i); if (c1 != c2) return false; } return true; }
static void PathSeq(const Seq &s, const PWPath &Path, bool bRight, Seq &sOut) { short *esA; short *esB; PathToEstrings(Path, &esA, &esB); const unsigned uSeqLength = s.Length(); const unsigned uEdgeCount = Path.GetEdgeCount(); sOut.Clear(); sOut.SetName(s.GetName()); unsigned uPos = 0; for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex) { const PWEdge &Edge = Path.GetEdge(uEdgeIndex); char cType = Edge.cType; if (bRight) { if (cType == 'I') cType = 'D'; else if (cType == 'D') cType = 'I'; } switch (cType) { case 'M': sOut.AppendChar(s[uPos++]); break; case 'D': sOut.AppendChar('-'); break; case 'I': sOut.AppendChar(s[uPos++]); break; default: Quit("PathSeq, invalid edge type %c", cType); } } }
// Aligns seqA against seqB by dynamic programming and returns the final
// score; the resulting alignment is written to Path via TraceBackToPath.
//
// Working storage comes from DPM (declared outside this function) after
// AllocDPMem(uLengthA, uLengthB): three rotating row buffers each for the
// match scores (MPrev/MCurr/MWork) and delete scores (DPrev/DCurr/DWork),
// per-column delete start positions (uDeletePos), and the TraceBack matrix.
//
// TraceBack encoding as written here: a positive entry is the row distance
// back to where a delete started (i - uDeletePos), a negative entry is the
// column distance back to where an insert started (uInsertPos - j). When the
// diagonal predecessor wins, the entry is not written by this function.
//
// NOTE(review): MxRowA[0], LettersB[0] and MPrev[uLengthB-1] are accessed
// unconditionally -- presumably callers guarantee both sequences are
// non-empty; confirm.
// NOTE(review): outside DEBUG builds the TraceBack rows are not zeroed here;
// presumably AllocDPMem hands back zeroed rows -- confirm.
SCORE GlobalAlignSS(const Seq &seqA, const Seq &seqB, PWPath &Path)
{
    const unsigned uLengthA = seqA.Length();
    const unsigned uLengthB = seqB.Length();
    const unsigned uPrefixCountA = uLengthA + 1;
    const unsigned uPrefixCountB = uLengthB + 1;

    AllocDPMem(uLengthA, uLengthB);

    SCORE *MPrev = DPM.MPrev;
    SCORE *MCurr = DPM.MCurr;
    SCORE *MWork = DPM.MWork;

    SCORE *DPrev = DPM.DPrev;
    SCORE *DCurr = DPM.DCurr;
    SCORE *DWork = DPM.DWork;
    SCORE **MxRowA = DPM.MxRowA;
    unsigned *LettersB = DPM.LettersB;

    // MxRowA[i] becomes the substitution row for letter i of A, LettersB[j]
    // the letter code of position j of B, so a substitution score is just
    // MxRowA[i][LettersB[j]].
    RowFromSeq(seqA, MxRowA);
    LettersFromSeq(seqB, LettersB);

    unsigned *uDeletePos = DPM.uDeletePos;

    int **TraceBack = DPM.TraceBack;

#if DEBUG
    for (unsigned i = 0; i < uPrefixCountA; ++i)
        memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));
#endif

    // Special case for i=0
    TraceBack[0][0] = 0;
    MPrev[0] = MxRowA[0][LettersB[0]];

    // D(0,0) is -infinity (requires I->D).
    DPrev[0] = MINUS_INFINITY;

    for (unsigned j = 1; j < uLengthB; ++j)
    {
        unsigned uLetterB = LettersB[j];

        // Only way to get M(0, j) looks like this:
        //      A   ----X
        //      B   XXXXX
        //          0   j
        // So gap-open at j=0, gap-close at j-1.
        MPrev[j] = MxRowA[0][uLetterB] + g_scoreGapOpen/2; // term gaps half
        TraceBack[0][j] = -(int) j;

        // Assume no D->I transitions, then can't be a delete if only
        // one letter from A.
        DPrev[j] = MINUS_INFINITY;
    }

    // Best insert score seen so far in the current row, up to column j-1.
    SCORE IPrev_j_1;
    for (unsigned i = 1; i < uLengthA; ++i)
    {
        // Seed the current M row with the plain substitution scores for
        // letter i of A against every letter of B; the best predecessor
        // score is added on top further down.
        SCORE *ptrMCurr_j = MCurr;
        memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));

        const SCORE *RowA = MxRowA[i];
        // ptrRowA is not referenced again in this function.
        const SCORE *ptrRowA = MxRowA[i];
        const SCORE *ptrMCurrEnd = ptrMCurr_j + uLengthB;
        unsigned *ptrLettersB = LettersB;
        for (; ptrMCurr_j != ptrMCurrEnd; ++ptrMCurr_j)
        {
            *ptrMCurr_j = RowA[*ptrLettersB];
            ++ptrLettersB;
        }

        unsigned *ptrDeletePos = uDeletePos;

        // Special case for j=0
        // Only way to get M(i, 0) looks like this:
        //          0   i
        //      A   XXXXX
        //      B   ----X
        // So gap-open at i=0, gap-close at i-1.
        ptrMCurr_j = MCurr;
        assert(ptrMCurr_j == &(MCurr[0]));
        *ptrMCurr_j += g_scoreGapOpen/2; // term gaps half

        ++ptrMCurr_j;

        int *ptrTraceBack_ij = TraceBack[i];
        *ptrTraceBack_ij++ = (int) i;

        // Delete score for column 0: either keep the delete carried in
        // DPrev (no per-row cost is added here) or open a new one from
        // MPrev, in which case row i is remembered as its start.
        SCORE *ptrMPrev_j = MPrev;
        SCORE *ptrDPrev = DPrev;
        SCORE d = *ptrDPrev;
        SCORE DNew = *ptrMPrev_j + g_scoreGapOpen;
        if (DNew > d)
        {
            d = DNew;
            *ptrDeletePos = i;
        }

        SCORE *ptrDCurr = DCurr;

        assert(ptrDCurr == &(DCurr[0]));
        *ptrDCurr = d;

        // Can't have an insert if no letters from B
        IPrev_j_1 = MINUS_INFINITY;

        // Column where the currently best insert started; assigned only when
        // an insert candidate improves on IPrev_j_1.
        unsigned uInsertPos;
        for (unsigned j = 1; j < uLengthB; ++j)
        {
            // Here, MPrev_j is preserved from previous
            // iteration so with current i,j is M[i-1][j-1]
            SCORE MPrev_j = *ptrMPrev_j;
            SCORE INew = MPrev_j + g_scoreGapOpen;
            if (INew > IPrev_j_1)
            {
                IPrev_j_1 = INew;
                uInsertPos = j;
            }

            // Pick the best predecessor of M(i,j): diagonal match first,
            // then delete, then insert. Only delete/insert wins are
            // recorded in the traceback entry.
            SCORE scoreMax = MPrev_j;

            assert(ptrDPrev == &(DPrev[j-1]));
            SCORE scoreD = *ptrDPrev++;
            if (scoreD > scoreMax)
            {
                scoreMax = scoreD;
                assert(ptrDeletePos == &(uDeletePos[j-1]));
                *ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
                assert(*ptrTraceBack_ij > 0);
            }
            ++ptrDeletePos;

            SCORE scoreI = IPrev_j_1;
            if (scoreI > scoreMax)
            {
                scoreMax = scoreI;
                *ptrTraceBack_ij = (int) uInsertPos - (int) j;
                assert(*ptrTraceBack_ij < 0);
            }

            *ptrMCurr_j += scoreMax;
            assert(ptrMCurr_j == &(MCurr[j]));
            ++ptrMCurr_j;

            // Advance to column j of the previous row and update the
            // delete score for that column the same way as for column 0.
            MPrev_j = *(++ptrMPrev_j);
            assert(ptrDPrev == &(DPrev[j]));
            SCORE d = *ptrDPrev;
            SCORE DNew = MPrev_j + g_scoreGapOpen;
            if (DNew > d)
            {
                d = DNew;
                assert(ptrDeletePos == &uDeletePos[j]);
                *ptrDeletePos = i;
            }
            assert(ptrDCurr + 1 == &(DCurr[j]));
            *(++ptrDCurr) = d;

            ++ptrTraceBack_ij;
        }

        // NOTE(review): presumably cycles the three buffers so the row just
        // computed is read as the "previous" row next time -- confirm
        // against Rotate.
        Rotate(MPrev, MCurr, MWork);
        Rotate(DPrev, DCurr, DWork);
    }

    // Special case for i=uLengthA
    SCORE IPrev = MINUS_INFINITY;

    // NOTE(review): stays unassigned if no MPrev[j-1] exceeds
    // MINUS_INFINITY (e.g. uLengthB <= 1); it is only read when scoreI wins
    // below.
    unsigned uInsertPos;

    for (unsigned j = 1; j < uLengthB; ++j)
    {
        SCORE INew = MPrev[j-1];
        if (INew > IPrev)
        {
            uInsertPos = j;
            IPrev = INew;
        }
    }

    // Special case for i=uLengthA, j=uLengthB
    SCORE scoreMax = MPrev[uLengthB-1];
    int iTraceBack = 0;

    SCORE scoreD = DPrev[uLengthB-1] - g_scoreGapOpen/2; // term gaps half
    if (scoreD > scoreMax)
    {
        scoreMax = scoreD;
        iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
    }

    SCORE scoreI = IPrev - g_scoreGapOpen/2;
    if (scoreI > scoreMax)
    {
        scoreMax = scoreI;
        iTraceBack = (int) uInsertPos - (int) uLengthB;
    }

    TraceBack[uLengthA][uLengthB] = iTraceBack;

    TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);

    return scoreMax;
}
void MakeRootMSA(const SeqVect &v, const Tree &GuideTree, ProgNode Nodes[], MSA &a) { #if TRACE Log("MakeRootMSA Tree="); GuideTree.LogMe(); #endif const unsigned uSeqCount = v.GetSeqCount(); unsigned uColCount = uInsane; unsigned uSeqIndex = 0; const unsigned uTreeNodeCount = GuideTree.GetNodeCount(); const unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex(); const PWPath &RootPath = Nodes[uRootNodeIndex].m_Path; const unsigned uRootColCount = RootPath.GetEdgeCount(); const unsigned uEstringSize = uRootColCount + 1; short *Estring1 = new short[uEstringSize]; short *Estring2 = new short[uEstringSize]; SetProgressDesc("Root alignment"); unsigned uTreeNodeIndex = GetFirstNodeIndex(GuideTree); do { Progress(uSeqIndex, uSeqCount); unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex); const Seq &s = *(v[uId]); Seq sRootE; short *es = MakeRootSeqE(s, GuideTree, uTreeNodeIndex, Nodes, sRootE, Estring1, Estring2); Nodes[uTreeNodeIndex].m_EstringL = EstringNewCopy(es); #if VALIDATE Seq sRoot; MakeRootSeq(s, GuideTree, uTreeNodeIndex, Nodes, sRoot); if (!sRoot.Eq(sRootE)) { Log("sRoot="); sRoot.LogMe(); Log("sRootE="); sRootE.LogMe(); Quit("Root seqs differ"); } #if TRACE Log("MakeRootSeq=\n"); sRoot.LogMe(); #endif #endif if (uInsane == uColCount) { uColCount = sRootE.Length(); a.SetSize(uSeqCount, uColCount); } else { assert(uColCount == sRootE.Length()); } a.SetSeqName(uSeqIndex, s.GetName()); a.SetSeqId(uSeqIndex, uId); for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) a.SetChar(uSeqIndex, uColIndex, sRootE[uColIndex]); ++uSeqIndex; uTreeNodeIndex = GetNextNodeIndex(GuideTree, uTreeNodeIndex); } while (NULL_NEIGHBOR != uTreeNodeIndex); delete[] Estring1; delete[] Estring2; ProgressStepsDone(); assert(uSeqIndex == uSeqCount); }
bool Seq::EqIgnoreCaseAndGaps(const Seq &s) const { const unsigned uThisLength = Length(); const unsigned uOtherLength = s.Length(); unsigned uThisPos = 0; unsigned uOtherPos = 0; int cThis; int cOther; for (;;) { if (uThisPos == uThisLength && uOtherPos == uOtherLength) break; // Set cThis to next non-gap character in this string // or -1 if end-of-string. for (;;) { if (uThisPos == uThisLength) { cThis = -1; break; } else { cThis = at(uThisPos); ++uThisPos; if (!IsGapChar(cThis)) { cThis = toupper(cThis); break; } } } // Set cOther to next non-gap character in s // or -1 if end-of-string. for (;;) { if (uOtherPos == uOtherLength) { cOther = -1; break; } else { cOther = s.at(uOtherPos); ++uOtherPos; if (!IsGapChar(cOther)) { cOther = toupper(cOther); break; } } } // Compare characters are corresponding ungapped position if (cThis != cOther) return false; } return true; }